/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit. *
* See https://openmm.org/development. *
* *
* Portions copyright (c) 2008-2025 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "openmm/Context.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/common/BondedUtilities.h"
#include "openmm/common/CommonCalcNonbondedForce.h"
#include "openmm/common/CommonKernelUtilities.h"
#include "openmm/common/ContextSelector.h"
#include "openmm/common/NonbondedUtilities.h"
#include "CommonKernelSources.h"
#include "SimTKOpenMMRealType.h"
#include
#include
#include
#include
#include
using namespace OpenMM;
using namespace std;
class CommonCalcNonbondedForceKernel::ForceInfo : public ComputeForceInfo {
public:
ForceInfo(const NonbondedForce& force) : force(force) {
particleOffset.resize(force.getNumParticles(), -1);
exceptionOffset.resize(force.getNumExceptions(), -1);
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string parameter;
int particleIndex;
double chargeScale, sigmaScale, epsilonScale;
force.getParticleParameterOffset(i, parameter, particleIndex, chargeScale, sigmaScale, epsilonScale);
particleOffset[particleIndex] = i;
}
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string parameter;
int exceptionIndex;
double chargeProdScale, sigmaScale, epsilonScale;
force.getExceptionParameterOffset(i, parameter, exceptionIndex, chargeProdScale, sigmaScale, epsilonScale);
exceptionOffset[exceptionIndex] = i;
}
}
bool areParticlesIdentical(int particle1, int particle2) {
if (particleOffset[particle1] != -1 || particleOffset[particle2] != -1) {
if (particleOffset[particle1] == -1 || particleOffset[particle2] == -1)
return false;
string parameter1, parameter2;
int particleIndex1, particleIndex2;
double chargeScale1, sigmaScale1, epsilonScale1, chargeScale2, sigmaScale2, epsilonScale2;
force.getParticleParameterOffset(particleOffset[particle1], parameter1, particleIndex1, chargeScale1, sigmaScale1, epsilonScale1);
force.getParticleParameterOffset(particleOffset[particle2], parameter2, particleIndex2, chargeScale2, sigmaScale2, epsilonScale2);
if (parameter1 != parameter2 || chargeScale1 != chargeScale2 || sigmaScale1 != sigmaScale2 || epsilonScale1 != epsilonScale2)
return false;
}
double charge1, charge2, sigma1, sigma2, epsilon1, epsilon2;
force.getParticleParameters(particle1, charge1, sigma1, epsilon1);
force.getParticleParameters(particle2, charge2, sigma2, epsilon2);
return (charge1 == charge2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
int getNumParticleGroups() {
return force.getNumExceptions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(index, particle1, particle2, chargeProd, sigma, epsilon);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
if (exceptionOffset[group1] != -1 || exceptionOffset[group2] != -1) {
if (exceptionOffset[group1] == -1 || exceptionOffset[group2] == -1)
return false;
string parameter1, parameter2;
int exceptionIndex1, exceptionIndex2;
double chargeProdScale1, sigmaScale1, epsilonScale1, chargeProdScale2, sigmaScale2, epsilonScale2;
force.getExceptionParameterOffset(exceptionOffset[group1], parameter1, exceptionIndex1, chargeProdScale1, sigmaScale1, epsilonScale1);
force.getExceptionParameterOffset(exceptionOffset[group2], parameter2, exceptionIndex2, chargeProdScale2, sigmaScale2, epsilonScale2);
if (parameter1 != parameter2 || chargeProdScale1 != chargeProdScale2 || sigmaScale1 != sigmaScale2 || epsilonScale1 != epsilonScale2)
return false;
}
int particle1, particle2;
double chargeProd1, chargeProd2, sigma1, sigma2, epsilon1, epsilon2;
force.getExceptionParameters(group1, particle1, particle2, chargeProd1, sigma1, epsilon1);
force.getExceptionParameters(group2, particle1, particle2, chargeProd2, sigma2, epsilon2);
return (chargeProd1 == chargeProd2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
private:
const NonbondedForce& force;
vector particleOffset, exceptionOffset;
};
class CommonCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO {
public:
PmeIO(ComputeContext& cc, ComputeKernel addForcesKernel) : cc(cc), addForcesKernel(addForcesKernel) {
forceTemp.initialize(cc, cc.getNumAtoms(), "PmeForce");
addForcesKernel->addArg(forceTemp);
addForcesKernel->addArg();
}
float* getPosq() {
ContextSelector selector(cc);
cc.getPosq().download(posq);
return (float*) &posq[0];
}
void setForce(float* force) {
forceTemp.upload(force);
addForcesKernel->setArg(1, cc.getLongForceBuffer());
addForcesKernel->execute(cc.getNumAtoms());
}
void setChargeDerivatives(float* chargeDerivatives) {
}
private:
ComputeContext& cc;
vector posq;
ComputeArray forceTemp;
ComputeKernel addForcesKernel;
};
class CommonCalcNonbondedForceKernel::PmePreComputation : public ComputeContext::ForcePreComputation {
public:
PmePreComputation(ComputeContext& cc, Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : cc(cc), pme(pme), io(io) {
}
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
Vec3 boxVectors[3];
cc.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
pme.getAs().beginComputation(io, boxVectors, includeEnergy, true, false);
}
private:
ComputeContext& cc;
Kernel pme;
CalcPmeReciprocalForceKernel::IO& io;
};
class CommonCalcNonbondedForceKernel::PmePostComputation : public ComputeContext::ForcePostComputation {
public:
PmePostComputation(Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : pme(pme), io(io) {
}
double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
return pme.getAs().finishComputation(io);
}
private:
Kernel pme;
CalcPmeReciprocalForceKernel::IO& io;
};
class CommonCalcNonbondedForceKernel::SyncQueuePreComputation : public ComputeContext::ForcePreComputation {
public:
SyncQueuePreComputation(ComputeContext& cc, ComputeQueue queue, ComputeEvent event, int forceGroup) : cc(cc), queue(queue), event(event), forceGroup(forceGroup) {
}
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
if ((groups&(1<enqueue();
event->queueWait(queue);
}
}
private:
ComputeContext& cc;
ComputeQueue queue;
ComputeEvent event;
int forceGroup;
};
class CommonCalcNonbondedForceKernel::SyncQueuePostComputation : public ComputeContext::ForcePostComputation {
public:
SyncQueuePostComputation(ComputeContext& cc, ComputeEvent event, ComputeArray& pmeEnergyBuffer, int forceGroup) : cc(cc), event(event),
pmeEnergyBuffer(pmeEnergyBuffer), forceGroup(forceGroup) {
}
void setKernel(ComputeKernel kernel) {
addEnergyKernel = kernel;
addEnergyKernel->addArg(pmeEnergyBuffer);
addEnergyKernel->addArg(cc.getEnergyBuffer());
addEnergyKernel->addArg((int) pmeEnergyBuffer.getSize());
}
double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
if ((groups&(1<queueWait(cc.getCurrentQueue());
if (includeEnergy)
addEnergyKernel->execute(pmeEnergyBuffer.getSize());
}
return 0.0;
}
private:
ComputeContext& cc;
ComputeEvent event;
ComputeKernel addEnergyKernel;
ComputeArray& pmeEnergyBuffer;
int forceGroup;
};
CommonCalcNonbondedForceKernel::~CommonCalcNonbondedForceKernel() {
ContextSelector selector(cc);
if (pmeio != NULL)
delete pmeio;
}
void CommonCalcNonbondedForceKernel::commonInitialize(const System& system, const NonbondedForce& force, bool usePmeQueue,
bool deviceIsCpu, bool useFixedPointChargeSpreading, bool useCpuPme) {
this->usePmeQueue = false;
this->deviceIsCpu = deviceIsCpu;
this->useFixedPointChargeSpreading = useFixedPointChargeSpreading;
this->useCpuPme = useCpuPme;
ContextSelector selector(cc);
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
string prefix = "nonbonded"+cc.intToString(forceIndex)+"_";
// Identify which exceptions are 1-4 interactions.
set exceptionsWithOffsets;
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
exceptionsWithOffsets.insert(exception);
}
vector > exclusions;
vector exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
exclusions.push_back(pair(particle1, particle2));
if (chargeProd != 0.0 || epsilon != 0.0 || exceptionsWithOffsets.find(i) != exceptionsWithOffsets.end()) {
exceptionIndex[i] = exceptions.size();
exceptions.push_back(i);
}
}
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
vector baseParticleParamVec(cc.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector > exclusionList(numParticles);
hasCoulomb = false;
hasLJ = false;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
baseParticleParamVec[i] = mm_float4(charge, sigma, epsilon, 0);
exclusionList[i].push_back(i);
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
hasLJ = true;
}
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param;
int particle;
double charge, sigma, epsilon;
force.getParticleParameterOffset(i, param, particle, charge, sigma, epsilon);
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
hasLJ = true;
}
for (auto exclusion : exclusions) {
exclusionList[exclusion.first].push_back(exclusion.second);
exclusionList[exclusion.second].push_back(exclusion.first);
}
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
doLJPME = (nonbondedMethod == LJPME && hasLJ);
usePosqCharges = hasCoulomb ? cc.requestPosqCharges() : false;
pmeDispersionSpreadWaveSize = 64;
pmeDispersionSpreadBlockSize = 256;
pmeDispersionAtomsPerWave = pmeDispersionSpreadWaveSize/PmeOrder;
pmeDispersionAtomsPerBlock = (pmeDispersionSpreadBlockSize/pmeDispersionSpreadWaveSize)*pmeDispersionAtomsPerWave;
// The LDS spread path assumes wave64 execution and is only used for HIP LJ-PME fixed point spreading.
usePmeDispersionWave64LdsSpread = (getPlatform().getName() == "HIP" &&
doLJPME && useFixedPointChargeSpreading && PmeOrder == 5 &&
cc.getSIMDWidth() == pmeDispersionSpreadWaveSize &&
cc.getMaxThreadBlockSize() >= pmeDispersionSpreadBlockSize);
map defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
defines["USE_LJ_SWITCH"] = (useCutoff && force.getUseSwitchingFunction() ? "1" : "0");
if (useCutoff) {
// Compute the reaction field constants.
double reactionFieldK = pow(force.getCutoffDistance(), -3.0)*(force.getReactionFieldDielectric()-1.0)/(2.0*force.getReactionFieldDielectric()+1.0);
double reactionFieldC = (1.0 / force.getCutoffDistance())*(3.0*force.getReactionFieldDielectric())/(2.0*force.getReactionFieldDielectric()+1.0);
defines["REACTION_FIELD_K"] = cc.doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = cc.doubleToString(reactionFieldC);
// Compute the switching coefficients.
if (force.getUseSwitchingFunction()) {
defines["LJ_SWITCH_CUTOFF"] = cc.doubleToString(force.getSwitchingDistance());
defines["LJ_SWITCH_C3"] = cc.doubleToString(10/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 3.0));
defines["LJ_SWITCH_C4"] = cc.doubleToString(15/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 4.0));
defines["LJ_SWITCH_C5"] = cc.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
}
}
if (force.getUseDispersionCorrection() && cc.getContextIndex() == 0 && !doLJPME)
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
alpha = 0;
ewaldSelfEnergy = 0.0;
totalCharge = 0.0;
map paramsDefines;
paramsDefines["ONE_4PI_EPS0"] = cc.doubleToString(ONE_4PI_EPS0);
paramsDefines["EPSILON0"] = cc.doubleToString(EPSILON0);
paramsDefines["WORK_GROUP_SIZE"] = cc.intToString(cc.ThreadBlockSize);
paramsDefines["CHARGE_BUFFER_SIZE"] = cc.intToString(cc.getNumThreadBlocks());
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1";
if (force.getNumParticleParameterOffsets() > 0)
paramsDefines["HAS_PARTICLE_OFFSETS"] = "1";
if (force.getNumExceptionParameterOffsets() > 0)
paramsDefines["HAS_EXCEPTION_OFFSETS"] = "1";
if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1";
if (doLJPME)
paramsDefines["INCLUDE_LJPME_EXCEPTIONS"] = "1";
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
int kmaxx, kmaxy, kmaxz;
NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
defines["EWALD_ALPHA"] = cc.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cc.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
if (cc.getContextIndex() == 0) {
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cc.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++) {
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
totalCharge += baseParticleParamVec[i].x;
}
// Create the reciprocal space kernels.
map replacements;
replacements["NUM_ATOMS"] = cc.intToString(numParticles);
replacements["PADDED_NUM_ATOMS"] = cc.intToString(cc.getPaddedNumAtoms());
replacements["KMAX_X"] = cc.intToString(kmaxx);
replacements["KMAX_Y"] = cc.intToString(kmaxy);
replacements["KMAX_Z"] = cc.intToString(kmaxz);
replacements["EXP_COEFFICIENT"] = cc.doubleToString(-1.0/(4.0*alpha*alpha));
replacements["ONE_4PI_EPS0"] = cc.doubleToString(ONE_4PI_EPS0);
replacements["M_PI"] = cc.doubleToString(M_PI);
ComputeProgram program = cc.compileProgram(CommonKernelSources::ewald, replacements);
ewaldSumsKernel = program->createKernel("calculateEwaldCosSinSums");
ewaldForcesKernel = program->createKernel("calculateEwaldForces");
int elementSize = (cc.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
cosSinSums.initialize(cc, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if (((nonbondedMethod == PME || nonbondedMethod == LJPME) && hasCoulomb) || doLJPME) {
// Compute the PME parameters.
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = cc.findLegalFFTDimension(gridSizeX);
gridSizeY = cc.findLegalFFTDimension(gridSizeY);
gridSizeZ = cc.findLegalFFTDimension(gridSizeZ);
if (doLJPME) {
NonbondedForceImpl::calcPMEParameters(system, force, dispersionAlpha, dispersionGridSizeX,
dispersionGridSizeY, dispersionGridSizeZ, true);
dispersionGridSizeX = cc.findLegalFFTDimension(dispersionGridSizeX);
dispersionGridSizeY = cc.findLegalFFTDimension(dispersionGridSizeY);
dispersionGridSizeZ = cc.findLegalFFTDimension(dispersionGridSizeZ);
}
defines["EWALD_ALPHA"] = cc.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cc.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
defines["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME) {
defines["EWALD_DISPERSION_ALPHA"] = cc.doubleToString(dispersionAlpha);
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dar2 = dalphaR*dalphaR;
double dar4 = dar2*dar2;
double multShift6 = -invRCut6*(1.0 - exp(-dar2) * (1.0 + dar2 + 0.5*dar4));
defines["INVCUT6"] = cc.doubleToString(invRCut6);
defines["MULTSHIFT6"] = cc.doubleToString(multShift6);
}
if (cc.getContextIndex() == 0) {
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cc.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++) {
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
totalCharge += baseParticleParamVec[i].x;
}
if (doLJPME) {
paramsDefines["INCLUDE_LJPME"] = "1";
paramsDefines["LJPME_SELF_ENERGY_SCALE"] = cc.doubleToString(pow(dispersionAlpha, 6)/3.0);
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
pmeDefines["PME_ORDER"] = cc.intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = cc.intToString(numParticles);
pmeDefines["PADDED_NUM_ATOMS"] = cc.intToString(cc.getPaddedNumAtoms());
pmeDefines["NUM_INDICES"] = "0";
pmeDefines["RECIP_EXP_FACTOR"] = cc.doubleToString(M_PI*M_PI/(alpha*alpha));
pmeDefines["GRID_SIZE_X"] = cc.intToString(gridSizeX);
pmeDefines["GRID_SIZE_Y"] = cc.intToString(gridSizeY);
pmeDefines["GRID_SIZE_Z"] = cc.intToString(gridSizeZ);
pmeDefines["EPSILON_FACTOR"] = cc.doubleToString(sqrt(ONE_4PI_EPS0));
pmeDefines["M_PI"] = cc.doubleToString(M_PI);
if (useFixedPointChargeSpreading)
pmeDefines["USE_FIXED_POINT_CHARGE_SPREADING"] = "1";
if (deviceIsCpu)
pmeDefines["DEVICE_IS_CPU"] = "1";
if (useCpuPme && !doLJPME && usePosqCharges) {
// Create the CPU PME kernel.
try {
cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cc.getContextImpl());
cpuPme.getAs().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, {}, alpha, false);
ComputeProgram program = cc.compileProgram(CommonKernelSources::pme, pmeDefines);
ComputeKernel addForcesKernel = program->createKernel("addForces");
pmeio = new PmeIO(cc, addForcesKernel);
cc.addPreComputation(new PmePreComputation(cc, cpuPme, *pmeio));
cc.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
}
catch (OpenMMException& ex) {
// The CPU PME plugin isn't available.
}
}
if (pmeio == NULL) {
// Create required data structures.
int elementSize = (cc.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME) {
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
}
pmeGrid1.initialize(cc, gridElements, 2*elementSize, "pmeGrid1");
pmeGrid2.initialize(cc, gridElements, 2*elementSize, "pmeGrid2");
if (useFixedPointChargeSpreading)
cc.addAutoclearBuffer(pmeGrid2);
else
cc.addAutoclearBuffer(pmeGrid1);
pmeBsplineModuliX.initialize(cc, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY.initialize(cc, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ.initialize(cc, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) {
pmeDispersionBsplineModuliX.initialize(cc, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY.initialize(cc, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ.initialize(cc, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
}
pmeAtomGridIndex.initialize(cc, numParticles, "pmeAtomGridIndex");
if (doLJPME)
pmeDispersionAtomGridIndex.initialize(cc, numParticles, "pmeDispersionAtomGridIndex");
int energyElementSize = (cc.getUseDoublePrecision() || cc.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer.initialize(cc, cc.getNumThreadBlocks()*ComputeContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cc.clearBuffer(pmeEnergyBuffer);
sort = cc.createSort(new SortTrait(), cc.getNumAtoms());
fft = cc.createFFT(gridSizeX, gridSizeY, gridSizeZ, true);
if (doLJPME)
dispersionFft = cc.createFFT(dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, true);
this->usePmeQueue = usePmeQueue;
if (usePmeQueue) {
pmeDefines["USE_PME_STREAM"] = "1";
pmeQueue = cc.createQueue();
int recipForceGroup = force.getReciprocalSpaceForceGroup();
if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
pmeSyncEvent = cc.createEvent();
paramsSyncEvent = cc.createEvent();
cc.addPreComputation(new SyncQueuePreComputation(cc, pmeQueue, pmeSyncEvent, recipForceGroup));
cc.addPostComputation(syncQueue = new SyncQueuePostComputation(cc, pmeSyncEvent, pmeEnergyBuffer, recipForceGroup));
}
// Initialize the b-spline moduli.
for (int grid = 0; grid < 2; grid++) {
int xsize, ysize, zsize;
ComputeArray *xmoduli, *ymoduli, *zmoduli;
if (grid == 0) {
xsize = gridSizeX;
ysize = gridSizeY;
zsize = gridSizeZ;
xmoduli = &pmeBsplineModuliX;
ymoduli = &pmeBsplineModuliY;
zmoduli = &pmeBsplineModuliZ;
}
else {
if (!doLJPME)
continue;
xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ;
xmoduli = &pmeDispersionBsplineModuliX;
ymoduli = &pmeDispersionBsplineModuliY;
zmoduli = &pmeDispersionBsplineModuliZ;
}
int maxSize = max(max(xsize, ysize), zsize);
vector data(PmeOrder);
vector ddata(PmeOrder);
vector bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
// Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PmeOrder-1);
data[PmeOrder-1] = 0.0;
for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplines_data[i] = 0.0;
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
for (int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplines_data[j]*cos(arg);
ss += bsplines_data[j]*sin(arg);
}
moduli[i] = sc*sc+ss*ss;
}
for (int i = 0; i < ndata; i++)
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[(i-1+ndata)%ndata]+moduli[(i+1)%ndata])*0.5;
if (dim == 0)
xmoduli->upload(moduli, true);
else if (dim == 1)
ymoduli->upload(moduli, true);
else
zmoduli->upload(moduli, true);
}
}
}
}
}
// Add code to subtract off the reciprocal part of excluded interactions.
if ((nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) && pmeio == NULL) {
int numContexts = cc.getNumContexts();
int startIndex = cc.getContextIndex()*force.getNumExceptions()/numContexts;
int endIndex = (cc.getContextIndex()+1)*force.getNumExceptions()/numContexts;
int numExclusions = endIndex-startIndex;
if (numExclusions > 0) {
paramsDefines["HAS_EXCLUSIONS"] = "1";
vector > atoms(numExclusions, vector(2));
exclusionAtoms.initialize(cc, numExclusions, "exclusionAtoms");
exclusionParams.initialize(cc, numExclusions, "exclusionParams");
vector exclusionAtomsVec(numExclusions);
for (int i = 0; i < numExclusions; i++) {
int j = i+startIndex;
exclusionAtomsVec[i] = mm_int2(exclusions[j].first, exclusions[j].second);
atoms[i][0] = exclusions[j].first;
atoms[i][1] = exclusions[j].second;
}
exclusionAtoms.upload(exclusionAtomsVec);
map replacements;
replacements["PARAMS"] = cc.getBondedUtilities().addArgument(exclusionParams, "float4");
replacements["EWALD_ALPHA"] = cc.doubleToString(alpha);
replacements["TWO_OVER_SQRT_PI"] = cc.doubleToString(2.0/sqrt(M_PI));
replacements["DO_LJPME"] = doLJPME ? "1" : "0";
replacements["USE_PERIODIC"] = force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cc.doubleToString(dispersionAlpha);
if (force.getIncludeDirectSpace())
cc.getBondedUtilities().addInteraction(atoms, cc.replaceStrings(CommonKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
// Add the interaction to the default nonbonded kernel.
string source = cc.replaceStrings(CommonKernelSources::coulombLennardJones, defines);
charges.initialize(cc, cc.getPaddedNumAtoms(), cc.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "charges");
baseParticleParams.initialize(cc, cc.getPaddedNumAtoms(), "baseParticleParams");
baseParticleParams.upload(baseParticleParamVec);
map replacements;
replacements["ONE_4PI_EPS0"] = cc.doubleToString(ONE_4PI_EPS0);
if (usePosqCharges) {
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
}
else {
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2";
}
if (hasCoulomb && !usePosqCharges)
cc.getNonbondedUtilities().addParameter(ComputeParameterInfo(charges, prefix+"charge", "real", 1));
sigmaEpsilon.initialize(cc, cc.getPaddedNumAtoms(), "sigmaEpsilon");
if (hasLJ) {
replacements["SIGMA_EPSILON1"] = prefix+"sigmaEpsilon1";
replacements["SIGMA_EPSILON2"] = prefix+"sigmaEpsilon2";
cc.getNonbondedUtilities().addParameter(ComputeParameterInfo(sigmaEpsilon, prefix+"sigmaEpsilon", "float", 2));
}
source = cc.replaceStrings(source, replacements);
if (force.getIncludeDirectSpace())
cc.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), numParticles > 3000, true);
// Initialize the exceptions.
int numContexts = cc.getNumContexts();
int startIndex = cc.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cc.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
if (numExceptions > 0) {
paramsDefines["HAS_EXCEPTIONS"] = "1";
exceptionAtoms.resize(numExceptions);
vector > atoms(numExceptions, vector(2));
exceptionParams.initialize(cc, numExceptions, "exceptionParams");
baseExceptionParams.initialize(cc, numExceptions, "baseExceptionParams");
vector baseExceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
baseExceptionParamsVec[i] = mm_float4(chargeProd, sigma, epsilon, 0);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
baseExceptionParams.upload(baseExceptionParamsVec);
map replacements;
replacements["APPLY_PERIODIC"] = (usePeriodic && force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0");
replacements["PARAMS"] = cc.getBondedUtilities().addArgument(exceptionParams, "float4");
if (force.getIncludeDirectSpace())
cc.getBondedUtilities().addInteraction(atoms, cc.replaceStrings(CommonKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
// Initialize parameter offsets.
vector > particleOffsetVec(force.getNumParticles());
vector > exceptionOffsetVec(numExceptions);
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param;
int particle;
double charge, sigma, epsilon;
force.getParticleParameterOffset(i, param, particle, charge, sigma, epsilon);
int paramIndex = cc.registerGlobalParam(param);
paramIndices[param] = paramIndex;
particleOffsetVec[particle].push_back(mm_float4(charge, sigma, epsilon, (float) paramIndex));
}
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
int index = exceptionIndex[exception];
if (index < startIndex || index >= endIndex)
continue;
int paramIndex = cc.registerGlobalParam(param);
paramIndices[param] = paramIndex;
exceptionOffsetVec[index-startIndex].push_back(mm_float4(charge, sigma, epsilon, (float) paramIndex));
}
for (int i = 0; i < force.getNumGlobalParameters(); i++)
paramValues[force.getGlobalParameterName(i)] = force.getGlobalParameterDefaultValue(i);
particleParamOffsets.initialize(cc, max(force.getNumParticleParameterOffsets(), 1), "particleParamOffsets");
particleOffsetIndices.initialize(cc, cc.getPaddedNumAtoms()+1, "particleOffsetIndices");
vector particleOffsetIndicesVec, exceptionOffsetIndicesVec;
vector p, e;
for (int i = 0; i < particleOffsetVec.size(); i++) {
particleOffsetIndicesVec.push_back(p.size());
for (int j = 0; j < particleOffsetVec[i].size(); j++)
p.push_back(particleOffsetVec[i][j]);
}
while (particleOffsetIndicesVec.size() < particleOffsetIndices.getSize())
particleOffsetIndicesVec.push_back(p.size());
for (int i = 0; i < exceptionOffsetVec.size(); i++) {
exceptionOffsetIndicesVec.push_back(e.size());
for (int j = 0; j < exceptionOffsetVec[i].size(); j++)
e.push_back(exceptionOffsetVec[i][j]);
}
exceptionOffsetIndicesVec.push_back(e.size());
if (force.getNumParticleParameterOffsets() > 0) {
particleParamOffsets.upload(p);
particleOffsetIndices.upload(particleOffsetIndicesVec);
}
exceptionParamOffsets.initialize(cc, max((int) e.size(), 1), "exceptionParamOffsets");
exceptionOffsetIndices.initialize(cc, exceptionOffsetIndicesVec.size(), "exceptionOffsetIndices");
if (e.size() > 0) {
exceptionParamOffsets.upload(e);
exceptionOffsetIndices.upload(exceptionOffsetIndicesVec);
}
chargeBuffer.initialize(cc, cc.getNumThreadBlocks(), cc.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "chargeBuffer");
cc.clearBuffer(chargeBuffer);
recomputeParams = true;
// Initialize the kernel for updating parameters.
ComputeProgram program = cc.compileProgram(CommonKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = program->createKernel("computeParameters");
computeExclusionParamsKernel = program->createKernel("computeExclusionParameters");
computePlasmaCorrectionKernel = program->createKernel("computePlasmaCorrection");
info = new ForceInfo(force);
cc.addForce(info);
}
double CommonCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
ContextSelector selector(cc);
if (!hasInitializedKernel) {
hasInitializedKernel = true;
computeParamsKernel->addArg(cc.getEnergyBuffer());
computeParamsKernel->addArg();
computeParamsKernel->addArg(cc.getGlobalParamValues());
computeParamsKernel->addArg(cc.getPaddedNumAtoms());
computeParamsKernel->addArg(baseParticleParams);
computeParamsKernel->addArg(cc.getPosq());
computeParamsKernel->addArg(charges);
computeParamsKernel->addArg(sigmaEpsilon);
computeParamsKernel->addArg(particleParamOffsets);
computeParamsKernel->addArg(particleOffsetIndices);
computeParamsKernel->addArg(chargeBuffer);
if (exceptionParams.isInitialized()) {
computeParamsKernel->addArg((int) exceptionParams.getSize());
computeParamsKernel->addArg(baseExceptionParams);
computeParamsKernel->addArg(exceptionParams);
computeParamsKernel->addArg(exceptionParamOffsets);
computeParamsKernel->addArg(exceptionOffsetIndices);
}
if (exclusionParams.isInitialized()) {
computeExclusionParamsKernel->addArg(cc.getPosq());
computeExclusionParamsKernel->addArg(charges);
computeExclusionParamsKernel->addArg(sigmaEpsilon);
computeExclusionParamsKernel->addArg((int) exclusionParams.getSize());
computeExclusionParamsKernel->addArg(exclusionAtoms);
computeExclusionParamsKernel->addArg(exclusionParams);
}
computePlasmaCorrectionKernel->addArg(chargeBuffer);
computePlasmaCorrectionKernel->addArg(cc.getEnergyBuffer());
if (cc.getUseDoublePrecision())
computePlasmaCorrectionKernel->addArg(alpha);
else
computePlasmaCorrectionKernel->addArg((float) alpha);
computePlasmaCorrectionKernel->addArg();
if (cosSinSums.isInitialized()) {
ewaldSumsKernel->addArg(cc.getEnergyBuffer());
ewaldSumsKernel->addArg(cc.getPosq());
ewaldSumsKernel->addArg(cosSinSums);
ewaldSumsKernel->addArg();
ewaldForcesKernel->addArg(cc.getLongForceBuffer());
ewaldForcesKernel->addArg(cc.getPosq());
ewaldForcesKernel->addArg(cosSinSums);
ewaldForcesKernel->addArg();
}
if (pmeGrid1.isInitialized()) {
// Create kernels for Coulomb PME.
map replacements;
replacements["CHARGE"] = (usePosqCharges ? "pos.w" : "charges[atom]");
ComputeProgram program = cc.compileProgram(cc.replaceStrings(CommonKernelSources::pme, replacements), pmeDefines);
pmeGridIndexKernel = program->createKernel("findAtomGridIndex");
pmeSpreadChargeKernel = program->createKernel("gridSpreadCharge");
pmeConvolutionKernel = program->createKernel("reciprocalConvolution");
pmeEvalEnergyKernel = program->createKernel("gridEvaluateEnergy");
pmeInterpolateForceKernel = program->createKernel("gridInterpolateForce");
pmeGridIndexKernel->addArg(cc.getPosq());
pmeGridIndexKernel->addArg(pmeAtomGridIndex);
for (int i = 0; i < 8; i++)
pmeGridIndexKernel->addArg();
pmeSpreadChargeKernel->addArg(cc.getPosq());
if (useFixedPointChargeSpreading)
pmeSpreadChargeKernel->addArg(pmeGrid2);
else
pmeSpreadChargeKernel->addArg(pmeGrid1);
for (int i = 0; i < 8; i++)
pmeSpreadChargeKernel->addArg();
pmeSpreadChargeKernel->addArg(pmeAtomGridIndex);
pmeSpreadChargeKernel->addArg(charges);
pmeConvolutionKernel->addArg(pmeGrid2);
pmeConvolutionKernel->addArg(pmeBsplineModuliX);
pmeConvolutionKernel->addArg(pmeBsplineModuliY);
pmeConvolutionKernel->addArg(pmeBsplineModuliZ);
for (int i = 0; i < 3; i++)
pmeConvolutionKernel->addArg();
pmeEvalEnergyKernel->addArg(pmeGrid2);
if (usePmeQueue)
pmeEvalEnergyKernel->addArg(pmeEnergyBuffer);
else
pmeEvalEnergyKernel->addArg(cc.getEnergyBuffer());
pmeEvalEnergyKernel->addArg(pmeBsplineModuliX);
pmeEvalEnergyKernel->addArg(pmeBsplineModuliY);
pmeEvalEnergyKernel->addArg(pmeBsplineModuliZ);
for (int i = 0; i < 3; i++)
pmeEvalEnergyKernel->addArg();
pmeInterpolateForceKernel->addArg(cc.getPosq());
pmeInterpolateForceKernel->addArg(cc.getLongForceBuffer());
pmeInterpolateForceKernel->addArg(pmeGrid1);
for (int i = 0; i < 8; i++)
pmeInterpolateForceKernel->addArg();
pmeInterpolateForceKernel->addArg(pmeAtomGridIndex);
pmeInterpolateForceKernel->addArg(charges);
if (useFixedPointChargeSpreading) {
pmeFinishSpreadChargeKernel = program->createKernel("finishSpreadCharge");
pmeFinishSpreadChargeKernel->addArg(pmeGrid2);
pmeFinishSpreadChargeKernel->addArg(pmeGrid1);
}
if (usePmeQueue)
syncQueue->setKernel(program->createKernel("addEnergy"));
if (doLJPME) {
// Create kernels for LJ PME.
pmeDefines["EWALD_ALPHA"] = cc.doubleToString(dispersionAlpha);
pmeDefines["GRID_SIZE_X"] = cc.intToString(dispersionGridSizeX);
pmeDefines["GRID_SIZE_Y"] = cc.intToString(dispersionGridSizeY);
pmeDefines["GRID_SIZE_Z"] = cc.intToString(dispersionGridSizeZ);
pmeDefines["EPSILON_FACTOR"] = "1";
pmeDefines["RECIP_EXP_FACTOR"] = cc.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
pmeDefines["USE_LJPME"] = "1";
pmeDefines["CHARGE_FROM_SIGEPS"] = "1";
if (usePmeDispersionWave64LdsSpread) {
pmeDefines["PME_USE_WAVE64_LDS_SPREAD"] = "1";
pmeDefines["PME_SPREAD_WAVE_SIZE"] = cc.intToString(pmeDispersionSpreadWaveSize);
pmeDefines["PME_SPREAD_BLOCK_SIZE"] = cc.intToString(pmeDispersionSpreadBlockSize);
pmeDefines["PME_SPREAD_ATOMS_PER_WAVE"] = cc.intToString(pmeDispersionAtomsPerWave);
pmeDefines["PME_SPREAD_ATOMS_PER_BLOCK"] = cc.intToString(pmeDispersionAtomsPerBlock);
}
program = cc.compileProgram(CommonKernelSources::pme, pmeDefines);
pmeDispersionGridIndexKernel = program->createKernel("findAtomGridIndex");
pmeDispersionSpreadChargeKernel = program->createKernel(usePmeDispersionWave64LdsSpread ? "gridSpreadChargeWave64Lds" : "gridSpreadCharge");
pmeDispersionConvolutionKernel = program->createKernel("reciprocalConvolution");
pmeDispersionEvalEnergyKernel = program->createKernel("gridEvaluateEnergy");
pmeDispersionInterpolateForceKernel = program->createKernel("gridInterpolateForce");
pmeDispersionGridIndexKernel->addArg(cc.getPosq());
pmeDispersionGridIndexKernel->addArg(pmeDispersionAtomGridIndex);
for (int i = 0; i < 8; i++)
pmeDispersionGridIndexKernel->addArg();
pmeDispersionSpreadChargeKernel->addArg(cc.getPosq());
if (useFixedPointChargeSpreading)
pmeDispersionSpreadChargeKernel->addArg(pmeGrid2);
else
pmeDispersionSpreadChargeKernel->addArg(pmeGrid1);
for (int i = 0; i < 8; i++)
pmeDispersionSpreadChargeKernel->addArg();
pmeDispersionSpreadChargeKernel->addArg(pmeDispersionAtomGridIndex);
pmeDispersionSpreadChargeKernel->addArg(sigmaEpsilon);
pmeDispersionConvolutionKernel->addArg(pmeGrid2);
pmeDispersionConvolutionKernel->addArg(pmeDispersionBsplineModuliX);
pmeDispersionConvolutionKernel->addArg(pmeDispersionBsplineModuliY);
pmeDispersionConvolutionKernel->addArg(pmeDispersionBsplineModuliZ);
for (int i = 0; i < 3; i++)
pmeDispersionConvolutionKernel->addArg();
pmeDispersionEvalEnergyKernel->addArg(pmeGrid2);
if (usePmeQueue)
pmeDispersionEvalEnergyKernel->addArg(pmeEnergyBuffer);
else
pmeDispersionEvalEnergyKernel->addArg(cc.getEnergyBuffer());
pmeDispersionEvalEnergyKernel->addArg(pmeDispersionBsplineModuliX);
pmeDispersionEvalEnergyKernel->addArg(pmeDispersionBsplineModuliY);
pmeDispersionEvalEnergyKernel->addArg(pmeDispersionBsplineModuliZ);
for (int i = 0; i < 3; i++)
pmeDispersionEvalEnergyKernel->addArg();
pmeDispersionInterpolateForceKernel->addArg(cc.getPosq());
pmeDispersionInterpolateForceKernel->addArg(cc.getLongForceBuffer());
pmeDispersionInterpolateForceKernel->addArg(pmeGrid1);
for (int i = 0; i < 8; i++)
pmeDispersionInterpolateForceKernel->addArg();
pmeDispersionInterpolateForceKernel->addArg(pmeDispersionAtomGridIndex);
pmeDispersionInterpolateForceKernel->addArg(sigmaEpsilon);
if (useFixedPointChargeSpreading) {
pmeDispersionFinishSpreadChargeKernel = program->createKernel("finishSpreadCharge");
pmeDispersionFinishSpreadChargeKernel->addArg(pmeGrid2);
pmeDispersionFinishSpreadChargeKernel->addArg(pmeGrid1);
}
}
}
}
// Update particle and exception parameters.
for (auto param : paramValues) {
double value = context.getParameter(param.first);
if (value != param.second) {
paramValues[param.first] = value;
recomputeParams = true;
}
}
double energy = 0.0;
if (includeReciprocal && (pmeGrid1.isInitialized() || cosSinSums.isInitialized())) {
Vec3 a, b, c;
cc.getPeriodicBoxVectors(a, b, c);
double volume = a[0]*b[1]*c[2];
energy = ewaldSelfEnergy - totalCharge*totalCharge/(8*EPSILON0*volume*alpha*alpha);
}
if (recomputeParams || hasOffsets) {
computeParamsKernel->setArg(1, (int) (includeEnergy && includeReciprocal));
computeParamsKernel->execute(cc.getNumAtoms());
if (exclusionParams.isInitialized())
computeExclusionParamsKernel->execute(exclusionParams.getSize());
if (usePmeQueue) {
paramsSyncEvent->enqueue();
paramsSyncEvent->queueWait(pmeQueue);
}
if (hasOffsets) {
// The Ewald self energy was computed in the kernel.
energy = 0.0;
if ((pmeGrid1.isInitialized() || cosSinSums.isInitialized()) && includeReciprocal) {
// Invoke a kernel to compute the correction for the neutralizing plasma.
Vec3 a, b, c;
cc.getPeriodicBoxVectors(a, b, c);
double volume = a[0]*b[1]*c[2];
if (cc.getUseDoublePrecision())
computePlasmaCorrectionKernel->setArg(3, volume);
else
computePlasmaCorrectionKernel->setArg(3, (float) volume);
computePlasmaCorrectionKernel->execute(ComputeContext::ThreadBlockSize, ComputeContext::ThreadBlockSize);
}
}
recomputeParams = false;
}
// Do reciprocal space calculations.
if (cosSinSums.isInitialized() && includeReciprocal) {
Vec3 a, b, c;
cc.getPeriodicBoxVectors(a, b, c);
if (cc.getUseDoublePrecision()) {
ewaldSumsKernel->setArg(3, mm_double4(a[0], b[1], c[2], 0));
ewaldForcesKernel->setArg(3, mm_double4(a[0], b[1], c[2], 0));
}
else {
ewaldSumsKernel->setArg(3, mm_float4((float) a[0], (float) b[1], (float) c[2], 0));
ewaldForcesKernel->setArg(3, mm_float4((float) a[0], (float) b[1], (float) c[2], 0));
}
ewaldSumsKernel->execute(cosSinSums.getSize());
ewaldForcesKernel->execute(cc.getNumAtoms());
}
if (pmeGrid1.isInitialized() && includeReciprocal) {
if (usePmeQueue)
cc.setCurrentQueue(pmeQueue);
// Invert the periodic box vectors.
Vec3 boxVectors[3];
cc.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
double determinant = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
double scale = 1.0/determinant;
mm_double4 recipBoxVectors[3];
recipBoxVectors[0] = mm_double4(boxVectors[1][1]*boxVectors[2][2]*scale, 0, 0, 0);
recipBoxVectors[1] = mm_double4(-boxVectors[1][0]*boxVectors[2][2]*scale, boxVectors[0][0]*boxVectors[2][2]*scale, 0, 0);
recipBoxVectors[2] = mm_double4((boxVectors[1][0]*boxVectors[2][1]-boxVectors[1][1]*boxVectors[2][0])*scale, -boxVectors[0][0]*boxVectors[2][1]*scale, boxVectors[0][0]*boxVectors[1][1]*scale, 0);
mm_float4 recipBoxVectorsFloat[3];
for (int i = 0; i < 3; i++)
recipBoxVectorsFloat[i] = mm_float4((float) recipBoxVectors[i].x, (float) recipBoxVectors[i].y, (float) recipBoxVectors[i].z, 0);
// Execute the reciprocal space kernels.
if (hasCoulomb) {
if (stepsToSort <= 0) {
setPeriodicBoxArgs(cc, pmeGridIndexKernel, 2);
if (cc.getUseDoublePrecision()) {
pmeGridIndexKernel->setArg(7, recipBoxVectors[0]);
pmeGridIndexKernel->setArg(8, recipBoxVectors[1]);
pmeGridIndexKernel->setArg(9, recipBoxVectors[2]);
}
else {
pmeGridIndexKernel->setArg(7, recipBoxVectorsFloat[0]);
pmeGridIndexKernel->setArg(8, recipBoxVectorsFloat[1]);
pmeGridIndexKernel->setArg(9, recipBoxVectorsFloat[2]);
}
pmeGridIndexKernel->execute(cc.getNumAtoms());
sort->sort(pmeAtomGridIndex);
stepsToSort = 3;
}
else
stepsToSort--;
setPeriodicBoxArgs(cc, pmeSpreadChargeKernel, 2);
if (cc.getUseDoublePrecision()) {
pmeSpreadChargeKernel->setArg(7, recipBoxVectors[0]);
pmeSpreadChargeKernel->setArg(8, recipBoxVectors[1]);
pmeSpreadChargeKernel->setArg(9, recipBoxVectors[2]);
}
else {
pmeSpreadChargeKernel->setArg(7, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel->setArg(8, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel->setArg(9, recipBoxVectorsFloat[2]);
}
pmeSpreadChargeKernel->execute(cc.getNumAtoms());
if (useFixedPointChargeSpreading)
pmeFinishSpreadChargeKernel->execute(gridSizeX*gridSizeY*gridSizeZ);
fft->execFFT(pmeGrid1, pmeGrid2, true);
if (cc.getUseDoublePrecision()) {
pmeConvolutionKernel->setArg(4, recipBoxVectors[0]);
pmeConvolutionKernel->setArg(5, recipBoxVectors[1]);
pmeConvolutionKernel->setArg(6, recipBoxVectors[2]);
pmeEvalEnergyKernel->setArg(5, recipBoxVectors[0]);
pmeEvalEnergyKernel->setArg(6, recipBoxVectors[1]);
pmeEvalEnergyKernel->setArg(7, recipBoxVectors[2]);
}
else {
pmeConvolutionKernel->setArg(4, recipBoxVectorsFloat[0]);
pmeConvolutionKernel->setArg(5, recipBoxVectorsFloat[1]);
pmeConvolutionKernel->setArg(6, recipBoxVectorsFloat[2]);
pmeEvalEnergyKernel->setArg(5, recipBoxVectorsFloat[0]);
pmeEvalEnergyKernel->setArg(6, recipBoxVectorsFloat[1]);
pmeEvalEnergyKernel->setArg(7, recipBoxVectorsFloat[2]);
}
if (includeEnergy)
pmeEvalEnergyKernel->execute(gridSizeX*gridSizeY*gridSizeZ);
pmeConvolutionKernel->execute(gridSizeX*gridSizeY*gridSizeZ);
fft->execFFT(pmeGrid2, pmeGrid1, false);
setPeriodicBoxArgs(cc, pmeInterpolateForceKernel, 3);
if (cc.getUseDoublePrecision()) {
pmeInterpolateForceKernel->setArg(8, recipBoxVectors[0]);
pmeInterpolateForceKernel->setArg(9, recipBoxVectors[1]);
pmeInterpolateForceKernel->setArg(10, recipBoxVectors[2]);
}
else {
pmeInterpolateForceKernel->setArg(8, recipBoxVectorsFloat[0]);
pmeInterpolateForceKernel->setArg(9, recipBoxVectorsFloat[1]);
pmeInterpolateForceKernel->setArg(10, recipBoxVectorsFloat[2]);
}
if (deviceIsCpu)
pmeInterpolateForceKernel->execute(cc.getNumThreadBlocks(), 1);
else
pmeInterpolateForceKernel->execute(cc.getNumAtoms());
}
if (doLJPME && hasLJ) {
if (dispersionStepsToSort <= 0) {
setPeriodicBoxArgs(cc, pmeDispersionGridIndexKernel, 2);
if (cc.getUseDoublePrecision()) {
pmeDispersionGridIndexKernel->setArg(7, recipBoxVectors[0]);
pmeDispersionGridIndexKernel->setArg(8, recipBoxVectors[1]);
pmeDispersionGridIndexKernel->setArg(9, recipBoxVectors[2]);
}
else {
pmeDispersionGridIndexKernel->setArg(7, recipBoxVectorsFloat[0]);
pmeDispersionGridIndexKernel->setArg(8, recipBoxVectorsFloat[1]);
pmeDispersionGridIndexKernel->setArg(9, recipBoxVectorsFloat[2]);
}
pmeDispersionGridIndexKernel->execute(cc.getNumAtoms());
sort->sort(pmeDispersionAtomGridIndex);
dispersionStepsToSort = 3;
}
else
dispersionStepsToSort--;
if (useFixedPointChargeSpreading)
cc.clearBuffer(pmeGrid2);
else
cc.clearBuffer(pmeGrid1);
setPeriodicBoxArgs(cc, pmeDispersionSpreadChargeKernel, 2);
if (cc.getUseDoublePrecision()) {
pmeDispersionSpreadChargeKernel->setArg(7, recipBoxVectors[0]);
pmeDispersionSpreadChargeKernel->setArg(8, recipBoxVectors[1]);
pmeDispersionSpreadChargeKernel->setArg(9, recipBoxVectors[2]);
}
else {
pmeDispersionSpreadChargeKernel->setArg(7, recipBoxVectorsFloat[0]);
pmeDispersionSpreadChargeKernel->setArg(8, recipBoxVectorsFloat[1]);
pmeDispersionSpreadChargeKernel->setArg(9, recipBoxVectorsFloat[2]);
}
if (usePmeDispersionWave64LdsSpread) {
const int workSize = ((cc.getNumAtoms()+pmeDispersionAtomsPerBlock-1)/pmeDispersionAtomsPerBlock)*pmeDispersionSpreadBlockSize;
pmeDispersionSpreadChargeKernel->execute(workSize, pmeDispersionSpreadBlockSize);
}
else
pmeDispersionSpreadChargeKernel->execute(cc.getNumAtoms());
if (useFixedPointChargeSpreading)
pmeDispersionFinishSpreadChargeKernel->execute(dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
dispersionFft->execFFT(pmeGrid1, pmeGrid2, true);
if (cc.getUseDoublePrecision()) {
pmeDispersionConvolutionKernel->setArg(4, recipBoxVectors[0]);
pmeDispersionConvolutionKernel->setArg(5, recipBoxVectors[1]);
pmeDispersionConvolutionKernel->setArg(6, recipBoxVectors[2]);
pmeDispersionEvalEnergyKernel->setArg(5, recipBoxVectors[0]);
pmeDispersionEvalEnergyKernel->setArg(6, recipBoxVectors[1]);
pmeDispersionEvalEnergyKernel->setArg(7, recipBoxVectors[2]);
}
else {
pmeDispersionConvolutionKernel->setArg(4, recipBoxVectorsFloat[0]);
pmeDispersionConvolutionKernel->setArg(5, recipBoxVectorsFloat[1]);
pmeDispersionConvolutionKernel->setArg(6, recipBoxVectorsFloat[2]);
pmeDispersionEvalEnergyKernel->setArg(5, recipBoxVectorsFloat[0]);
pmeDispersionEvalEnergyKernel->setArg(6, recipBoxVectorsFloat[1]);
pmeDispersionEvalEnergyKernel->setArg(7, recipBoxVectorsFloat[2]);
}
if (!hasCoulomb)
cc.clearBuffer(pmeEnergyBuffer);
if (includeEnergy)
pmeDispersionEvalEnergyKernel->execute(dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
pmeDispersionConvolutionKernel->execute(dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
dispersionFft->execFFT(pmeGrid2, pmeGrid1, false);
setPeriodicBoxArgs(cc, pmeDispersionInterpolateForceKernel, 3);
if (cc.getUseDoublePrecision()) {
pmeDispersionInterpolateForceKernel->setArg(8, recipBoxVectors[0]);
pmeDispersionInterpolateForceKernel->setArg(9, recipBoxVectors[1]);
pmeDispersionInterpolateForceKernel->setArg(10, recipBoxVectors[2]);
}
else {
pmeDispersionInterpolateForceKernel->setArg(8, recipBoxVectorsFloat[0]);
pmeDispersionInterpolateForceKernel->setArg(9, recipBoxVectorsFloat[1]);
pmeDispersionInterpolateForceKernel->setArg(10, recipBoxVectorsFloat[2]);
}
if (deviceIsCpu)
pmeDispersionInterpolateForceKernel->execute(cc.getNumThreadBlocks(), 1);
else
pmeDispersionInterpolateForceKernel->execute(cc.getNumAtoms());
}
if (usePmeQueue) {
pmeSyncEvent->enqueue();
cc.restoreDefaultQueue();
}
}
if (dispersionCoefficient != 0.0 && includeDirect) {
Vec3 a, b, c;
cc.getPeriodicBoxVectors(a, b, c);
energy += dispersionCoefficient/(a[0]*b[1]*c[2]);
}
return energy;
}
void CommonCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force, int firstParticle, int lastParticle, int firstException, int lastException) {
// Make sure the new parameters are acceptable.
ContextSelector selector(cc);
if (force.getNumParticles() != cc.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
if (!hasCoulomb || !hasLJ) {
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
if (!hasCoulomb && charge != 0.0)
throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Coulomb interactions, because all charges were originally 0");
if (!hasLJ && epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Lennard-Jones interactions, because all epsilons were originally 0");
}
}
set exceptionsWithOffsets;
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
exceptionsWithOffsets.insert(exception);
}
vector exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
if (exceptionIndex.find(i) == exceptionIndex.end()) {
if (chargeProd != 0.0 || epsilon != 0.0 || exceptionsWithOffsets.find(i) != exceptionsWithOffsets.end())
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
}
else
exceptions.push_back(i);
}
int numContexts = cc.getNumContexts();
int startIndex = cc.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cc.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
if (numExceptions != exceptionAtoms.size())
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
// Record the per-particle parameters.
if (firstParticle <= lastParticle) {
vector baseParticleParamVec(cc.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
baseParticleParamVec[i] = mm_float4(charge, sigma, epsilon, 0);
}
baseParticleParams.uploadSubArray(&baseParticleParamVec[firstParticle], firstParticle, lastParticle-firstParticle+1);
// Compute the self energy.
ewaldSelfEnergy = 0.0;
totalCharge = 0.0;
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
if (cc.getContextIndex() == 0) {
for (int i = 0; i < force.getNumParticles(); i++) {
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
totalCharge += baseParticleParamVec[i].x;
if (doLJPME)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
}
}
}
// Record the exceptions.
if (firstException <= lastException && numExceptions > 0) {
vector baseExceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], particle1, particle2, chargeProd, sigma, epsilon);
if (make_pair(particle1, particle2) != exceptionAtoms[i])
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
baseExceptionParamsVec[i] = mm_float4(chargeProd, sigma, epsilon, 0);
}
baseExceptionParams.upload(baseExceptionParamsVec);
}
// Record parameter offsets.
vector > particleOffsetVec(force.getNumParticles());
vector > exceptionOffsetVec(numExceptions);
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param;
int particle;
double charge, sigma, epsilon;
force.getParticleParameterOffset(i, param, particle, charge, sigma, epsilon);
auto paramIndex = paramIndices.find(param);
if (paramIndex == paramIndices.end())
throw OpenMMException("updateParametersInContext: The parameter of a particle parameter offset has changed");
particleOffsetVec[particle].push_back(mm_float4(charge, sigma, epsilon, paramIndex->second));
}
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
int index = exceptionIndex[exception];
if (index < startIndex || index >= endIndex)
continue;
auto paramIndex = paramIndices.find(param);
if (paramIndex == paramIndices.end())
throw OpenMMException("updateParametersInContext: The parameter of an exception parameter offset has changed");
exceptionOffsetVec[index-startIndex].push_back(mm_float4(charge, sigma, epsilon, paramIndex->second));
}
if (max(force.getNumParticleParameterOffsets(), 1) != particleParamOffsets.getSize())
throw OpenMMException("updateParametersInContext: The number of particle parameter offsets has changed");
vector p, e;
for (int i = 0; i < particleOffsetVec.size(); i++)
for (int j = 0; j < particleOffsetVec[i].size(); j++)
p.push_back(particleOffsetVec[i][j]);
for (int i = 0; i < exceptionOffsetVec.size(); i++)
for (int j = 0; j < exceptionOffsetVec[i].size(); j++)
e.push_back(exceptionOffsetVec[i][j]);
if (force.getNumParticleParameterOffsets() > 0)
particleParamOffsets.upload(p);
if (max((int) e.size(), 1) != exceptionParamOffsets.getSize())
throw OpenMMException("updateParametersInContext: The number of exception parameter offsets has changed");
if (e.size() > 0)
exceptionParamOffsets.upload(e);
// Compute other values.
if (force.getUseDispersionCorrection() && cc.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cc.invalidateMolecules(info, firstParticle <= lastParticle || force.getNumParticleParameterOffsets() > 0,
firstException <= lastException || force.getNumExceptionParameterOffsets() > 0);
recomputeParams = true;
}
void CommonCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useCpuPme)
cpuPme.getAs().getPMEParameters(alpha, nx, ny, nz);
else {
alpha = this->alpha;
nx = gridSizeX;
ny = gridSizeY;
nz = gridSizeZ;
}
}
void CommonCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useCpuPme)
//cpuPme.getAs().getLJPMEParameters(alpha, nx, ny, nz);
throw OpenMMException("getPMEParametersInContext: CPUPME has not been implemented for LJPME yet.");
else {
alpha = this->dispersionAlpha;
nx = dispersionGridSizeX;
ny = dispersionGridSizeY;
nz = dispersionGridSizeZ;
}
}