/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2024 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "HipKernels.h"
#include "HipForceInfo.h"
#include "openmm/Context.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/common/ContextSelector.h"
#include "CommonKernelSources.h"
#include "HipBondedUtilities.h"
#include "HipExpressionUtilities.h"
#include "HipFFT3D.h"
#include "HipIntegrationUtilities.h"
#include "HipNonbondedUtilities.h"
#include "HipKernelSources.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h"
#include
#include
#include
#include
#include
using namespace OpenMM;
using namespace std;
#define CHECK_RESULT(result, prefix) \
if (result != hipSuccess) { \
std::stringstream m; \
m<computeForceAndEnergy(includeForces, includeEnergy, groups);
HipNonbondedUtilities& nb = cu.getNonbondedUtilities();
cu.setComputeForceCount(cu.getComputeForceCount()+1);
nb.prepareInteractions(groups);
map& derivs = cu.getEnergyParamDerivWorkspace();
for (auto& param : context.getParameters())
derivs[param.first] = 0;
}
double HipCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
ContextSelector selector(cu);
cu.getBondedUtilities().computeInteractions(groups);
cu.getNonbondedUtilities().computeInteractions(groups, includeForces, includeEnergy);
double sum = 0.0;
for (auto computation : cu.getPostComputations())
sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
cu.getIntegrationUtilities().distributeForcesFromVirtualSites();
if (includeEnergy)
sum += cu.reduceEnergy();
if (!cu.getForcesValid())
valid = false;
return sum;
}
class HipCalcNonbondedForceKernel::ForceInfo : public HipForceInfo {
public:
ForceInfo(const NonbondedForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
double charge1, charge2, sigma1, sigma2, epsilon1, epsilon2;
force.getParticleParameters(particle1, charge1, sigma1, epsilon1);
force.getParticleParameters(particle2, charge2, sigma2, epsilon2);
return (charge1 == charge2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
int getNumParticleGroups() {
return force.getNumExceptions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(index, particle1, particle2, chargeProd, sigma, epsilon);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
double chargeProd1, chargeProd2, sigma1, sigma2, epsilon1, epsilon2;
force.getExceptionParameters(group1, particle1, particle2, chargeProd1, sigma1, epsilon1);
force.getExceptionParameters(group2, particle1, particle2, chargeProd2, sigma2, epsilon2);
return (chargeProd1 == chargeProd2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
private:
const NonbondedForce& force;
};
class HipCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO {
public:
PmeIO(HipContext& cu, hipFunction_t addForcesKernel) : cu(cu), addForcesKernel(addForcesKernel) {
forceTemp.initialize(cu, cu.getNumAtoms(), "PmeForce");
}
float* getPosq() {
ContextSelector selector(cu);
cu.getPosq().download(posq);
return (float*) &posq[0];
}
void setForce(float* force) {
forceTemp.upload(force);
void* args[] = {&forceTemp.getDevicePointer(), &cu.getForce().getDevicePointer()};
cu.executeKernel(addForcesKernel, args, cu.getNumAtoms());
}
private:
HipContext& cu;
vector posq;
HipArray forceTemp;
hipFunction_t addForcesKernel;
};
class HipCalcNonbondedForceKernel::PmePreComputation : public HipContext::ForcePreComputation {
public:
PmePreComputation(HipContext& cu, Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : cu(cu), pme(pme), io(io) {
}
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
Vec3 boxVectors[3] = {Vec3(cu.getPeriodicBoxSize().x, 0, 0), Vec3(0, cu.getPeriodicBoxSize().y, 0), Vec3(0, 0, cu.getPeriodicBoxSize().z)};
pme.getAs().beginComputation(io, boxVectors, includeEnergy);
}
private:
HipContext& cu;
Kernel pme;
CalcPmeReciprocalForceKernel::IO& io;
};
class HipCalcNonbondedForceKernel::PmePostComputation : public HipContext::ForcePostComputation {
public:
PmePostComputation(Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : pme(pme), io(io) {
}
double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
return pme.getAs().finishComputation(io);
}
private:
Kernel pme;
CalcPmeReciprocalForceKernel::IO& io;
};
class HipCalcNonbondedForceKernel::SyncStreamPreComputation : public HipContext::ForcePreComputation {
public:
SyncStreamPreComputation(HipContext& cu, hipStream_t stream, hipEvent_t event, int forceGroup) : cu(cu), stream(stream), event(event), forceGroup(forceGroup) {
}
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
if ((groups&(1< exceptionsWithOffsets;
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
exceptionsWithOffsets.insert(exception);
}
vector > exclusions;
vector exceptions;
map exceptionIndex;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
exclusions.push_back(pair(particle1, particle2));
if (chargeProd != 0.0 || epsilon != 0.0 || exceptionsWithOffsets.find(i) != exceptionsWithOffsets.end()) {
exceptionIndex[i] = exceptions.size();
exceptions.push_back(i);
}
}
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
vector baseParticleParamVec(cu.getPaddedNumAtoms(), make_float4(0, 0, 0, 0));
vector > exclusionList(numParticles);
hasCoulomb = false;
hasLJ = false;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
baseParticleParamVec[i] = make_float4(charge, sigma, epsilon, 0);
exclusionList[i].push_back(i);
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
hasLJ = true;
}
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param;
int particle;
double charge, sigma, epsilon;
force.getParticleParameterOffset(i, param, particle, charge, sigma, epsilon);
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
hasLJ = true;
}
for (auto exclusion : exclusions) {
exclusionList[exclusion.first].push_back(exclusion.second);
exclusionList[exclusion.second].push_back(exclusion.first);
}
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
doLJPME = (nonbondedMethod == LJPME && hasLJ);
usePosqCharges = hasCoulomb ? cu.requestPosqCharges() : false;
map defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
defines["USE_LJ_SWITCH"] = (useCutoff && force.getUseSwitchingFunction() ? "1" : "0");
if (useCutoff) {
// Compute the reaction field constants.
double reactionFieldK = pow(force.getCutoffDistance(), -3.0)*(force.getReactionFieldDielectric()-1.0)/(2.0*force.getReactionFieldDielectric()+1.0);
double reactionFieldC = (1.0 / force.getCutoffDistance())*(3.0*force.getReactionFieldDielectric())/(2.0*force.getReactionFieldDielectric()+1.0);
defines["REACTION_FIELD_K"] = cu.doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = cu.doubleToString(reactionFieldC);
// Compute the switching coefficients.
if (force.getUseSwitchingFunction()) {
defines["LJ_SWITCH_CUTOFF"] = cu.doubleToString(force.getSwitchingDistance());
defines["LJ_SWITCH_C3"] = cu.doubleToString(10/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 3.0));
defines["LJ_SWITCH_C4"] = cu.doubleToString(15/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 4.0));
defines["LJ_SWITCH_C5"] = cu.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
}
}
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && !doLJPME)
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
alpha = 0;
ewaldSelfEnergy = 0.0;
map paramsDefines;
paramsDefines["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0);
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1";
if (force.getNumParticleParameterOffsets() > 0)
paramsDefines["HAS_PARTICLE_OFFSETS"] = "1";
if (force.getNumExceptionParameterOffsets() > 0)
paramsDefines["HAS_EXCEPTION_OFFSETS"] = "1";
if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1";
if (doLJPME)
paramsDefines["INCLUDE_LJPME_EXCEPTIONS"] = "1";
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
int kmaxx, kmaxy, kmaxz;
NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
if (cu.getContextIndex() == 0) {
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
// Create the reciprocal space kernels.
map replacements;
replacements["NUM_ATOMS"] = cu.intToString(numParticles);
replacements["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
replacements["KMAX_X"] = cu.intToString(kmaxx);
replacements["KMAX_Y"] = cu.intToString(kmaxy);
replacements["KMAX_Z"] = cu.intToString(kmaxz);
replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha));
replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0);
replacements["M_PI"] = cu.doubleToString(M_PI);
hipModule_t module = cu.createModule(HipKernelSources::vectorOps+CommonKernelSources::ewald, replacements);
ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
cosSinSums.initialize(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if (((nonbondedMethod == PME || nonbondedMethod == LJPME) && hasCoulomb) || doLJPME) {
// Compute the PME parameters.
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = cu.findLegalFFTDimension(gridSizeX);
gridSizeY = cu.findLegalFFTDimension(gridSizeY);
gridSizeZ = cu.findLegalFFTDimension(gridSizeZ);
if (doLJPME) {
NonbondedForceImpl::calcPMEParameters(system, force, dispersionAlpha, dispersionGridSizeX,
dispersionGridSizeY, dispersionGridSizeZ, true);
dispersionGridSizeX = cu.findLegalFFTDimension(dispersionGridSizeX);
dispersionGridSizeY = cu.findLegalFFTDimension(dispersionGridSizeY);
dispersionGridSizeZ = cu.findLegalFFTDimension(dispersionGridSizeZ);
}
defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
defines["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME) {
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dar2 = dalphaR*dalphaR;
double dar4 = dar2*dar2;
double multShift6 = -invRCut6*(1.0 - exp(-dar2) * (1.0 + dar2 + 0.5*dar4));
defines["INVCUT6"] = cu.doubleToString(invRCut6);
defines["MULTSHIFT6"] = cu.doubleToString(multShift6);
}
if (cu.getContextIndex() == 0) {
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME) {
paramsDefines["INCLUDE_LJPME"] = "1";
paramsDefines["LJPME_SELF_ENERGY_SCALE"] = cu.doubleToString(pow(dispersionAlpha, 6)/3.0);
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
usePmeStream = (!cu.getPlatformData().disablePmeStream && !cu.getPlatformData().useCpuPme);
map pmeDefines;
pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = cu.intToString(numParticles);
pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(alpha*alpha));
pmeDefines["GRID_SIZE_X"] = cu.intToString(gridSizeX);
pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY);
pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0));
pmeDefines["M_PI"] = cu.doubleToString(M_PI);
useFixedPointChargeSpreading = cu.getUseDoublePrecision() || !cu.getSupportsHardwareFloatGlobalAtomicAdd() || cu.getPlatformData().deterministicForces;
if (useFixedPointChargeSpreading)
pmeDefines["USE_FIXED_POINT_CHARGE_SPREADING"] = "1";
if (usePmeStream)
pmeDefines["USE_PME_STREAM"] = "1";
map replacements;
replacements["CHARGE"] = (usePosqCharges ? "pos.w" : "charges[atom]");
hipModule_t module = cu.createModule(HipKernelSources::vectorOps+cu.replaceStrings(CommonKernelSources::pme, replacements), pmeDefines);
if (cu.getPlatformData().useCpuPme && !doLJPME && usePosqCharges) {
// Create the CPU PME kernel.
try {
cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context);
cpuPme.getAs().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha, cu.getPlatformData().deterministicForces);
hipFunction_t addForcesKernel = cu.getKernel(module, "addForces");
pmeio = new PmeIO(cu, addForcesKernel);
cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio));
cu.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
}
catch (OpenMMException& ex) {
// The CPU PME plugin isn't available.
}
}
if (pmeio == NULL) {
pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
pmeInterpolateForceKernel = cu.getKernel(module, "gridInterpolateForce");
pmeEvalEnergyKernel = cu.getKernel(module, "gridEvaluateEnergy");
if (useFixedPointChargeSpreading)
pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
if (doLJPME) {
pmeDefines["EWALD_ALPHA"] = cu.doubleToString(dispersionAlpha);
pmeDefines["GRID_SIZE_X"] = cu.intToString(dispersionGridSizeX);
pmeDefines["GRID_SIZE_Y"] = cu.intToString(dispersionGridSizeY);
pmeDefines["GRID_SIZE_Z"] = cu.intToString(dispersionGridSizeZ);
pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
pmeDefines["USE_LJPME"] = "1";
pmeDefines["CHARGE_FROM_SIGEPS"] = "1";
module = cu.createModule(HipKernelSources::vectorOps+CommonKernelSources::pme, pmeDefines);
if (useFixedPointChargeSpreading)
pmeDispersionFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
pmeDispersionGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeDispersionSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
pmeDispersionConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
pmeEvalDispersionEnergyKernel = cu.getKernel(module, "gridEvaluateEnergy");
pmeInterpolateDispersionForceKernel = cu.getKernel(module, "gridInterpolateForce");
}
// Create required data structures.
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME) {
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
}
pmeGrid1.initialize(cu, gridElements, 2*elementSize, "pmeGrid1");
pmeGrid2.initialize(cu, gridElements, 2*elementSize, "pmeGrid2");
if (useFixedPointChargeSpreading)
cu.addAutoclearBuffer(pmeGrid2);
else
cu.addAutoclearBuffer(pmeGrid1);
pmeBsplineModuliX.initialize(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY.initialize(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ.initialize(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) {
pmeDispersionBsplineModuliX.initialize(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY.initialize(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ.initialize(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
}
pmeAtomGridIndex.initialize(cu, numParticles, "pmeAtomGridIndex");
int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer.initialize(cu, cu.getNumThreadBlocks()*HipContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cu.clearBuffer(pmeEnergyBuffer);
sort = new HipSort(cu, new SortTrait(), cu.getNumAtoms());
// Prepare for doing PME on its own stream.
if (usePmeStream) {
CHECK_RESULT(hipStreamCreateWithFlags(&pmeStream, hipStreamNonBlocking), "Error creating stream for NonbondedForce");
CHECK_RESULT(hipEventCreateWithFlags(&pmeSyncEvent, cu.getEventFlags()), "Error creating event for NonbondedForce");
CHECK_RESULT(hipEventCreateWithFlags(¶msSyncEvent, cu.getEventFlags()), "Error creating event for NonbondedForce");
int recipForceGroup = force.getReciprocalSpaceForceGroup();
if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), pmeEnergyBuffer, recipForceGroup));
}
hipStream_t fftStream = usePmeStream ? pmeStream : cu.getCurrentStream();
fft = new HipFFT3D(cu, gridSizeX, gridSizeY, gridSizeZ, true, fftStream, pmeGrid1, pmeGrid2);
if (doLJPME)
dispersionFft = new HipFFT3D(cu, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, true, fftStream, pmeGrid1, pmeGrid2);
hasInitializedFFT = true;
// Initialize the b-spline moduli.
for (int grid = 0; grid < 2; grid++) {
int xsize, ysize, zsize;
HipArray *xmoduli, *ymoduli, *zmoduli;
if (grid == 0) {
xsize = gridSizeX;
ysize = gridSizeY;
zsize = gridSizeZ;
xmoduli = &pmeBsplineModuliX;
ymoduli = &pmeBsplineModuliY;
zmoduli = &pmeBsplineModuliZ;
}
else {
if (!doLJPME)
continue;
xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ;
xmoduli = &pmeDispersionBsplineModuliX;
ymoduli = &pmeDispersionBsplineModuliY;
zmoduli = &pmeDispersionBsplineModuliZ;
}
int maxSize = max(max(xsize, ysize), zsize);
vector data(PmeOrder);
vector ddata(PmeOrder);
vector bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
// Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PmeOrder-1);
data[PmeOrder-1] = 0.0;
for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplines_data[i] = 0.0;
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
for (int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplines_data[j]*cos(arg);
ss += bsplines_data[j]*sin(arg);
}
moduli[i] = sc*sc+ss*ss;
}
for (int i = 0; i < ndata; i++)
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[(i-1+ndata)%ndata]+moduli[(i+1)%ndata])*0.5;
if (dim == 0)
xmoduli->upload(moduli, true);
else if (dim == 1)
ymoduli->upload(moduli, true);
else
zmoduli->upload(moduli, true);
}
}
}
}
}
// Add code to subtract off the reciprocal part of excluded interactions.
if ((nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) && pmeio == NULL) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumExceptions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumExceptions()/numContexts;
int numExclusions = endIndex-startIndex;
if (numExclusions > 0) {
paramsDefines["HAS_EXCLUSIONS"] = "1";
vector > atoms(numExclusions, vector(2));
exclusionAtoms.initialize(cu, numExclusions, "exclusionAtoms");
exclusionParams.initialize(cu, numExclusions, "exclusionParams");
vector exclusionAtomsVec(numExclusions);
for (int i = 0; i < numExclusions; i++) {
int j = i+startIndex;
exclusionAtomsVec[i] = make_int2(exclusions[j].first, exclusions[j].second);
atoms[i][0] = exclusions[j].first;
atoms[i][1] = exclusions[j].second;
}
exclusionAtoms.upload(exclusionAtomsVec);
map replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exclusionParams, "float4");
replacements["EWALD_ALPHA"] = cu.doubleToString(alpha);
replacements["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
replacements["DO_LJPME"] = doLJPME ? "1" : "0";
replacements["USE_PERIODIC"] = force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
if (force.getIncludeDirectSpace())
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
// Add the interaction to the default nonbonded kernel.
string source = cu.replaceStrings(CommonKernelSources::coulombLennardJones, defines);
charges.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "charges");
baseParticleParams.initialize(cu, cu.getPaddedNumAtoms(), "baseParticleParams");
baseParticleParams.upload(baseParticleParamVec);
map replacements;
replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0);
if (usePosqCharges) {
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
}
else {
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2";
}
if (hasCoulomb && !usePosqCharges)
cu.getNonbondedUtilities().addParameter(HipNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer()));
sigmaEpsilon.initialize(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
if (hasLJ) {
replacements["SIGMA_EPSILON1"] = prefix+"sigmaEpsilon1";
replacements["SIGMA_EPSILON2"] = prefix+"sigmaEpsilon2";
cu.getNonbondedUtilities().addParameter(HipNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer()));
}
source = cu.replaceStrings(source, replacements);
if (force.getIncludeDirectSpace())
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), numParticles > 3000, true);
// Initialize the exceptions.
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
if (numExceptions > 0) {
paramsDefines["HAS_EXCEPTIONS"] = "1";
exceptionAtoms.resize(numExceptions);
vector > atoms(numExceptions, vector(2));
exceptionParams.initialize(cu, numExceptions, "exceptionParams");
baseExceptionParams.initialize(cu, numExceptions, "baseExceptionParams");
vector baseExceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
baseExceptionParamsVec[i] = make_float4(chargeProd, sigma, epsilon, 0);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
baseExceptionParams.upload(baseExceptionParamsVec);
map replacements;
replacements["APPLY_PERIODIC"] = (usePeriodic && force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams, "float4");
if (force.getIncludeDirectSpace())
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
// Initialize parameter offsets.
vector > particleOffsetVec(force.getNumParticles());
vector > exceptionOffsetVec(numExceptions);
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param;
int particle;
double charge, sigma, epsilon;
force.getParticleParameterOffset(i, param, particle, charge, sigma, epsilon);
auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex;
if (paramPos == paramNames.end()) {
paramIndex = paramNames.size();
paramNames.push_back(param);
}
else
paramIndex = paramPos-paramNames.begin();
particleOffsetVec[particle].push_back(make_float4(charge, sigma, epsilon, paramIndex));
}
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
int index = exceptionIndex[exception];
if (index < startIndex || index >= endIndex)
continue;
auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex;
if (paramPos == paramNames.end()) {
paramIndex = paramNames.size();
paramNames.push_back(param);
}
else
paramIndex = paramPos-paramNames.begin();
exceptionOffsetVec[index-startIndex].push_back(make_float4(charge, sigma, epsilon, paramIndex));
}
paramValues.resize(paramNames.size(), 0.0);
particleParamOffsets.initialize(cu, max(force.getNumParticleParameterOffsets(), 1), "particleParamOffsets");
particleOffsetIndices.initialize(cu, cu.getPaddedNumAtoms()+1, "particleOffsetIndices");
vector particleOffsetIndicesVec, exceptionOffsetIndicesVec;
vector p, e;
for (int i = 0; i < particleOffsetVec.size(); i++) {
particleOffsetIndicesVec.push_back(p.size());
for (int j = 0; j < particleOffsetVec[i].size(); j++)
p.push_back(particleOffsetVec[i][j]);
}
while (particleOffsetIndicesVec.size() < particleOffsetIndices.getSize())
particleOffsetIndicesVec.push_back(p.size());
for (int i = 0; i < exceptionOffsetVec.size(); i++) {
exceptionOffsetIndicesVec.push_back(e.size());
for (int j = 0; j < exceptionOffsetVec[i].size(); j++)
e.push_back(exceptionOffsetVec[i][j]);
}
exceptionOffsetIndicesVec.push_back(e.size());
if (force.getNumParticleParameterOffsets() > 0) {
particleParamOffsets.upload(p);
particleOffsetIndices.upload(particleOffsetIndicesVec);
}
exceptionParamOffsets.initialize(cu, max((int) e.size(), 1), "exceptionParamOffsets");
exceptionOffsetIndices.initialize(cu, exceptionOffsetIndicesVec.size(), "exceptionOffsetIndices");
if (e.size() > 0) {
exceptionParamOffsets.upload(e);
exceptionOffsetIndices.upload(exceptionOffsetIndicesVec);
}
globalParams.initialize(cu, max((int) paramValues.size(), 1), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "globalParams");
if (paramValues.size() > 0)
globalParams.upload(paramValues, true);
recomputeParams = true;
// Initialize the kernel for updating parameters.
hipModule_t module = cu.createModule(CommonKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cu.getKernel(module, "computeParameters");
computeExclusionParamsKernel = cu.getKernel(module, "computeExclusionParameters");
info = new ForceInfo(force);
cu.addForce(info);
}
double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
// Update particle and exception parameters.
ContextSelector selector(cu);
bool paramChanged = false;
for (int i = 0; i < paramNames.size(); i++) {
double value = context.getParameter(paramNames[i]);
if (value != paramValues[i]) {
paramValues[i] = value;;
paramChanged = true;
}
}
if (paramChanged) {
recomputeParams = true;
globalParams.upload(paramValues, true);
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (recomputeParams || hasOffsets) {
int computeSelfEnergy = (includeEnergy && includeReciprocal);
int numAtoms = cu.getPaddedNumAtoms();
vector paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&particleParamOffsets.getDevicePointer(), &particleOffsetIndices.getDevicePointer()};
int numExceptions;
if (exceptionParams.isInitialized()) {
numExceptions = exceptionParams.getSize();
paramsArgs.push_back(&numExceptions);
paramsArgs.push_back(&baseExceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParamOffsets.getDevicePointer());
paramsArgs.push_back(&exceptionOffsetIndices.getDevicePointer());
}
cu.executeKernel(computeParamsKernel, ¶msArgs[0], cu.getPaddedNumAtoms());
if (exclusionParams.isInitialized()) {
int numExclusions = exclusionParams.getSize();
vector exclusionParamsArgs = {&cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&numExclusions, &exclusionAtoms.getDevicePointer(), &exclusionParams.getDevicePointer()};
cu.executeKernel(computeExclusionParamsKernel, &exclusionParamsArgs[0], numExclusions);
}
if (usePmeStream) {
hipEventRecord(paramsSyncEvent, cu.getCurrentStream());
hipStreamWaitEvent(pmeStream, paramsSyncEvent, 0);
}
if (hasOffsets)
energy = 0.0; // The Ewald self energy was computed in the kernel.
recomputeParams = false;
}
// Do reciprocal space calculations.
if (cosSinSums.isInitialized() && includeReciprocal) {
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums.getSize());
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
}
if (pmeGrid1.isInitialized() && includeReciprocal) {
if (usePmeStream)
cu.setCurrentStream(pmeStream);
// Invert the periodic box vectors.
Vec3 boxVectors[3];
cu.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
double determinant = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
double scale = 1.0/determinant;
double4 recipBoxVectors[3];
recipBoxVectors[0] = make_double4(boxVectors[1][1]*boxVectors[2][2]*scale, 0, 0, 0);
recipBoxVectors[1] = make_double4(-boxVectors[1][0]*boxVectors[2][2]*scale, boxVectors[0][0]*boxVectors[2][2]*scale, 0, 0);
recipBoxVectors[2] = make_double4((boxVectors[1][0]*boxVectors[2][1]-boxVectors[1][1]*boxVectors[2][0])*scale, -boxVectors[0][0]*boxVectors[2][1]*scale, boxVectors[0][0]*boxVectors[1][1]*scale, 0);
float4 recipBoxVectorsFloat[3];
void* recipBoxVectorPointer[3];
if (cu.getUseDoublePrecision()) {
recipBoxVectorPointer[0] = &recipBoxVectors[0];
recipBoxVectorPointer[1] = &recipBoxVectors[1];
recipBoxVectorPointer[2] = &recipBoxVectors[2];
}
else {
recipBoxVectorsFloat[0] = make_float4((float) recipBoxVectors[0].x, 0, 0, 0);
recipBoxVectorsFloat[1] = make_float4((float) recipBoxVectors[1].x, (float) recipBoxVectors[1].y, 0, 0);
recipBoxVectorsFloat[2] = make_float4((float) recipBoxVectors[2].x, (float) recipBoxVectors[2].y, (float) recipBoxVectors[2].z, 0);
recipBoxVectorPointer[0] = &recipBoxVectorsFloat[0];
recipBoxVectorPointer[1] = &recipBoxVectorsFloat[1];
recipBoxVectorPointer[2] = &recipBoxVectorsFloat[2];
}
// Execute the reciprocal space kernels.
// With fixed point charge spreading, finishSpreadCharge kernel is not needed as
// gridSpreadCharge can write directly to pmeGrid1.
HipArray& pmeSpreadDstGrid = useFixedPointChargeSpreading ? pmeGrid2 : pmeGrid1;
if (hasCoulomb) {
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernelFlat(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &pmeSpreadDstGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&charges.getDevicePointer()};
cu.executeKernelFlat(pmeSpreadChargeKernel, spreadArgs, PmeOrder*cu.getNumAtoms(), 128);
if (useFixedPointChargeSpreading) {
void* finishSpreadArgs[] = {&pmeGrid2.getDevicePointer(), &pmeGrid1.getDevicePointer()};
cu.executeKernelFlat(pmeFinishSpreadChargeKernel, finishSpreadArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
}
fft->execFFT(true);
if (includeEnergy) {
void* computeEnergyArgs[] = {&pmeGrid2.getDevicePointer(), usePmeStream ? &pmeEnergyBuffer.getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX.getDevicePointer(), &pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, gridSizeX*gridSizeY*gridSizeZ);
}
void* convolutionArgs[] = {&pmeGrid2.getDevicePointer(), &pmeBsplineModuliX.getDevicePointer(),
&pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernelFlat(pmeConvolutionKernel, convolutionArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
fft->execFFT(false);
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &pmeGrid1.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&charges.getDevicePointer()};
cu.executeKernelFlat(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
}
if (doLJPME && hasLJ) {
if (!hasCoulomb) {
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernelFlat(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(pmeAtomGridIndex);
cu.clearBuffer(pmeEnergyBuffer);
}
cu.clearBuffer(pmeSpreadDstGrid);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &pmeSpreadDstGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&sigmaEpsilon.getDevicePointer()};
cu.executeKernelFlat(pmeDispersionSpreadChargeKernel, spreadArgs, PmeOrder*cu.getNumAtoms(), 128);
if (useFixedPointChargeSpreading) {
void* finishSpreadArgs[] = {&pmeGrid2.getDevicePointer(), &pmeGrid1.getDevicePointer()};
cu.executeKernelFlat(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
}
dispersionFft->execFFT(true);
if (includeEnergy) {
void* computeEnergyArgs[] = {&pmeGrid2.getDevicePointer(), usePmeStream ? &pmeEnergyBuffer.getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX.getDevicePointer(), &pmeDispersionBsplineModuliY.getDevicePointer(), &pmeDispersionBsplineModuliZ.getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
}
void* convolutionArgs[] = {&pmeGrid2.getDevicePointer(), &pmeDispersionBsplineModuliX.getDevicePointer(),
&pmeDispersionBsplineModuliY.getDevicePointer(), &pmeDispersionBsplineModuliZ.getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernelFlat(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
dispersionFft->execFFT(false);
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &pmeGrid1.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&sigmaEpsilon.getDevicePointer()};
cu.executeKernelFlat(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
}
if (usePmeStream) {
hipEventRecord(pmeSyncEvent, pmeStream);
cu.restoreDefaultStream();
}
}
if (dispersionCoefficient != 0.0 && includeDirect) {
double4 boxSize = cu.getPeriodicBoxSize();
energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
}
return energy;
}
void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force, int firstParticle, int lastParticle, int firstException, int lastException) {
// Make sure the new parameters are acceptable.
ContextSelector selector(cu);
if (force.getNumParticles() != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
if (!hasCoulomb || !hasLJ) {
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
if (!hasCoulomb && charge != 0.0)
throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Coulomb interactions, because all charges were originally 0");
if (!hasLJ && epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Lennard-Jones interactions, because all epsilons were originally 0");
}
}
set exceptionsWithOffsets;
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
exceptionsWithOffsets.insert(exception);
}
vector exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
if (chargeProd != 0.0 || epsilon != 0.0 || exceptionsWithOffsets.find(i) != exceptionsWithOffsets.end())
exceptions.push_back(i);
}
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
if (numExceptions != exceptionAtoms.size())
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
// Record the per-particle parameters.
if (firstParticle <= lastParticle) {
vector baseParticleParamVec(cu.getPaddedNumAtoms(), make_float4(0, 0, 0, 0));
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
baseParticleParamVec[i] = make_float4(charge, sigma, epsilon, 0);
}
baseParticleParams.uploadSubArray(&baseParticleParamVec[firstParticle], firstParticle, lastParticle-firstParticle+1);
// Compute the self energy.
ewaldSelfEnergy = 0.0;
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
if (cu.getContextIndex() == 0) {
for (int i = 0; i < force.getNumParticles(); i++) {
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
}
}
}
// Record the exceptions.
if (firstException <= lastException) {
vector baseExceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], particle1, particle2, chargeProd, sigma, epsilon);
if (make_pair(particle1, particle2) != exceptionAtoms[i])
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
baseExceptionParamsVec[i] = make_float4(chargeProd, sigma, epsilon, 0);
}
baseExceptionParams.upload(baseExceptionParamsVec);
}
// Compute other values.
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cu.invalidateMolecules(info, firstParticle <= lastParticle, firstException <= lastException);
recomputeParams = true;
}
void HipCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (cu.getPlatformData().useCpuPme)
cpuPme.getAs().getPMEParameters(alpha, nx, ny, nz);
else {
alpha = this->alpha;
nx = gridSizeX;
ny = gridSizeY;
nz = gridSizeZ;
}
}
void HipCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (!doLJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (cu.getPlatformData().useCpuPme)
//cpuPme.getAs().getLJPMEParameters(alpha, nx, ny, nz);
throw OpenMMException("getPMEParametersInContext: CPUPME has not been implemented for LJPME yet.");
else {
alpha = this->dispersionAlpha;
nx = dispersionGridSizeX;
ny = dispersionGridSizeY;
nz = dispersionGridSizeZ;
}
}