Commit c9bf6eee authored by Peter Eastman's avatar Peter Eastman
Browse files

Began CUDA implementation of CustomNonbondedForce

parent 364130a5
......@@ -81,8 +81,8 @@ public:
_gpuContext* gpu;
KernelImpl* primaryKernel;
bool removeCM;
bool hasBonds, hasAngles, hasPeriodicTorsions, hasRB, hasNonbonded;
int nonbondedMethod;
bool hasBonds, hasAngles, hasPeriodicTorsions, hasRB, hasNonbonded, hasCustomNonbonded;
int nonbondedMethod, customNonbondedMethod;
int cmMotionFrequency;
int stepCount, computeForceCount;
double time;
......
......@@ -47,6 +47,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return new CudaCalcRBTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcNonbondedForceKernel::Name())
return new CudaCalcNonbondedForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomNonbondedForceKernel::Name())
return new CudaCalcCustomNonbondedForceKernel(name, platform, data, context.getSystem());
if (name == CalcGBSAOBCForceKernel::Name())
return new CudaCalcGBSAOBCForceKernel(name, platform, data);
if (name == IntegrateVerletStepKernel::Name())
......
......@@ -51,15 +51,11 @@ static void calcForces(ContextImpl& context, CudaPlatform::PlatformData& data) {
kReduceObcGbsaBornForces(gpu);
kCalculateObcGbsaForces2(gpu);
}
else if (data.hasNonbonded) {
else if (data.hasNonbonded)
kCalculateCDLJForces(gpu);
}
if (data.hasCustomNonbonded)
kCalculateCustomNonbondedForces(gpu, data.hasNonbonded);
kCalculateLocalForces(gpu);
/*
if (gpu->bIncludeGBSA)
kReduceBornSumAndForces(gpu);
else
*/
kReduceForces(gpu);
}
......@@ -255,16 +251,15 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Identify which exceptions are 1-4 interactions.
vector<pair<int, int> > exclusions;
vector<int> nb14s;
vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
exclusions.push_back(pair<int, int>(particle1, particle2));
if (chargeProd != 0.0 || epsilon != 0.0)
nb14s.push_back(i);
exceptions.push_back(i);
}
num14 = nb14s.size();
// Initialize nonbonded interactions.
......@@ -329,15 +324,16 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize 1-4 nonbonded interactions.
{
vector<int> particle1(num14);
vector<int> particle2(num14);
vector<float> c6(num14);
vector<float> c12(num14);
vector<float> q1(num14);
vector<float> q2(num14);
for (int i = 0; i < num14; i++) {
int numExceptions = exceptions.size();
vector<int> particle1(numExceptions);
vector<int> particle2(numExceptions);
vector<float> c6(numExceptions);
vector<float> c12(numExceptions);
vector<float> q1(numExceptions);
vector<float> q2(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double charge, sig, eps;
force.getExceptionParameters(nb14s[i], particle1[i], particle2[i], charge, sig, eps);
force.getExceptionParameters(exceptions[i], particle1[i], particle2[i], charge, sig, eps);
c6[i] = (float) (4*eps*pow(sig, 6.0));
c12[i] = (float) (4*eps*pow(sig, 12.0));
q1[i] = (float) charge;
......@@ -358,6 +354,88 @@ double CudaCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) {
return 0.0;
}
CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
}
void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
if (data.primaryKernel == NULL)
data.primaryKernel = this;
data.hasCustomNonbonded = true;
numParticles = force.getNumParticles();
_gpuContext* gpu = data.gpu;
// Identify which exceptions are actual interactions.
vector<pair<int, int> > exclusions;
vector<int> exceptions;
{
vector<double> parameters;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
force.getExceptionParameters(i, particle1, particle2, parameters);
exclusions.push_back(pair<int, int>(particle1, particle2));
if (parameters.size() > 0)
exceptions.push_back(i);
}
}
// Initialize nonbonded interactions.
vector<int> particle(numParticles);
vector<vector<double> > parameters(numParticles);
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
force.getParticleParameters(i, parameters[i]);
particle[i] = i;
exclusionList[i].push_back(i);
}
for (int i = 0; i < (int)exclusions.size(); i++) {
exclusionList[exclusions[i].first].push_back(exclusions[i].second);
exclusionList[exclusions[i].second].push_back(exclusions[i].first);
}
CudaNonbondedMethod method = NO_CUTOFF;
if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff)
method = CUTOFF;
if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic) {
Vec3 boxVectors[3];
force.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
gpuSetPeriodicBoxSize(gpu, (float)boxVectors[0][0], (float)boxVectors[1][1], (float)boxVectors[2][2]);
method = PERIODIC;
}
data.customNonbondedMethod = method;
// Initialize exceptions.
int numExceptions = exceptions.size();
vector<int> exceptionParticle1(numExceptions);
vector<int> exceptionParticle2(numExceptions);
vector<vector<double> > exceptionParams(numExceptions);
for (int i = 0; i < numExceptions; i++)
force.getExceptionParameters(exceptions[i], exceptionParticle1[i], exceptionParticle2[i], exceptionParams[i]);
// Record information for the expressions.
vector<string> paramNames;
vector<string> combiningRules;
for (int i = 0; i < force.getNumParameters(); i++) {
paramNames.push_back(force.getParameterName(i));
combiningRules.push_back(force.getParameterCombiningRule(i));
}
gpuSetCustomNonbondedParameters(gpu, parameters, exclusionList, exceptionParticle1, exceptionParticle2, exceptionParams, method,
(float)force.getCutoffDistance(), force.getEnergyFunction(), combiningRules, paramNames);
}
void CudaCalcCustomNonbondedForceKernel::executeForces(ContextImpl& context) {
if (data.primaryKernel == this)
calcForces(context, data);
}
double CudaCalcCustomNonbondedForceKernel::executeEnergy(ContextImpl& context) {
if (data.primaryKernel == this)
return calcEnergy(context, system);
return 0.0;
}
CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
}
......
......@@ -256,7 +256,41 @@ public:
double executeEnergy(ContextImpl& context);
private:
CudaPlatform::PlatformData& data;
int numParticles, num14;
int numParticles;
System& system;
};
/**
* This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system.
*/
class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
public:
CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) : CalcCustomNonbondedForceKernel(name, platform), data(data), system(system) {
}
~CudaCalcCustomNonbondedForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomNonbondedForce this kernel will be used for
*/
void initialize(const System& system, const CustomNonbondedForce& force);
/**
* Execute the kernel to calculate the forces.
*
* @param context the context in which to execute this kernel
*/
void executeForces(ContextImpl& context);
/**
* Execute the kernel to calculate the energy.
*
* @param context the context in which to execute this kernel
* @return the potential energy due to the CustomNonbondedForce
*/
double executeEnergy(ContextImpl& context);
private:
CudaPlatform::PlatformData& data;
int numParticles;
System& system;
};
......
......@@ -53,6 +53,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory(CalcPeriodicTorsionForceKernel::Name(), factory);
registerKernelFactory(CalcRBTorsionForceKernel::Name(), factory);
registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
......@@ -104,8 +105,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
delete data;
}
CudaPlatform::PlatformData::PlatformData(_gpuContext* gpu) : gpu(gpu), removeCM(false), nonbondedMethod(0), hasBonds(false), hasAngles(false),
hasPeriodicTorsions(false), hasRB(false), hasNonbonded(false), primaryKernel(NULL), stepCount(0), computeForceCount(0), time(0.0) {
CudaPlatform::PlatformData::PlatformData(_gpuContext* gpu) : gpu(gpu), removeCM(false), nonbondedMethod(0), customNonbondedMethod(0), hasBonds(false), hasAngles(false),
hasPeriodicTorsions(false), hasRB(false), hasNonbonded(false), hasCustomNonbonded(false), primaryKernel(NULL), stepCount(0), computeForceCount(0), time(0.0) {
stringstream device;
device << gpu->device;
propertyValues[CudaPlatform::CudaDevice()] = device.str();
......
......@@ -35,6 +35,7 @@ extern void kGenerateRandoms(gpuContext gpu);
// Main loop
extern void kCalculateCDLJObcGbsaForces1(gpuContext gpu);
extern void kCalculateCDLJForces(gpuContext gpu);
extern void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid);
extern void kReduceObcGbsaBornForces(gpuContext gpu);
extern void kCalculateObcGbsaForces2(gpuContext gpu);
extern void kCalculateLocalForces(gpuContext gpu);
......@@ -64,6 +65,8 @@ extern void SetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu);
extern void GetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu);
extern void SetCalculateCDLJForcesSim(gpuContext gpu);
extern void GetCalculateCDLJForcesSim(gpuContext gpu);
extern void SetCalculateCustomNonbondedForcesSim(gpuContext gpu);
extern void GetCalculateCustomNonbondedForcesSim(gpuContext gpu);
extern void SetCalculateLocalForcesSim(gpuContext gpu);
extern void GetCalculateLocalForcesSim(gpuContext gpu);
extern void SetCalculateObcGbsaBornSumSim(gpuContext gpu);
......@@ -88,3 +91,6 @@ extern void SetBrownianUpdateSim(gpuContext gpu);
extern void GetBrownianUpdateSim(gpuContext gpu);
extern void SetRandomSim(gpuContext gpu);
extern void GetRandomSim(gpuContext gpu);
extern void SetCustomNonbondedForceExpression(const Expression<128>& expression);
extern void SetCustomNonbondedEnergyExpression(const Expression<128>& expression);
extern void SetCustomNonbondedCombiningRules(const Expression<64>* expressions);
......@@ -231,7 +231,7 @@ static const int G8X_RANDOM_THREADS_PER_BLOCK = 256;
static const int GT2XX_RANDOM_THREADS_PER_BLOCK = 384;
static const int G8X_NONBOND_WORKUNITS_PER_SM = 220;
static const int GT2XX_NONBOND_WORKUNITS_PER_SM = 256;
static const unsigned int MAX_STACK_SIZE = 8;
enum CudaNonbondedMethod
{
NO_CUTOFF,
......@@ -240,6 +240,18 @@ enum CudaNonbondedMethod
EWALD
};
enum ExpressionOp {
CONSTANT = 0, VARIABLE0, VARIABLE1, VARIABLE2, VARIABLE3, VARIABLE4, VARIABLE5, VARIABLE6, VARIABLE7, VARIABLE8, CUSTOM, ADD, SUBTRACT, MULTIPLY, DIVIDE,
POWER, NEGATE, SQRT, EXP, LOG, SIN, COS, SEC, CSC, TAN, COT, ASIN, ACOS, ATAN, SQUARE, CUBE, RECIPROCAL, INCREMENT, DECREMENT
};
template<int SIZE>
struct Expression {
int op[SIZE];
float arg[SIZE];
int length, stackSize;
};
struct cudaGmxSimulation {
// Constants
unsigned int atoms; // Number of atoms
......@@ -291,7 +303,9 @@ struct cudaGmxSimulation {
float bigFloat; // Floating point value used as a flag for Shaken atoms
float epsfac; // Epsilon factor for CDLJ calculations
CudaNonbondedMethod nonbondedMethod; // How to handle nonbonded interactions
float nonbondedCutoffSqr; // Cutoff distance for CDLJ calculations
CudaNonbondedMethod customNonbondedMethod; // How to handle custom nonbonded interactions
float nonbondedCutoff; // Cutoff distance for nonbonded interactions
float nonbondedCutoffSqr; // Square of the cutoff distance for nonbonded interactions
float periodicBoxSizeX; // The X dimension of the periodic box
float periodicBoxSizeY; // The Y dimension of the periodic box
float periodicBoxSizeZ; // The Z dimension of the periodic box
......@@ -324,6 +338,10 @@ struct cudaGmxSimulation {
float collisionFrequency; // Collision frequency for Andersen thermostat
float2* pObcData; // Pointer to fixed Born data
float2* pAttr; // Pointer to additional atom attributes (sig, eps)
float4* pCustomParams; // Pointer to atom parameters for custom nonbonded force
int4* pCustomExceptionID; // Atom indices for custom nonbonded exceptions
float4* pCustomExceptionParams; // Parameters for custom nonbonded exceptions
unsigned int customExceptions; // Number of custom nonbonded exceptions
float2* pEwaldCosSinSum; // Pointer to the cos/sin sums (ewald)
unsigned int bonds; // Number of bonds
int4* pBondID; // Bond atom and output buffer IDs
......
......@@ -49,8 +49,10 @@ using namespace std;
#include "hilbert.h"
#include "openmm/OpenMMException.h"
#include "quern.h"
#include "Lepton.h"
using OpenMM::OpenMMException;
using Lepton::Operation;
struct ShakeCluster {
int centralID;
......@@ -130,6 +132,116 @@ static const float BOLTZ = (RGAS / KILO); // (k
#define DUMP_PARAMETERS 0
template <int SIZE>
static Expression<SIZE> createExpression(const string& expression, const Lepton::ExpressionProgram& program, const vector<string>& variables) {
Expression<SIZE> exp;
if (program.getNumOperations() > SIZE)
throw OpenMMException("Expression contains too many operations: "+expression);
exp.length = program.getNumOperations();
exp.stackSize = program.getStackSize();
for (int i = 0; i < program.getNumOperations(); i++) {
const Operation& op = program.getOperation(i);
switch (op.getId()) {
case Operation::CONSTANT:
exp.op[i] = CONSTANT;
exp.arg[i] = op.evaluate(NULL, map<string, double>());
break;
case Operation::VARIABLE:
if (variables.size() > 0 && op.getName() == variables[0])
exp.op[i] = VARIABLE0;
else if (variables.size() > 1 && op.getName() == variables[1])
exp.op[i] = VARIABLE1;
else if (variables.size() > 2 && op.getName() == variables[2])
exp.op[i] = VARIABLE2;
else if (variables.size() > 3 && op.getName() == variables[3])
exp.op[i] = VARIABLE3;
else if (variables.size() > 4 && op.getName() == variables[4])
exp.op[i] = VARIABLE4;
else if (variables.size() > 5 && op.getName() == variables[5])
exp.op[i] = VARIABLE5;
else if (variables.size() > 6 && op.getName() == variables[6])
exp.op[i] = VARIABLE6;
else if (variables.size() > 7 && op.getName() == variables[7])
exp.op[i] = VARIABLE7;
else if (variables.size() > 8 && op.getName() == variables[8])
exp.op[i] = VARIABLE8;
else
throw OpenMMException("Unknown variable '"+op.getName()+"' in expression: "+expression);
break;
case Operation::ADD:
exp.op[i] = ADD;
break;
case Operation::SUBTRACT:
exp.op[i] = SUBTRACT;
break;
case Operation::MULTIPLY:
exp.op[i] = MULTIPLY;
break;
case Operation::DIVIDE:
exp.op[i] = DIVIDE;
break;
case Operation::POWER:
exp.op[i] = POWER;
break;
case Operation::NEGATE:
exp.op[i] = NEGATE;
break;
case Operation::SQRT:
exp.op[i] = SQRT;
break;
case Operation::EXP:
exp.op[i] = EXP;
break;
case Operation::LOG:
exp.op[i] = LOG;
break;
case Operation::SIN:
exp.op[i] = SIN;
break;
case Operation::COS:
exp.op[i] = COS;
break;
case Operation::SEC:
exp.op[i] = SEC;
break;
case Operation::CSC:
exp.op[i] = CSC;
break;
case Operation::TAN:
exp.op[i] = TAN;
break;
case Operation::COT:
exp.op[i] = COT;
break;
case Operation::ASIN:
exp.op[i] = ASIN;
break;
case Operation::ACOS:
exp.op[i] = ACOS;
break;
case Operation::ATAN:
exp.op[i] = ATAN;
break;
case Operation::SQUARE:
exp.op[i] = SQUARE;
break;
case Operation::CUBE:
exp.op[i] = CUBE;
break;
case Operation::RECIPROCAL:
exp.op[i] = RECIPROCAL;
break;
case Operation::INCREMENT:
exp.op[i] = INCREMENT;
break;
case Operation::DECREMENT:
exp.op[i] = DECREMENT;
break;
}
}
return exp;
}
extern "C"
void gpuSetBondParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<float>& length, const vector<float>& k)
{
......@@ -376,14 +488,33 @@ void gpuSetLJ14Parameters(gpuContext gpu, float epsfac, float fudge, const vecto
psLJ14Parameter->Upload();
}
static void setExclusions(gpuContext gpu, const vector<vector<int> >& exclusions) {
if (gpu->exclusions.size() > 0) {
bool ok = (exclusions.size() == gpu->exclusions.size());
for (int i = 0; i < exclusions.size() && ok; i++) {
if (exclusions[i].size() != gpu->exclusions[i].size())
ok = false;
else {
for (int j = 0; j < exclusions[i].size(); j++)
if (find(gpu->exclusions[i].begin(), gpu->exclusions[i].end(), exclusions[i][j]) == gpu->exclusions[i].end())
ok = false;
}
}
if (!ok)
throw OpenMMException("All nonbonded forces must have identical sets of exceptions");
}
gpu->exclusions = exclusions;
}
extern "C"
void gpuSetCoulombParameters(gpuContext gpu, float epsfac, const vector<int>& atom, const vector<float>& c6, const vector<float>& c12, const vector<float>& q,
const vector<char>& symbol, const vector<vector<int> >& exclusions, CudaNonbondedMethod method)
{
unsigned int coulombs = atom.size();
unsigned int coulombs = c6.size();
gpu->sim.epsfac = epsfac;
gpu->sim.nonbondedMethod = method;
gpu->exclusions = exclusions;
if (coulombs > 0)
setExclusions(gpu, exclusions);
for (unsigned int i = 0; i < coulombs; i++)
{
......@@ -419,10 +550,84 @@ void gpuSetCoulombParameters(gpuContext gpu, float epsfac, const vector<int>& at
extern "C"
void gpuSetNonbondedCutoff(gpuContext gpu, float cutoffDistance, float solventDielectric)
{
if (gpu->sim.nonbondedCutoff != 0.0f && gpu->sim.nonbondedCutoff != cutoffDistance)
throw OpenMMException("All nonbonded forces must use the same cutoff");
gpu->sim.nonbondedCutoff = cutoffDistance;
gpu->sim.nonbondedCutoffSqr = cutoffDistance*cutoffDistance;
gpu->sim.reactionFieldK = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0f)/(2.0f*solventDielectric+1.0f);
}
extern "C"
void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double> >& parameters, const vector<vector<int> >& exclusions,
const vector<int>& exceptionAtom1, const vector<int>& exceptionAtom2, const vector<vector<double> >& exceptionParams,
CudaNonbondedMethod method, float cutoffDistance, const string& energyExp, const vector<string>& combiningRules, const vector<string>& paramNames)
{
if (gpu->sim.nonbondedCutoff != 0.0f && gpu->sim.nonbondedCutoff != cutoffDistance)
throw OpenMMException("All nonbonded forces must use the same cutoff");
if (paramNames.size() > 4)
throw OpenMMException("CudaPlatform only supports four per-atom parameters for custom nonbonded forces");
gpu->sim.nonbondedCutoff = cutoffDistance;
gpu->sim.nonbondedCutoffSqr = cutoffDistance*cutoffDistance;
gpu->sim.customNonbondedMethod = method;
gpu->sim.customExceptions = exceptionAtom1.size();
setExclusions(gpu, exclusions);
gpu->psCustomParams = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "CustomParams");
gpu->sim.pCustomParams = gpu->psCustomParams->_pDevData;
gpu->psCustomExceptionID = new CUDAStream<int4>(gpu->sim.customExceptions, 1, "CustomExceptionId");
gpu->sim.pCustomExceptionID = gpu->psCustomExceptionID->_pDevData;
gpu->psCustomExceptionParams = new CUDAStream<float4>(gpu->sim.customExceptions, 1, "CustomExceptionParams");
gpu->sim.pCustomExceptionParams = gpu->psCustomExceptionParams->_pDevData;
for (int i = 0; i < parameters.size(); i++) {
if (parameters[i].size() > 0)
(*gpu->psCustomParams)[i].x = parameters[i][0];
if (parameters[i].size() > 1)
(*gpu->psCustomParams)[i].y = parameters[i][1];
if (parameters[i].size() > 2)
(*gpu->psCustomParams)[i].z = parameters[i][2];
if (parameters[i].size() > 3)
(*gpu->psCustomParams)[i].w = parameters[i][3];
}
for (int i = 0; i < exceptionAtom1.size(); i++) {
(*gpu->psCustomExceptionID)[i].x = exceptionAtom1[i];
(*gpu->psCustomExceptionID)[i].y = exceptionAtom2[i];
(*gpu->psCustomExceptionID)[i].z = gpu->pOutputBufferCounter[exceptionAtom1[i]]++;
(*gpu->psCustomExceptionID)[i].w = gpu->pOutputBufferCounter[exceptionAtom2[i]]++;
if (exceptionParams[i].size() > 0)
(*gpu->psCustomExceptionParams)[i].x = exceptionParams[i][0];
if (exceptionParams[i].size() > 1)
(*gpu->psCustomExceptionParams)[i].y = exceptionParams[i][1];
if (exceptionParams[i].size() > 2)
(*gpu->psCustomExceptionParams)[i].z = exceptionParams[i][2];
if (exceptionParams[i].size() > 3)
(*gpu->psCustomExceptionParams)[i].w = exceptionParams[i][3];
}
gpu->psCustomParams->Upload();
gpu->psCustomExceptionID->Upload();
gpu->psCustomExceptionParams->Upload();
// Create the Expressions.
vector<string> variables;
variables.push_back("r");
for (int i = 0; i < paramNames.size(); i++)
variables.push_back(paramNames[i]);
SetCustomNonbondedEnergyExpression(createExpression<128>(energyExp, Lepton::Parser::parse(energyExp).optimize().createProgram(), variables));
SetCustomNonbondedForceExpression(createExpression<128>(energyExp, Lepton::Parser::parse(energyExp).differentiate("r").optimize().createProgram(), variables));
Expression<64> paramExpressions[4];
vector<string> combiningRuleParams;
combiningRuleParams.push_back("");
for (int j = 1; j < 3; j++) {
for (int i = 0; i < paramNames.size(); i++) {
stringstream name;
name << paramNames[i] << j;
combiningRuleParams.push_back(name.str());
}
}
for (int i = 0; i < paramNames.size(); i++)
paramExpressions[i] = createExpression<64>(combiningRules[i], Lepton::Parser::parse(combiningRules[i]).optimize().createProgram(), combiningRuleParams);
SetCustomNonbondedCombiningRules(paramExpressions);
}
extern "C"
void gpuSetEwaldParameters(gpuContext gpu, float alpha, int kmaxx, int kmaxy, int kmaxz)
{
......@@ -1216,6 +1421,7 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu->sim.surfaceAreaFactor = surfaceAreaFactor;
gpu->sim.electricConstant = electricConstant;
gpu->sim.nonbondedMethod = NO_CUTOFF;
gpu->sim.nonbondedCutoff = 0.0f;
gpu->sim.nonbondedCutoffSqr = 0.0f;
gpu->sim.bigFloat = 99999999.0f;
......@@ -1408,6 +1614,11 @@ void gpuShutDown(gpuContext gpu)
delete gpu->psxVector4;
delete gpu->psvVector4;
delete gpu->psSigEps2;
if (gpu->psCustomParams != NULL) {
delete gpu->psCustomParams;
delete gpu->psCustomExceptionID;
delete gpu->psCustomExceptionParams;
}
if (gpu->psEwaldCosSinSum != NULL)
delete gpu->psEwaldCosSinSum;
delete gpu->psObcData;
......@@ -1761,6 +1972,7 @@ int gpuSetConstants(gpuContext gpu)
{
SetCalculateCDLJForcesSim(gpu);
SetCalculateCDLJObcGbsaForces1Sim(gpu);
SetCalculateCustomNonbondedForcesSim(gpu);
SetCalculateLocalForcesSim(gpu);
SetCalculateObcGbsaBornSumSim(gpu);
SetCalculateObcGbsaForces2Sim(gpu);
......
......@@ -88,6 +88,9 @@ struct _gpuContext {
CUDAStream<float4>* psxVector4;
CUDAStream<float4>* psvVector4;
CUDAStream<float2>* psSigEps2;
CUDAStream<float4>* psCustomParams; // Atom parameters for custom nonbonded force
CUDAStream<int4>* psCustomExceptionID; // Atom indices for custom nonbonded exceptions
CUDAStream<float4>* psCustomExceptionParams; // Parameters for custom nonbonded exceptions
CUDAStream<float2>* psEwaldCosSinSum;
CUDAStream<float2>* psObcData;
CUDAStream<float>* psObcChain;
......@@ -178,6 +181,12 @@ void gpuSetCoulombParameters(gpuContext gpu, float epsfac, const std::vector<int
extern "C"
void gpuSetNonbondedCutoff(gpuContext gpu, float cutoffDistance, float solventDielectric);
extern "C"
void gpuSetCustomNonbondedParameters(gpuContext gpu, const std::vector<std::vector<double> >& parameters, const std::vector<std::vector<int> >& exclusions,
const std::vector<int>& exceptionAtom1, const std::vector<int>& exceptionAtom2, const std::vector<std::vector<double> >& exceptionParams,
CudaNonbondedMethod method, float cutoffDistance, const std::string& energyExp, const std::vector<std::string>& combiningRules,
const std::vector<std::string>& paramNames);
extern "C"
void gpuSetEwaldParameters(gpuContext gpu, float alpha, int kmaxx, int kmaxy, int kmaxz);
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using namespace std;
#include "gputypes.h"
#include "cudatypes.h"
#define UNROLLXX 0
#define UNROLLXY 0
struct Atom {
float x;
float y;
float z;
float4 params;
float fx;
float fy;
float fz;
};
static __constant__ cudaGmxSimulation cSim;
static __constant__ Expression<128> forceExp;
static __constant__ Expression<128> energyExp;
static __constant__ Expression<64> combiningRules[4];
void SetCalculateCustomNonbondedForcesSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
void GetCalculateCustomNonbondedForcesSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
void SetCustomNonbondedForceExpression(const Expression<128>& expression)
{
cudaError_t status;
status = cudaMemcpyToSymbol(forceExp, &expression, sizeof(forceExp));
RTERROR(status, "SetCustomNonbondedForceExpression: cudaMemcpyToSymbol failed");
}
void SetCustomNonbondedEnergyExpression(const Expression<128>& expression)
{
cudaError_t status;
status = cudaMemcpyToSymbol(energyExp, &expression, sizeof(energyExp));
RTERROR(status, "SetCustomNonbondedEnergyExpression: cudaMemcpyToSymbol failed");
}
void SetCustomNonbondedCombiningRules(const Expression<64>* expressions)
{
cudaError_t status;
status = cudaMemcpyToSymbol(combiningRules, expressions, sizeof(combiningRules));
RTERROR(status, "SetCustomNonbondedCombiningRules: cudaMemcpyToSymbol failed");
}
template<int SIZE>
__device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float* stack, float var0, float4 vars1, float4 vars2)
{
int stackPointer = -1;
for (int i = 0; i < expression->length; i++)
{
switch (expression->op[i])
{
case CONSTANT:
stack[++stackPointer] = expression->arg[i];
break;
case VARIABLE0:
stack[++stackPointer] = var0;
break;
case VARIABLE1:
stack[++stackPointer] = vars1.x;
break;
// case VARIABLE2:
// stack[++stackPointer] = vars1.y;
// break;
// case VARIABLE3:
// stack[++stackPointer] = vars1.z;
// break;
// case VARIABLE4:
// stack[++stackPointer] = vars1.w;
// break;
case VARIABLE5:
stack[++stackPointer] = vars2.x;
break;
// case VARIABLE6:
// stack[++stackPointer] = vars2.y;
// break;
// case VARIABLE7:
// stack[++stackPointer] = vars2.z;
// break;
// case VARIABLE8:
// stack[++stackPointer] = vars2.w;
// break;
case ADD:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp+stack[--stackPointer];
break;
}
case SUBTRACT:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp-stack[--stackPointer];
break;
}
case MULTIPLY:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp*stack[--stackPointer];
break;
}
case DIVIDE:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp/stack[--stackPointer];
break;
}
case POWER:
{
float temp = stack[stackPointer];
stack[stackPointer] = pow(temp, stack[--stackPointer]);
break;
}
case NEGATE:
stack[stackPointer] = -stack[stackPointer];
break;
case SQRT:
stack[stackPointer] = sqrt(stack[stackPointer]);
break;
case EXP:
stack[stackPointer] = exp(stack[stackPointer]);
break;
case LOG:
stack[stackPointer] = log(stack[stackPointer]);
break;
case SIN:
stack[stackPointer] = sin(stack[stackPointer]);
break;
case COS:
stack[stackPointer] = cos(stack[stackPointer]);
break;
case SEC:
stack[stackPointer] = 1.0f/cos(stack[stackPointer]);
break;
case CSC:
stack[stackPointer] = 1.0f/sin(stack[stackPointer]);
break;
case TAN:
stack[stackPointer] = tan(stack[stackPointer]);
break;
case COT:
stack[stackPointer] = 1.0f/tan(stack[stackPointer]);
break;
case ASIN:
stack[stackPointer] = asin(stack[stackPointer]);
break;
case ACOS:
stack[stackPointer] = acos(stack[stackPointer]);
break;
case ATAN:
stack[stackPointer] = atan(stack[stackPointer]);
break;
case SQUARE:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp*temp;
break;
}
case CUBE:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp*temp*temp;
break;
}
case RECIPROCAL:
stack[stackPointer] = 1.0f/stack[stackPointer];
break;
case INCREMENT:
stack[stackPointer] = stack[stackPointer]+1.0f;
break;
case DECREMENT:
stack[stackPointer] = stack[stackPointer]-1.0f;
break;
}
}
return stack[stackPointer];
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateCustomNonbondedForces.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateCustomNonbondedForces.h"
// Include versions of the kernels with cutoffs.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateCustomNonbondedForces.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateCustomNonbondedForces.h"
// Include versions of the kernels with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateCustomNonbondedForces.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCustomNonbondedForces.h"
__global__ void kFindBlockBoundsCutoff_kernel();
__global__ void kFindBlocksWithInteractionsCutoff_kernel();
__global__ void kFindInteractionsWithinBlocksCutoff_kernel(unsigned int* workUnit);
__global__ void kFindBlockBoundsPeriodic_kernel();
__global__ void kFindBlocksWithInteractionsPeriodic_kernel();
__global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int* workUnit);
void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
{
// printf("kCalculateCustomNonbondedCutoffForces\n");
CUDPPResult result;
switch (gpu->sim.customNonbondedMethod)
{
case NO_CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateCustomNonbondedN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
else
kCalculateCustomNonbondedN2Forces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedN2Forces");
break;
case CUTOFF:
if (!neighborListValid)
{
kFindBlockBoundsCutoff_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsCutoff");
kFindBlocksWithInteractionsCutoff_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsCutoff");
result = cudppCompact(gpu->cudpp, gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractionCount,
gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits);
if (result != CUDPP_SUCCESS)
{
printf("Error in cudppCompact: %d\n", result);
exit(-1);
}
kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCustomNonbondedCutoffByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCustomNonbondedCutoffForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedCutoffForces");
break;
case PERIODIC:
if (!neighborListValid)
{
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
result = cudppCompact(gpu->cudpp, gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractionCount,
gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits);
if (result != CUDPP_SUCCESS)
{
printf("Error in cudppCompact: %d\n", result);
exit(-1);
}
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCustomNonbondedPeriodicByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCustomNonbondedPeriodicForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedPeriodicForces");
break;
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for evalauating nonbonded forces. It is included
* several times in kCalculateCustomNonbondedForces.cu with different #defines to generate
* different versions of the kernels.
*/
__global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned int* workUnit)
{
extern __shared__ float stack[];
Atom* sA = (Atom*) &stack[MAX_STACK_SIZE*blockDim.x];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
#ifdef USE_CUTOFF
float3* tempBuffer = (float3*) &sA[cSim.nonbond_threads_per_block];
#endif
unsigned int lasty = 0xFFFFFFFF;
while (pos < end)
{
// Extract cell coordinates from appropriate work unit
unsigned int x = workUnit[pos];
unsigned int y = ((x >> 2) & 0x7fff) << GRIDBITS;
bool bExclusionFlag = (x & 0x1);
x = (x >> 17) << GRIDBITS;
float4 apos; // Local atom x, y, z, q
float3 af; // Local atom fx, fy, fz
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
Atom* psA = &sA[tbx];
unsigned int i = x + tgx;
apos = cSim.pPosq[i];
float4 params = make_float4(0,0,0,0);
af.x = 0.0f;
af.y = 0.0f;
af.z = 0.0f;
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA[threadIdx.x].x = apos.x;
sA[threadIdx.x].y = apos.y;
sA[threadIdx.x].z = apos.z;
sA[threadIdx.x].params = params;
if (!bExclusionFlag)
{
for (unsigned int j = 0; j < GRID; j++)
{
float4 combinedParams;
float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR;
#ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff)
{
dEdR = 0.0f;
}
#endif
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
}
}
else // bExclusion
{
unsigned int xi = x>>GRIDBITS;
unsigned int cell = xi+xi*cSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
unsigned int excl = cSim.pExclusion[cSim.pExclusionIndex[cell]+tgx];
for (unsigned int j = 0; j < GRID; j++)
{
float4 combinedParams;
float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR;
#ifdef USE_CUTOFF
if (!(excl & 0x1) || r > cSim.nonbondedCutoff)
#else
if (!(excl & 0x1))
#endif
{
dEdR = 0.0f;
}
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
excl >>= 1;
}
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
of = cSim.pForce4a[offset];
of.x += af.x;
of.y += af.y;
of.z += af.z;
cSim.pForce4a[offset] = of;
#else
of.x = af.x;
of.y = af.y;
of.z = af.z;
of.w = 0.0f;
unsigned int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = of;
#endif
}
else // 100% utilization
{
// Read fixed atom data into registers and GRF
if (lasty != y)
{
unsigned int j = y + tgx;
float4 temp = cSim.pPosq[j];
sA[threadIdx.x].x = temp.x;
sA[threadIdx.x].y = temp.y;
sA[threadIdx.x].z = temp.z;
sA[threadIdx.x].params = make_float4(0,0,0,0);;
}
sA[threadIdx.x].fx = 0.0f;
sA[threadIdx.x].fy = 0.0f;
sA[threadIdx.x].fz = 0.0f;
if (!bExclusionFlag)
{
#ifdef USE_CUTOFF
unsigned int flags = cSim.pInteractionFlag[pos];
if (flags == 0)
{
// No interactions in this block.
}
else if (flags == 0xFFFFFFFF)
#endif
{
// Compute all interactions within this block.
for (unsigned int j = 0; j < GRID; j++)
{
float4 combinedParams;
float dx = psA[tj].x - apos.x;
float dy = psA[tj].y - apos.y;
float dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR;
#ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff)
{
dEdR = 0.0f;
}
#endif
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
psA[tj].fx += dx;
psA[tj].fy += dy;
psA[tj].fz += dz;
tj = (tj + 1) & (GRID - 1);
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for (unsigned int j = 0; j < GRID; j++)
{
if ((flags&(1<<j)) != 0)
{
float4 combinedParams;
float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR;
#ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff)
{
dEdR = 0.0f;
}
#endif
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
tempBuffer[threadIdx.x].x = dx;
tempBuffer[threadIdx.x].y = dy;
tempBuffer[threadIdx.x].z = dz;
// Sum the forces on atom j.
if (tgx % 2 == 0)
{
tempBuffer[threadIdx.x].x += tempBuffer[threadIdx.x+1].x;
tempBuffer[threadIdx.x].y += tempBuffer[threadIdx.x+1].y;
tempBuffer[threadIdx.x].z += tempBuffer[threadIdx.x+1].z;
}
if (tgx % 4 == 0)
{
tempBuffer[threadIdx.x].x += tempBuffer[threadIdx.x+2].x;
tempBuffer[threadIdx.x].y += tempBuffer[threadIdx.x+2].y;
tempBuffer[threadIdx.x].z += tempBuffer[threadIdx.x+2].z;
}
if (tgx % 8 == 0)
{
tempBuffer[threadIdx.x].x += tempBuffer[threadIdx.x+4].x;
tempBuffer[threadIdx.x].y += tempBuffer[threadIdx.x+4].y;
tempBuffer[threadIdx.x].z += tempBuffer[threadIdx.x+4].z;
}
if (tgx % 16 == 0)
{
tempBuffer[threadIdx.x].x += tempBuffer[threadIdx.x+8].x;
tempBuffer[threadIdx.x].y += tempBuffer[threadIdx.x+8].y;
tempBuffer[threadIdx.x].z += tempBuffer[threadIdx.x+8].z;
}
if (tgx == 0)
{
psA[j].fx += tempBuffer[threadIdx.x].x + tempBuffer[threadIdx.x+16].x;
psA[j].fy += tempBuffer[threadIdx.x].y + tempBuffer[threadIdx.x+16].y;
psA[j].fz += tempBuffer[threadIdx.x].z + tempBuffer[threadIdx.x+16].z;
}
}
}
}
#endif
}
else // bExclusion
{
// Read fixed atom data into registers and GRF
unsigned int xi = x>>GRIDBITS;
unsigned int yi = y>>GRIDBITS;
unsigned int cell = xi+yi*cSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
unsigned int excl = cSim.pExclusion[cSim.pExclusionIndex[cell]+tgx];
excl = (excl >> tgx) | (excl << (GRID - tgx));
for (unsigned int j = 0; j < GRID; j++)
{
float4 combinedParams;
float dx = psA[tj].x - apos.x;
float dy = psA[tj].y - apos.y;
float dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR;
#ifdef USE_CUTOFF
if (!(excl & 0x1) || r > cSim.nonbondedCutoff)
#else
if (!(excl & 0x1))
#endif
{
dEdR = 0.0f;
}
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
psA[tj].fx += dx;
psA[tj].fy += dy;
psA[tj].fz += dz;
excl >>= 1;
tj = (tj + 1) & (GRID - 1);
}
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
of = cSim.pForce4a[offset];
of.x += af.x;
of.y += af.y;
of.z += af.z;
cSim.pForce4a[offset] = of;
offset = y + tgx + warp*cSim.stride;
of = cSim.pForce4a[offset];
of.x += sA[threadIdx.x].fx;
of.y += sA[threadIdx.x].fy;
of.z += sA[threadIdx.x].fz;
cSim.pForce4a[offset] = of;
#else
of.x = af.x;
of.y = af.y;
of.z = af.z;
of.w = 0.0f;
unsigned int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = of;
of.x = sA[threadIdx.x].fx;
of.y = sA[threadIdx.x].fy;
of.z = sA[threadIdx.x].fz;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = of;
#endif
lasty = y;
}
pos++;
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests all the different force terms in the Cuda implementation of CustomNonbondedForce.
*/
#include "../../../tests/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/CustomNonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
void testSimpleExpression() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* forceField = new CustomNonbondedForce("-0.1*r^3");
forceField->addParticle(vector<double>());
forceField->addParticle(vector<double>());
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(2, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
double force = 0.1*3*(2*2);
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], TOL);
ASSERT_EQUAL_TOL(-0.1*(2*2*2), state.getPotentialEnergy(), TOL);
}
void testParameters() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* forceField = new CustomNonbondedForce("scale*a*(r*b)^3");
forceField->addParameter("a", "a1*a2");
forceField->addParameter("b", "c+b1+b2");
forceField->addGlobalParameter("scale", 3.0);
forceField->addGlobalParameter("c", -1.0);
vector<double> params(2);
params[0] = 1.5;
params[1] = 2.0;
forceField->addParticle(params);
params[0] = 2.0;
params[1] = 3.0;
forceField->addParticle(params);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(2, 0, 0);
context.setPositions(positions);
context.setParameter("scale", 1.0);
context.setParameter("c", 0.0);
State state = context.getState(State::Forces | State::Energy);
vector<Vec3> forces = state.getForces();
double force = -3.0*3*5.0*(10*10);
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], TOL);
ASSERT_EQUAL_TOL(3.0*(10*10*10), state.getPotentialEnergy(), TOL);
context.setParameter("scale", 1.5);
context.setParameter("c", 1.0);
state = context.getState(State::Forces | State::Energy);
forces = state.getForces();
force = -1.5*3.0*3*6.0*(12*12);
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], TOL);
ASSERT_EQUAL_TOL(1.5*3.0*(12*12*12), state.getPotentialEnergy(), TOL);
}
void testExceptions() {
CudaPlatform platform;
System system;
VerletIntegrator integrator(0.01);
CustomNonbondedForce* nonbonded = new CustomNonbondedForce("a*r");
nonbonded->addParameter("a", "a1+a2");
vector<double> params(1);
vector<Vec3> positions(4);
for (int i = 0; i < 4; i++) {
system.addParticle(1.0);
params[0] = i+1;
nonbonded->addParticle(params);
positions[i] = Vec3(i, 0, 0);
}
nonbonded->addException(0, 1, vector<double>());
nonbonded->addException(1, 2, vector<double>());
nonbonded->addException(2, 3, vector<double>());
params[0] = 0.5;
nonbonded->addException(0, 2, params);
nonbonded->addException(1, 3, params);
system.addForce(nonbonded);
Context context(system, integrator, platform);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_VEC(Vec3(0.5+1+4, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0.5, 0, 0), forces[1], TOL);
ASSERT_EQUAL_VEC(Vec3(-0.5, 0, 0), forces[2], TOL);
ASSERT_EQUAL_VEC(Vec3(-(0.5+1+4), 0, 0), forces[3], TOL);
ASSERT_EQUAL_TOL((1+4)*3+0.5*2+0.5*2, state.getPotentialEnergy(), TOL);
}
void testCutoff() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* forceField = new CustomNonbondedForce("r");
forceField->addParticle(vector<double>());
forceField->addParticle(vector<double>());
forceField->addParticle(vector<double>());
forceField->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
forceField->setCutoffDistance(2.5);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(3);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(0, 2, 0);
positions[2] = Vec3(0, 3, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_VEC(Vec3(0, 1, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[1], TOL);
ASSERT_EQUAL_VEC(Vec3(0, -1, 0), forces[2], TOL);
ASSERT_EQUAL_TOL(2+1, state.getPotentialEnergy(), TOL);
}
void testPeriodic() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* forceField = new CustomNonbondedForce("r");
forceField->addParticle(vector<double>());
forceField->addParticle(vector<double>());
forceField->addParticle(vector<double>());
forceField->setNonbondedMethod(CustomNonbondedForce::CutoffPeriodic);
forceField->setCutoffDistance(2.0);
forceField->setPeriodicBoxVectors(Vec3(4, 0, 0), Vec3(0, 4, 0), Vec3(0, 0, 4));
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(3);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(0, 2.1, 0);
positions[2] = Vec3(0, 3, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_VEC(Vec3(0, -2, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, 2, 0), forces[1], TOL);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[2], TOL);
ASSERT_EQUAL_TOL(1.9+1+0.9, state.getPotentialEnergy(), TOL);
}
int main() {
try {
testSimpleExpression();
// testParameters();
// testExceptions();
testCutoff();
testPeriodic();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
......@@ -476,19 +476,19 @@ double ReferenceCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) {
ReferenceCalcCustomNonbondedForceKernel::~ReferenceCalcCustomNonbondedForceKernel() {
disposeRealArray(particleParamArray, numParticles);
disposeIntArray(exclusionArray, numParticles);
disposeIntArray(bonded14IndexArray, num14);
disposeRealArray(bonded14ParamArray, num14);
disposeIntArray(exceptionIndexArray, numExceptions);
disposeRealArray(exceptionParamArray, numExceptions);
if (neighborList != NULL)
delete neighborList;
}
void ReferenceCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
// Identify which exceptions are 1-4 interactions.
// Identify which exceptions are actual interactions.
numParticles = force.getNumParticles();
exclusions.resize(numParticles);
vector<int> nb14s;
vector<int> exceptions;
vector<double> parameters;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
......@@ -496,15 +496,15 @@ void ReferenceCalcCustomNonbondedForceKernel::initialize(const System& system, c
exclusions[particle1].insert(particle2);
exclusions[particle2].insert(particle1);
if (parameters.size() > 0)
nb14s.push_back(i);
exceptions.push_back(i);
}
// Build the arrays.
num14 = nb14s.size();
numExceptions = exceptions.size();
int numParameters = force.getNumParameters();
bonded14IndexArray = allocateIntArray(num14, 2);
bonded14ParamArray = allocateRealArray(num14, numParameters);
exceptionIndexArray = allocateIntArray(numExceptions, 2);
exceptionParamArray = allocateRealArray(numExceptions, numParameters);
particleParamArray = allocateRealArray(numParticles, numParameters);
for (int i = 0; i < numParticles; ++i) {
force.getParticleParameters(i, parameters);
......@@ -520,13 +520,13 @@ void ReferenceCalcCustomNonbondedForceKernel::initialize(const System& system, c
for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter)
exclusionArray[i][++index] = *iter;
}
for (int i = 0; i < num14; ++i) {
for (int i = 0; i < numExceptions; ++i) {
int particle1, particle2;
force.getExceptionParameters(nb14s[i], particle1, particle2, parameters);
bonded14IndexArray[i][0] = particle1;
bonded14IndexArray[i][1] = particle2;
force.getExceptionParameters(exceptions[i], particle1, particle2, parameters);
exceptionIndexArray[i][0] = particle1;
exceptionIndexArray[i][1] = particle2;
for (int j = 0; j < numParameters; j++)
bonded14ParamArray[i][j] = static_cast<RealOpenMM>(parameters[j]);
exceptionParamArray[i][j] = static_cast<RealOpenMM>(parameters[j]);
}
nonbondedMethod = CalcCustomNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
nonbondedCutoff = (RealOpenMM) force.getCutoffDistance();
......@@ -568,7 +568,7 @@ void ReferenceCalcCustomNonbondedForceKernel::executeForces(ContextImpl& context
for (int i = 0; i < globalParameterNames.size(); i++)
globalParameters[globalParameterNames[i]] = context.getParameter(globalParameterNames[i]);
ixn.calculatePairIxn(numParticles, posData, particleParamArray, exclusionArray, 0, globalParameters, forceData, 0, 0);
ixn.calculateExceptionIxn(num14, bonded14IndexArray, posData, bonded14ParamArray, globalParameters, forceData, 0, 0);
ixn.calculateExceptionIxn(numExceptions, exceptionIndexArray, posData, exceptionParamArray, globalParameters, forceData, 0, 0);
}
double ReferenceCalcCustomNonbondedForceKernel::executeEnergy(ContextImpl& context) {
......@@ -587,7 +587,7 @@ double ReferenceCalcCustomNonbondedForceKernel::executeEnergy(ContextImpl& conte
for (int i = 0; i < globalParameterNames.size(); i++)
globalParameters[globalParameterNames[i]] = context.getParameter(globalParameterNames[i]);
ixn.calculatePairIxn(numParticles, posData, particleParamArray, exclusionArray, 0, globalParameters, forceData, 0, &energy);
ixn.calculateExceptionIxn(num14, bonded14IndexArray, posData, bonded14ParamArray, globalParameters, forceData, 0, &energy);
ixn.calculateExceptionIxn(numExceptions, exceptionIndexArray, posData, exceptionParamArray, globalParameters, forceData, 0, &energy);
disposeRealArray(forceData, numParticles);
return energy;
}
......
......@@ -304,9 +304,9 @@ public:
*/
double executeEnergy(ContextImpl& context);
private:
int numParticles, num14;
int **exclusionArray, **bonded14IndexArray;
RealOpenMM **particleParamArray, **bonded14ParamArray;
int numParticles, numExceptions;
int **exclusionArray, **exceptionIndexArray;
RealOpenMM **particleParamArray, **exceptionParamArray;
RealOpenMM nonbondedCutoff, periodicBoxSize[3];
std::vector<std::set<int> > exclusions;
Lepton::ExpressionProgram energyExpression, forceExpression;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment