Commit f6346776 authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to implement new CUDA platform: CustomGBForce

parent 5feaa943
...@@ -123,6 +123,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -123,6 +123,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int major, minor; int major, minor;
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device)); CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
gpuArchitecture = intToString(major)+intToString(minor); gpuArchitecture = intToString(major)+intToString(minor);
computeCapability = major+0.1*minor;
defaultOptimizationOptions = "--use_fast_math"; defaultOptimizationOptions = "--use_fast_math";
unsigned int flags = CU_CTX_MAP_HOST; unsigned int flags = CU_CTX_MAP_HOST;
if (useBlockingSync) if (useBlockingSync)
......
...@@ -105,10 +105,16 @@ public: ...@@ -105,10 +105,16 @@ public:
CUdevice getDevice() { CUdevice getDevice() {
return device; return device;
} }
/**
* Get the compute capability of the device associated with this object.
*/
double getComputeCapability() const {
return computeCapability;
}
/** /**
* Get the index of the CUdevice associated with this object. * Get the index of the CUdevice associated with this object.
*/ */
int getDeviceIndex() { int getDeviceIndex() const {
return deviceIndex; return deviceIndex;
} }
/** /**
...@@ -444,7 +450,7 @@ private: ...@@ -444,7 +450,7 @@ private:
void validateMolecules(); void validateMolecules();
static bool hasInitializedCuda; static bool hasInitializedCuda;
const System& system; const System& system;
double time; double time, computeCapability;
CudaPlatform::PlatformData& platformData; CudaPlatform::PlatformData& platformData;
int deviceIndex; int deviceIndex;
int contextIndex; int contextIndex;
......
...@@ -98,8 +98,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform ...@@ -98,8 +98,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return new CudaCalcCustomNonbondedForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcCustomNonbondedForceKernel(name, platform, cu, context.getSystem());
if (name == CalcGBSAOBCForceKernel::Name()) if (name == CalcGBSAOBCForceKernel::Name())
return new CudaCalcGBSAOBCForceKernel(name, platform, cu); return new CudaCalcGBSAOBCForceKernel(name, platform, cu);
// if (name == CalcCustomGBForceKernel::Name()) if (name == CalcCustomGBForceKernel::Name())
// return new CudaCalcCustomGBForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcCustomGBForceKernel(name, platform, cu, context.getSystem());
if (name == CalcCustomExternalForceKernel::Name()) if (name == CalcCustomExternalForceKernel::Name())
return new CudaCalcCustomExternalForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcCustomExternalForceKernel(name, platform, cu, context.getSystem());
if (name == CalcCustomHbondForceKernel::Name()) if (name == CalcCustomHbondForceKernel::Name())
......
...@@ -2068,940 +2068,835 @@ void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, c ...@@ -2068,940 +2068,835 @@ void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, c
cu.invalidateMolecules(); cu.invalidateMolecules();
} }
//class CudaCustomGBForceInfo : public CudaForceInfo { class CudaCustomGBForceInfo : public CudaForceInfo {
//public: public:
// CudaCustomGBForceInfo(int requiredBuffers, const CustomGBForce& force) : CudaForceInfo(requiredBuffers), force(force) { CudaCustomGBForceInfo(const CustomGBForce& force) : force(force) {
// } }
// bool areParticlesIdentical(int particle1, int particle2) { bool areParticlesIdentical(int particle1, int particle2) {
// vector<double> params1; vector<double> params1;
// vector<double> params2; vector<double> params2;
// force.getParticleParameters(particle1, params1); force.getParticleParameters(particle1, params1);
// force.getParticleParameters(particle2, params2); force.getParticleParameters(particle2, params2);
// for (int i = 0; i < (int) params1.size(); i++) for (int i = 0; i < (int) params1.size(); i++)
// if (params1[i] != params2[i]) if (params1[i] != params2[i])
// return false; return false;
// return true; return true;
// } }
// int getNumParticleGroups() { int getNumParticleGroups() {
// return force.getNumExclusions(); return force.getNumExclusions();
// } }
// void getParticlesInGroup(int index, vector<int>& particles) { void getParticlesInGroup(int index, vector<int>& particles) {
// int particle1, particle2; int particle1, particle2;
// force.getExclusionParticles(index, particle1, particle2); force.getExclusionParticles(index, particle1, particle2);
// particles.resize(2); particles.resize(2);
// particles[0] = particle1; particles[0] = particle1;
// particles[1] = particle2; particles[1] = particle2;
// } }
// bool areGroupsIdentical(int group1, int group2) { bool areGroupsIdentical(int group1, int group2) {
// return true; return true;
// } }
//private: private:
// const CustomGBForce& force; const CustomGBForce& force;
//}; };
//
//CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() { CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
// cu.setAsCurrent(); cu.setAsCurrent();
// if (params != NULL) if (params != NULL)
// delete params; delete params;
// if (computedValues != NULL) if (computedValues != NULL)
// delete computedValues; delete computedValues;
// if (energyDerivs != NULL) if (energyDerivs != NULL)
// delete energyDerivs; delete energyDerivs;
// if (longEnergyDerivs != NULL) if (longEnergyDerivs != NULL)
// delete longEnergyDerivs; delete longEnergyDerivs;
// if (globals != NULL) if (globals != NULL)
// delete globals; delete globals;
// if (valueBuffers != NULL) if (valueBuffers != NULL)
// delete valueBuffers; delete valueBuffers;
// if (longValueBuffers != NULL) if (tabulatedFunctionParams != NULL)
// delete longValueBuffers; delete tabulatedFunctionParams;
// if (tabulatedFunctionParams != NULL) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// delete tabulatedFunctionParams; delete tabulatedFunctions[i];
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++) }
// delete tabulatedFunctions[i];
//} void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) {
// cu.setAsCurrent();
//void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) { if (cu.getPlatformData().contexts.size() > 1)
// cu.setAsCurrent(); throw OpenMMException("CustomGBForce does not support using multiple CUDA devices");
// if (cu.getPlatformData().contexts.size() > 1) bool useExclusionsForValue = false;
// throw OpenMMException("CustomGBForce does not support using multiple CUDA devices"); numComputedValues = force.getNumComputedValues();
// bool useExclusionsForValue = false; vector<string> computedValueNames(force.getNumComputedValues());
// numComputedValues = force.getNumComputedValues(); vector<string> computedValueExpressions(force.getNumComputedValues());
// vector<string> computedValueNames(force.getNumComputedValues()); if (force.getNumComputedValues() > 0) {
// vector<string> computedValueExpressions(force.getNumComputedValues()); CustomGBForce::ComputationType type;
// if (force.getNumComputedValues() > 0) { force.getComputedValueParameters(0, computedValueNames[0], computedValueExpressions[0], type);
// CustomGBForce::ComputationType type; if (type == CustomGBForce::SingleParticle)
// force.getComputedValueParameters(0, computedValueNames[0], computedValueExpressions[0], type); throw OpenMMException("CudaPlatform requires that the first computed value for a CustomGBForce be of type ParticlePair or ParticlePairNoExclusions.");
// if (type == CustomGBForce::SingleParticle) useExclusionsForValue = (type == CustomGBForce::ParticlePair);
// throw OpenMMException("CudaPlatform requires that the first computed value for a CustomGBForce be of type ParticlePair or ParticlePairNoExclusions."); for (int i = 1; i < force.getNumComputedValues(); i++) {
// useExclusionsForValue = (type == CustomGBForce::ParticlePair); force.getComputedValueParameters(i, computedValueNames[i], computedValueExpressions[i], type);
// for (int i = 1; i < force.getNumComputedValues(); i++) { if (type != CustomGBForce::SingleParticle)
// force.getComputedValueParameters(i, computedValueNames[i], computedValueExpressions[i], type); throw OpenMMException("CudaPlatform requires that a CustomGBForce only have one computed value of type ParticlePair or ParticlePairNoExclusions.");
// if (type != CustomGBForce::SingleParticle) }
// throw OpenMMException("CudaPlatform requires that a CustomGBForce only have one computed value of type ParticlePair or ParticlePairNoExclusions."); }
// } int forceIndex;
// } for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
// int forceIndex; ;
// for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex) string prefix = "custom"+cu.intToString(forceIndex)+"_";
// ;
// string prefix = "custom"+cu.intToString(forceIndex)+"_"; // Record parameters and exclusions.
//
// // Record parameters and exclusions. int numParticles = force.getNumParticles();
// params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customGBParameters", true);
// int numParticles = force.getNumParticles(); computedValues = new CudaParameterSet(cu, force.getNumComputedValues(), numParticles, "customGBComputedValues", true);
// params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customGBParameters", true); if (force.getNumGlobalParameters() > 0)
// computedValues = new CudaParameterSet(cu, force.getNumComputedValues(), numParticles, "customGBComputedValues", true); globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customGBGlobals");
// if (force.getNumGlobalParameters() > 0) vector<vector<float> > paramVector(numParticles);
// globals = new CudaArray<cl_float>(cu, force.getNumGlobalParameters(), "customGBGlobals", false, CL_MEM_READ_ONLY); vector<vector<int> > exclusionList(numParticles);
// vector<vector<cl_float> > paramVector(numParticles); for (int i = 0; i < numParticles; i++) {
// vector<vector<int> > exclusionList(numParticles); vector<double> parameters;
// for (int i = 0; i < numParticles; i++) { force.getParticleParameters(i, parameters);
// vector<double> parameters; paramVector[i].resize(parameters.size());
// force.getParticleParameters(i, parameters); for (int j = 0; j < (int) parameters.size(); j++)
// paramVector[i].resize(parameters.size()); paramVector[i][j] = (float) parameters[j];
// for (int j = 0; j < (int) parameters.size(); j++) exclusionList[i].push_back(i);
// paramVector[i][j] = (cl_float) parameters[j]; }
// exclusionList[i].push_back(i); for (int i = 0; i < force.getNumExclusions(); i++) {
// } int particle1, particle2;
// for (int i = 0; i < force.getNumExclusions(); i++) { force.getExclusionParticles(i, particle1, particle2);
// int particle1, particle2; exclusionList[particle1].push_back(particle2);
// force.getExclusionParticles(i, particle1, particle2); exclusionList[particle2].push_back(particle1);
// exclusionList[particle1].push_back(particle2); }
// exclusionList[particle2].push_back(particle1); params->setParameterValues(paramVector);
// }
// params->setParameterValues(paramVector); // Record the tabulated functions.
//
// // Record the tabulated functions. CudaExpressionUtilities::FunctionPlaceholder fp;
// map<string, Lepton::CustomFunction*> functions;
// CudaExpressionUtilities::FunctionPlaceholder fp; vector<pair<string, string> > functionDefinitions;
// map<string, Lepton::CustomFunction*> functions; vector<float4> tabulatedFunctionParamsVec(force.getNumFunctions());
// vector<pair<string, string> > functionDefinitions; stringstream tableArgs;
// vector<mm_float4> tabulatedFunctionParamsVec(force.getNumFunctions()); for (int i = 0; i < force.getNumFunctions(); i++) {
// stringstream tableArgs; string name;
// for (int i = 0; i < force.getNumFunctions(); i++) { vector<double> values;
// string name; double min, max;
// vector<double> values; force.getFunctionParameters(i, name, values, min, max);
// double min, max; string arrayName = prefix+"table"+cu.intToString(i);
// force.getFunctionParameters(i, name, values, min, max); functionDefinitions.push_back(make_pair(name, arrayName));
// string arrayName = prefix+"table"+cu.intToString(i); functions[name] = &fp;
// functionDefinitions.push_back(make_pair(name, arrayName)); tabulatedFunctionParamsVec[i] = make_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
// functions[name] = &fp; vector<float4> f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
// tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2); tabulatedFunctions.push_back(CudaArray::create<float4>(cu, values.size()-1, "TabulatedFunction"));
// vector<mm_float4> f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max); tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
// tabulatedFunctions.push_back(new CudaArray<mm_float4>(cu, values.size()-1, "TabulatedFunction")); cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer()));
// tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tableArgs << ", const float4* __restrict__ " << arrayName;
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer())); }
// tableArgs << ", __global const float4* restrict " << arrayName; if (force.getNumFunctions() > 0) {
// } tabulatedFunctionParams = CudaArray::create<float4>(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters");
// if (force.getNumFunctions() > 0) { tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
// tabulatedFunctionParams = new CudaArray<mm_float4>(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY); cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(float4), tabulatedFunctionParams->getDevicePointer()));
// tabulatedFunctionParams->upload(tabulatedFunctionParamsVec); tableArgs << ", const float4* " << prefix << "functionParams";
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(cl_float4), tabulatedFunctionParams->getDevicePointer())); }
// tableArgs << ", __global const float4* " << prefix << "functionParams";
// } // Record the global parameters.
//
// // Record the global parameters. globalParamNames.resize(force.getNumGlobalParameters());
// globalParamValues.resize(force.getNumGlobalParameters());
// globalParamNames.resize(force.getNumGlobalParameters()); for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// globalParamValues.resize(force.getNumGlobalParameters()); globalParamNames[i] = force.getGlobalParameterName(i);
// for (int i = 0; i < force.getNumGlobalParameters(); i++) { globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
// globalParamNames[i] = force.getGlobalParameterName(i); }
// globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i); if (globals != NULL)
// } globals->upload(globalParamValues);
// if (globals != NULL)
// globals->upload(globalParamValues); // Record derivatives of expressions needed for the chain rule terms.
//
// // Record derivatives of expressions needed for the chain rule terms. vector<vector<Lepton::ParsedExpression> > valueGradientExpressions(force.getNumComputedValues());
// vector<vector<Lepton::ParsedExpression> > valueDerivExpressions(force.getNumComputedValues());
// vector<vector<Lepton::ParsedExpression> > valueGradientExpressions(force.getNumComputedValues()); needParameterGradient = false;
// vector<vector<Lepton::ParsedExpression> > valueDerivExpressions(force.getNumComputedValues()); for (int i = 1; i < force.getNumComputedValues(); i++) {
// needParameterGradient = false; Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize();
// for (int i = 1; i < force.getNumComputedValues(); i++) { valueGradientExpressions[i].push_back(ex.differentiate("x").optimize());
// Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize(); valueGradientExpressions[i].push_back(ex.differentiate("y").optimize());
// valueGradientExpressions[i].push_back(ex.differentiate("x").optimize()); valueGradientExpressions[i].push_back(ex.differentiate("z").optimize());
// valueGradientExpressions[i].push_back(ex.differentiate("y").optimize()); if (!isZeroExpression(valueGradientExpressions[i][0]) || !isZeroExpression(valueGradientExpressions[i][1]) || !isZeroExpression(valueGradientExpressions[i][2]))
// valueGradientExpressions[i].push_back(ex.differentiate("z").optimize()); needParameterGradient = true;
// if (!isZeroExpression(valueGradientExpressions[i][0]) || !isZeroExpression(valueGradientExpressions[i][1]) || !isZeroExpression(valueGradientExpressions[i][2])) for (int j = 0; j < i; j++)
// needParameterGradient = true; valueDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]).optimize());
// for (int j = 0; j < i; j++) }
// valueDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]).optimize()); vector<vector<Lepton::ParsedExpression> > energyDerivExpressions(force.getNumEnergyTerms());
// } vector<bool> needChainForValue(force.getNumComputedValues(), false);
// vector<vector<Lepton::ParsedExpression> > energyDerivExpressions(force.getNumEnergyTerms()); for (int i = 0; i < force.getNumEnergyTerms(); i++) {
// vector<bool> needChainForValue(force.getNumComputedValues(), false); string expression;
// for (int i = 0; i < force.getNumEnergyTerms(); i++) { CustomGBForce::ComputationType type;
// string expression; force.getEnergyTermParameters(i, expression, type);
// CustomGBForce::ComputationType type; Lepton::ParsedExpression ex = Lepton::Parser::parse(expression, functions).optimize();
// force.getEnergyTermParameters(i, expression, type); for (int j = 0; j < force.getNumComputedValues(); j++) {
// Lepton::ParsedExpression ex = Lepton::Parser::parse(expression, functions).optimize(); if (type == CustomGBForce::SingleParticle) {
// for (int j = 0; j < force.getNumComputedValues(); j++) { energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]).optimize());
// if (type == CustomGBForce::SingleParticle) { if (!isZeroExpression(energyDerivExpressions[i].back()))
// energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]).optimize()); needChainForValue[j] = true;
// if (!isZeroExpression(energyDerivExpressions[i].back())) }
// needChainForValue[j] = true; else {
// } energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]+"1").optimize());
// else { if (!isZeroExpression(energyDerivExpressions[i].back()))
// energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]+"1").optimize()); needChainForValue[j] = true;
// if (!isZeroExpression(energyDerivExpressions[i].back())) energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]+"2").optimize());
// needChainForValue[j] = true; if (!isZeroExpression(energyDerivExpressions[i].back()))
// energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]+"2").optimize()); needChainForValue[j] = true;
// if (!isZeroExpression(energyDerivExpressions[i].back())) }
// needChainForValue[j] = true; }
// } }
// } longEnergyDerivs = CudaArray::create<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
// } energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
// bool deviceIsCpu = (cu.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
// bool useLong = (cu.getSupports64BitGlobalAtomics() && !deviceIsCpu); // Create the kernels.
// if (useLong) {
// longEnergyDerivs = new CudaArray<cl_long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives"); bool useCutoff = (force.getNonbondedMethod() != CustomGBForce::NoCutoff);
// energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivatives", true); bool usePeriodic = (force.getNonbondedMethod() != CustomGBForce::NoCutoff && force.getNonbondedMethod() != CustomGBForce::CutoffNonPeriodic);
// } {
// else // Create the N2 value kernel.
// energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms()*cu.getNonbondedUtilities().getNumForceBuffers(), "customGBEnergyDerivatives", true);
// vector<pair<ExpressionTreeNode, string> > variables;
// // Create the kernels. map<string, string> rename;
// ExpressionTreeNode rnode(new Operation::Variable("r"));
// bool useCutoff = (force.getNonbondedMethod() != CustomGBForce::NoCutoff); variables.push_back(make_pair(rnode, "r"));
// bool usePeriodic = (force.getNonbondedMethod() != CustomGBForce::NoCutoff && force.getNonbondedMethod() != CustomGBForce::CutoffNonPeriodic); variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
// { variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
// // Create the N2 value kernel. for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
// const string& name = force.getPerParticleParameterName(i);
// vector<pair<ExpressionTreeNode, string> > variables; variables.push_back(makeVariable(name+"1", "params"+params->getParameterSuffix(i, "1")));
// map<string, string> rename; variables.push_back(makeVariable(name+"2", "params"+params->getParameterSuffix(i, "2")));
// ExpressionTreeNode rnode(new Operation::Variable("r")); rename[name+"1"] = name+"2";
// variables.push_back(make_pair(rnode, "r")); rename[name+"2"] = name+"1";
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2")); }
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR")); for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// for (int i = 0; i < force.getNumPerParticleParameters(); i++) { const string& name = force.getGlobalParameterName(i);
// const string& name = force.getPerParticleParameterName(i); string value = "globals["+cu.intToString(i)+"]";
// variables.push_back(makeVariable(name+"1", "params"+params->getParameterSuffix(i, "1"))); variables.push_back(makeVariable(name, value));
// variables.push_back(makeVariable(name+"2", "params"+params->getParameterSuffix(i, "2"))); }
// rename[name+"1"] = name+"2"; map<string, Lepton::ParsedExpression> n2ValueExpressions;
// rename[name+"2"] = name+"1"; stringstream n2ValueSource;
// } Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[0], functions).optimize();
// for (int i = 0; i < force.getNumGlobalParameters(); i++) { n2ValueExpressions["tempValue1 = "] = ex;
// const string& name = force.getGlobalParameterName(i); n2ValueExpressions["tempValue2 = "] = ex.renameVariables(rename);
// string value = "globals["+cu.intToString(i)+"]"; n2ValueSource << cu.getExpressionUtilities().createExpressions(n2ValueExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
// variables.push_back(makeVariable(name, value)); map<string, string> replacements;
// } string n2ValueStr = n2ValueSource.str();
// map<string, Lepton::ParsedExpression> n2ValueExpressions; replacements["COMPUTE_VALUE"] = n2ValueStr;
// stringstream n2ValueSource; stringstream extraArgs, atomParams, loadLocal1, loadLocal2, load1, load2;
// Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[0], functions).optimize(); if (force.getNumGlobalParameters() > 0)
// n2ValueExpressions["tempValue1 = "] = ex; extraArgs << ", const float* globals";
// n2ValueExpressions["tempValue2 = "] = ex.renameVariables(rename); pairValueUsesParam.resize(params->getBuffers().size(), false);
// n2ValueSource << cu.getExpressionUtilities().createExpressions(n2ValueExpressions, variables, functionDefinitions, "temp", prefix+"functionParams"); int atomParamSize = 6;
// map<string, string> replacements; for (int i = 0; i < (int) params->getBuffers().size(); i++) {
// string n2ValueStr = n2ValueSource.str(); CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
// replacements["COMPUTE_VALUE"] = n2ValueStr; string paramName = "params"+cu.intToString(i+1);
// stringstream extraArgs, loadLocal1, loadLocal2, load1, load2; if (n2ValueStr.find(paramName+"1") != n2ValueStr.npos || n2ValueStr.find(paramName+"2") != n2ValueStr.npos) {
// if (force.getNumGlobalParameters() > 0) extraArgs << ", const " << buffer.getType() << "* __restrict__ global_" << paramName;
// extraArgs << ", __global const float* globals"; atomParams << buffer.getType() << " " << paramName << ";\n";
// pairValueUsesParam.resize(params->getBuffers().size(), false); loadLocal1 << "localData[localAtomIndex]." << paramName << " = " << paramName << "1;\n";
// for (int i = 0; i < (int) params->getBuffers().size(); i++) { loadLocal2 << "localData[localAtomIndex]." << paramName << " = global_" << paramName << "[j];\n";
// const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n";
// string paramName = "params"+cu.intToString(i+1); load2 << buffer.getType() << " " << paramName << "2 = localData[atom2]." << paramName << ";\n";
// if (n2ValueStr.find(paramName+"1") != n2ValueStr.npos || n2ValueStr.find(paramName+"2") != n2ValueStr.npos) { pairValueUsesParam[i] = true;
// extraArgs << ", __global const " << buffer.getType() << "* restrict global_" << paramName << ", __local " << buffer.getType() << "* restrict local_" << paramName; atomParamSize += buffer.getNumComponents();
// loadLocal1 << "local_" << paramName << "[localAtomIndex] = " << paramName << "1;\n"; }
// loadLocal2 << "local_" << paramName << "[localAtomIndex] = global_" << paramName << "[j];\n"; }
// load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n"; replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
// load2 << buffer.getType() << " " << paramName << "2 = local_" << paramName << "[atom2];\n"; replacements["ATOM_PARAMETER_DATA"] = atomParams.str();
// pairValueUsesParam[i] = true; replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
// } replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
// } replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
// replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
// replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str(); map<string, string> defines;
// replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str(); if (useCutoff)
// replacements["LOAD_ATOM1_PARAMETERS"] = load1.str(); defines["USE_CUTOFF"] = "1";
// replacements["LOAD_ATOM2_PARAMETERS"] = load2.str(); if (usePeriodic)
// map<string, string> defines; defines["USE_PERIODIC"] = "1";
// if (cu.getNonbondedUtilities().getForceBufferPerAtomBlock()) if (useExclusionsForValue)
// defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1"; defines["USE_EXCLUSIONS"] = "1";
// if (useCutoff) if (atomParamSize%2 == 0 && !cu.getUseDoublePrecision())
// defines["USE_CUTOFF"] = "1"; defines["NEED_PADDING"] = "1";
// if (usePeriodic) defines["WARPS_PER_GROUP"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize()/CudaContext::TileSize);
// defines["USE_PERIODIC"] = "1"; defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize());
// if (useExclusionsForValue) defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
// defines["USE_EXCLUSIONS"] = "1"; defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// if (cu.getSIMDWidth() == 32) defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// defines["WARPS_PER_GROUP"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize()/CudaContext::TileSize); defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
// defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance()); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::customGBValueN2, replacements), defines);
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); pairValueKernel = cu.getKernel(module, "computeN2Value");
// defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); if (useExclusionsForValue)
// defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks()); cu.getNonbondedUtilities().requestExclusions(exclusionList);
// string file; }
// if (deviceIsCpu) {
// file = CudaKernelSources::customGBValueN2_cpu; // Create the kernel to reduce the N2 value and calculate other values.
// else if (cu.getSIMDWidth() == 32)
// file = CudaKernelSources::customGBValueN2_nvidia; stringstream reductionSource, extraArgs;
// else if (force.getNumGlobalParameters() > 0)
// file = CudaKernelSources::customGBValueN2_default; extraArgs << ", const float* globals";
// CUmodule module = cu.createModule(cu.replaceStrings(file, replacements), defines); for (int i = 0; i < (int) params->getBuffers().size(); i++) {
// pairValueKernel = cu.getKernel(module, "computeN2Value"); CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
// if (useExclusionsForValue) string paramName = "params"+cu.intToString(i+1);
// cu.getNonbondedUtilities().requestExclusions(exclusionList); extraArgs << ", const " << buffer.getType() << "* __restrict__ " << paramName;
// } }
// { for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
// // Create the kernel to reduce the N2 value and calculate other values. CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
// string valueName = "values"+cu.intToString(i+1);
// stringstream reductionSource, extraArgs; extraArgs << ", " << buffer.getType() << "* __restrict__ global_" << valueName;
// if (force.getNumGlobalParameters() > 0) reductionSource << buffer.getType() << " local_" << valueName << ";\n";
// extraArgs << ", __global const float* globals"; }
// for (int i = 0; i < (int) params->getBuffers().size(); i++) { reductionSource << "local_values" << computedValues->getParameterSuffix(0) << " = sum;\n";
// const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; map<string, string> variables;
// string paramName = "params"+cu.intToString(i+1); variables["x"] = "pos.x";
// extraArgs << ", __global const " << buffer.getType() << "* restrict " << paramName; variables["y"] = "pos.y";
// } variables["z"] = "pos.z";
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { for (int i = 0; i < force.getNumPerParticleParameters(); i++)
// const CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i]; variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
// string valueName = "values"+cu.intToString(i+1); for (int i = 0; i < force.getNumGlobalParameters(); i++)
// extraArgs << ", __global " << buffer.getType() << "* restrict global_" << valueName; variables[force.getGlobalParameterName(i)] = "globals["+cu.intToString(i)+"]";
// reductionSource << buffer.getType() << " local_" << valueName << ";\n"; for (int i = 1; i < force.getNumComputedValues(); i++) {
// } variables[computedValueNames[i-1]] = "local_values"+computedValues->getParameterSuffix(i-1);
// reductionSource << "local_values" << computedValues->getParameterSuffix(0) << " = sum;\n"; map<string, Lepton::ParsedExpression> valueExpressions;
// map<string, string> variables; valueExpressions["local_values"+computedValues->getParameterSuffix(i)+" = "] = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize();
// variables["x"] = "pos.x"; reductionSource << cu.getExpressionUtilities().createExpressions(valueExpressions, variables, functionDefinitions, "value"+cu.intToString(i)+"_temp", prefix+"functionParams");
// variables["y"] = "pos.y"; }
// variables["z"] = "pos.z"; for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
// for (int i = 0; i < force.getNumPerParticleParameters(); i++) string valueName = "values"+cu.intToString(i+1);
// variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]"); reductionSource << "global_" << valueName << "[index] = local_" << valueName << ";\n";
// for (int i = 0; i < force.getNumGlobalParameters(); i++) }
// variables[force.getGlobalParameterName(i)] = "globals["+cu.intToString(i)+"]"; map<string, string> replacements;
// for (int i = 1; i < force.getNumComputedValues(); i++) { replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
// variables[computedValueNames[i-1]] = "local_values"+computedValues->getParameterSuffix(i-1); replacements["COMPUTE_VALUES"] = reductionSource.str();
// map<string, Lepton::ParsedExpression> valueExpressions; map<string, string> defines;
// valueExpressions["local_values"+computedValues->getParameterSuffix(i)+" = "] = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize(); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// reductionSource << cu.getExpressionUtilities().createExpressions(valueExpressions, variables, functionDefinitions, "value"+cu.intToString(i)+"_temp", prefix+"functionParams"); CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customGBValuePerParticle, replacements), defines);
// } perParticleValueKernel = cu.getKernel(module, "computePerParticleValues");
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { }
// string valueName = "values"+cu.intToString(i+1); {
// reductionSource << "global_" << valueName << "[index] = local_" << valueName << ";\n"; // Create the N2 energy kernel.
// }
// map<string, string> replacements; vector<pair<ExpressionTreeNode, string> > variables;
// replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); ExpressionTreeNode rnode(new Operation::Variable("r"));
// replacements["COMPUTE_VALUES"] = reductionSource.str(); variables.push_back(make_pair(rnode, "r"));
// map<string, string> defines; variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
// CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customGBValuePerParticle, replacements), defines); for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
// perParticleValueKernel = cu.getKernel(module, "computePerParticleValues"); const string& name = force.getPerParticleParameterName(i);
// } variables.push_back(makeVariable(name+"1", "params"+params->getParameterSuffix(i, "1")));
// { variables.push_back(makeVariable(name+"2", "params"+params->getParameterSuffix(i, "2")));
// // Create the N2 energy kernel. }
// for (int i = 0; i < force.getNumComputedValues(); i++) {
// vector<pair<ExpressionTreeNode, string> > variables; variables.push_back(makeVariable(computedValueNames[i]+"1", "values"+computedValues->getParameterSuffix(i, "1")));
// ExpressionTreeNode rnode(new Operation::Variable("r")); variables.push_back(makeVariable(computedValueNames[i]+"2", "values"+computedValues->getParameterSuffix(i, "2")));
// variables.push_back(make_pair(rnode, "r")); }
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2")); for (int i = 0; i < force.getNumGlobalParameters(); i++)
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR")); variables.push_back(makeVariable(force.getGlobalParameterName(i), "globals["+cu.intToString(i)+"]"));
// for (int i = 0; i < force.getNumPerParticleParameters(); i++) { stringstream n2EnergySource;
// const string& name = force.getPerParticleParameterName(i); bool anyExclusions = (force.getNumExclusions() > 0);
// variables.push_back(makeVariable(name+"1", "params"+params->getParameterSuffix(i, "1"))); for (int i = 0; i < force.getNumEnergyTerms(); i++) {
// variables.push_back(makeVariable(name+"2", "params"+params->getParameterSuffix(i, "2"))); string expression;
// } CustomGBForce::ComputationType type;
// for (int i = 0; i < force.getNumComputedValues(); i++) { force.getEnergyTermParameters(i, expression, type);
// variables.push_back(makeVariable(computedValueNames[i]+"1", "values"+computedValues->getParameterSuffix(i, "1"))); if (type == CustomGBForce::SingleParticle)
// variables.push_back(makeVariable(computedValueNames[i]+"2", "values"+computedValues->getParameterSuffix(i, "2"))); continue;
// } bool exclude = (anyExclusions && type == CustomGBForce::ParticlePair);
// for (int i = 0; i < force.getNumGlobalParameters(); i++) map<string, Lepton::ParsedExpression> n2EnergyExpressions;
// variables.push_back(makeVariable(force.getGlobalParameterName(i), "globals["+cu.intToString(i)+"]")); n2EnergyExpressions["tempEnergy += "] = Lepton::Parser::parse(expression, functions).optimize();
// stringstream n2EnergySource; n2EnergyExpressions["dEdR += "] = Lepton::Parser::parse(expression, functions).differentiate("r").optimize();
// bool anyExclusions = (force.getNumExclusions() > 0); for (int j = 0; j < force.getNumComputedValues(); j++) {
// for (int i = 0; i < force.getNumEnergyTerms(); i++) { if (needChainForValue[j]) {
// string expression; string index = cu.intToString(j+1);
// CustomGBForce::ComputationType type; n2EnergyExpressions["/*"+cu.intToString(i+1)+"*/ deriv"+index+"_1 += "] = energyDerivExpressions[i][2*j];
// force.getEnergyTermParameters(i, expression, type); n2EnergyExpressions["/*"+cu.intToString(i+1)+"*/ deriv"+index+"_2 += "] = energyDerivExpressions[i][2*j+1];
// if (type == CustomGBForce::SingleParticle) }
// continue; }
// bool exclude = (anyExclusions && type == CustomGBForce::ParticlePair); if (exclude)
// map<string, Lepton::ParsedExpression> n2EnergyExpressions; n2EnergySource << "if (!isExcluded) {\n";
// n2EnergyExpressions["tempEnergy += "] = Lepton::Parser::parse(expression, functions).optimize(); n2EnergySource << cu.getExpressionUtilities().createExpressions(n2EnergyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
// n2EnergyExpressions["dEdR += "] = Lepton::Parser::parse(expression, functions).differentiate("r").optimize(); if (exclude)
// if (useLong) { n2EnergySource << "}\n";
// for (int j = 0; j < force.getNumComputedValues(); j++) { }
// if (needChainForValue[j]) { map<string, string> replacements;
// string index = cu.intToString(j+1); string n2EnergyStr = n2EnergySource.str();
// n2EnergyExpressions["/*"+cu.intToString(i+1)+"*/ deriv"+index+"_1 += "] = energyDerivExpressions[i][2*j]; replacements["COMPUTE_INTERACTION"] = n2EnergyStr;
// n2EnergyExpressions["/*"+cu.intToString(i+1)+"*/ deriv"+index+"_2 += "] = energyDerivExpressions[i][2*j+1]; stringstream extraArgs, atomParams, loadLocal1, loadLocal2, clearLocal, load1, load2, declare1, recordDeriv, storeDerivs1, storeDerivs2, declareTemps, setTemps;
// } if (force.getNumGlobalParameters() > 0)
// } extraArgs << ", const float* globals";
// } pairEnergyUsesParam.resize(params->getBuffers().size(), false);
// else { int atomParamSize = 7;
// for (int j = 0; j < force.getNumComputedValues(); j++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
// if (needChainForValue[j]) { CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
// n2EnergyExpressions["/*"+cu.intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_1")+" += "] = energyDerivExpressions[i][2*j]; string paramName = "params"+cu.intToString(i+1);
// n2EnergyExpressions["/*"+cu.intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_2")+" += "] = energyDerivExpressions[i][2*j+1]; if (n2EnergyStr.find(paramName+"1") != n2EnergyStr.npos || n2EnergyStr.find(paramName+"2") != n2EnergyStr.npos) {
// } extraArgs << ", const " << buffer.getType() << "* __restrict__ global_" << paramName;
// } atomParams << buffer.getType() << " " << paramName << ";\n";
// } loadLocal1 << "localData[localAtomIndex]." << paramName << " = " << paramName << "1;\n";
// if (exclude) loadLocal2 << "localData[localAtomIndex]." << paramName << " = global_" << paramName << "[j];\n";
// n2EnergySource << "if (!isExcluded) {\n"; load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n";
// n2EnergySource << cu.getExpressionUtilities().createExpressions(n2EnergyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams"); load2 << buffer.getType() << " " << paramName << "2 = localData[atom2]." << paramName << ";\n";
// if (exclude) pairEnergyUsesParam[i] = true;
// n2EnergySource << "}\n"; atomParamSize += buffer.getNumComponents();
// } }
// map<string, string> replacements; }
// string n2EnergyStr = n2EnergySource.str(); pairEnergyUsesValue.resize(computedValues->getBuffers().size(), false);
// replacements["COMPUTE_INTERACTION"] = n2EnergyStr; for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
// stringstream extraArgs, loadLocal1, loadLocal2, clearLocal, load1, load2, declare1, recordDeriv, storeDerivs1, storeDerivs2, declareTemps, setTemps; CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
// if (force.getNumGlobalParameters() > 0) string valueName = "values"+cu.intToString(i+1);
// extraArgs << ", __global const float* globals"; if (n2EnergyStr.find(valueName+"1") != n2EnergyStr.npos || n2EnergyStr.find(valueName+"2") != n2EnergyStr.npos) {
// pairEnergyUsesParam.resize(params->getBuffers().size(), false); extraArgs << ", const " << buffer.getType() << "* __restrict__ global_" << valueName;
// for (int i = 0; i < (int) params->getBuffers().size(); i++) { atomParams << buffer.getType() << " " << valueName << ";\n";
// const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; loadLocal1 << "localData[localAtomIndex]." << valueName << " = " << valueName << "1;\n";
// string paramName = "params"+cu.intToString(i+1); loadLocal2 << "localData[localAtomIndex]." << valueName << " = global_" << valueName << "[j];\n";
// if (n2EnergyStr.find(paramName+"1") != n2EnergyStr.npos || n2EnergyStr.find(paramName+"2") != n2EnergyStr.npos) { load1 << buffer.getType() << " " << valueName << "1 = global_" << valueName << "[atom1];\n";
// extraArgs << ", __global const " << buffer.getType() << "* restrict global_" << paramName << ", __local " << buffer.getType() << "* restrict local_" << paramName; load2 << buffer.getType() << " " << valueName << "2 = localData[atom2]." << valueName << ";\n";
// loadLocal1 << "local_" << paramName << "[localAtomIndex] = " << paramName << "1;\n"; pairEnergyUsesValue[i] = true;
// loadLocal2 << "local_" << paramName << "[localAtomIndex] = global_" << paramName << "[j];\n"; atomParamSize += buffer.getNumComponents();
// load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n"; }
// load2 << buffer.getType() << " " << paramName << "2 = local_" << paramName << "[atom2];\n"; }
// pairEnergyUsesParam[i] = true; extraArgs << ", unsigned long long* __restrict__ derivBuffers";
// } for (int i = 0; i < force.getNumComputedValues(); i++) {
// } string index = cu.intToString(i+1);
// pairEnergyUsesValue.resize(computedValues->getBuffers().size(), false); atomParams << "real deriv" << index << ";\n";
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { clearLocal << "localData[localAtomIndex].deriv" << index << " = 0;\n";
// const CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i]; declare1 << "real deriv" << index << "_1 = 0;\n";
// string valueName = "values"+cu.intToString(i+1); load2 << "real deriv" << index << "_2 = 0;\n";
// if (n2EnergyStr.find(valueName+"1") != n2EnergyStr.npos || n2EnergyStr.find(valueName+"2") != n2EnergyStr.npos) { recordDeriv << "localData[atom2].deriv" << index << " += deriv" << index << "_2;\n";
// extraArgs << ", __global const " << buffer.getType() << "* restrict global_" << valueName << ", __local " << buffer.getType() << "* restrict local_" << valueName; storeDerivs1 << "STORE_DERIVATIVE_1(" << index << ")\n";
// loadLocal1 << "local_" << valueName << "[localAtomIndex] = " << valueName << "1;\n"; storeDerivs2 << "STORE_DERIVATIVE_2(" << index << ")\n";
// loadLocal2 << "local_" << valueName << "[localAtomIndex] = global_" << valueName << "[j];\n"; declareTemps << "__local real tempDerivBuffer" << index << "[64];\n";
// load1 << buffer.getType() << " " << valueName << "1 = global_" << valueName << "[atom1];\n"; setTemps << "tempDerivBuffer" << index << "[threadIdx.x] = deriv" << index << "_1;\n";
// load2 << buffer.getType() << " " << valueName << "2 = local_" << valueName << "[atom2];\n"; atomParamSize++;
// pairEnergyUsesValue[i] = true; }
// } replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
// } replacements["ATOM_PARAMETER_DATA"] = atomParams.str();
// if (useLong) { replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
// extraArgs << ", __global long* restrict derivBuffers"; replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
// for (int i = 0; i < force.getNumComputedValues(); i++) { replacements["CLEAR_LOCAL_DERIVATIVES"] = clearLocal.str();
// string index = cu.intToString(i+1); replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
// extraArgs << ", __local float* restrict local_deriv" << index; replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
// clearLocal << "local_deriv" << index << "[localAtomIndex] = 0.0f;\n"; replacements["DECLARE_ATOM1_DERIVATIVES"] = declare1.str();
// declare1 << "float deriv" << index << "_1 = 0.0f;\n"; replacements["RECORD_DERIVATIVE_2"] = recordDeriv.str();
// load2 << "float deriv" << index << "_2 = 0.0f;\n"; replacements["STORE_DERIVATIVES_1"] = storeDerivs1.str();
// recordDeriv << "local_deriv" << index << "[atom2] += deriv" << index << "_2;\n"; replacements["STORE_DERIVATIVES_2"] = storeDerivs2.str();
// storeDerivs1 << "STORE_DERIVATIVE_1(" << index << ")\n"; replacements["DECLARE_TEMP_BUFFERS"] = declareTemps.str();
// storeDerivs2 << "STORE_DERIVATIVE_2(" << index << ")\n"; replacements["SET_TEMP_BUFFERS"] = setTemps.str();
// declareTemps << "__local float tempDerivBuffer" << index << "[64];\n"; map<string, string> defines;
// setTemps << "tempDerivBuffer" << index << "[get_local_id(0)] = deriv" << index << "_1;\n"; if (useCutoff)
// } defines["USE_CUTOFF"] = "1";
// } if (usePeriodic)
// else { defines["USE_PERIODIC"] = "1";
// for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { if (anyExclusions)
// const CudaNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i]; defines["USE_EXCLUSIONS"] = "1";
// string index = cu.intToString(i+1); if (atomParamSize%2 == 0 && !cu.getUseDoublePrecision())
// extraArgs << ", __global " << buffer.getType() << "* restrict derivBuffers" << index << ", __local " << buffer.getType() << "* restrict local_deriv" << index; defines["NEED_PADDING"] = "1";
// clearLocal << "local_deriv" << index << "[localAtomIndex] = 0.0f;\n"; defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize());
// declare1 << buffer.getType() << " deriv" << index << "_1 = 0.0f;\n"; defines["WARPS_PER_GROUP"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize()/CudaContext::TileSize);
// load2 << buffer.getType() << " deriv" << index << "_2 = 0.0f;\n"; defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
// recordDeriv << "local_deriv" << index << "[atom2] += deriv" << index << "_2;\n"; defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// storeDerivs1 << "STORE_DERIVATIVE_1(" << index << ")\n"; defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// storeDerivs2 << "STORE_DERIVATIVE_2(" << index << ")\n"; defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
// declareTemps << "__local " << buffer.getType() << " tempDerivBuffer" << index << "[64];\n"; CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::customGBEnergyN2, replacements), defines);
// setTemps << "tempDerivBuffer" << index << "[get_local_id(0)] = deriv" << index << "_1;\n"; pairEnergyKernel = cu.getKernel(module, "computeN2Energy");
// } }
// } {
// replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); // Create the kernel to reduce the derivatives and calculate per-particle energy terms.
// replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
// replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str(); stringstream compute, extraArgs, load;
// replacements["CLEAR_LOCAL_DERIVATIVES"] = clearLocal.str(); if (force.getNumGlobalParameters() > 0)
// replacements["LOAD_ATOM1_PARAMETERS"] = load1.str(); extraArgs << ", const float* globals";
// replacements["LOAD_ATOM2_PARAMETERS"] = load2.str(); for (int i = 0; i < (int) params->getBuffers().size(); i++) {
// replacements["DECLARE_ATOM1_DERIVATIVES"] = declare1.str(); CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
// replacements["RECORD_DERIVATIVE_2"] = recordDeriv.str(); string paramName = "params"+cu.intToString(i+1);
// replacements["STORE_DERIVATIVES_1"] = storeDerivs1.str(); extraArgs << ", const " << buffer.getType() << "* __restrict__ " << paramName;
// replacements["STORE_DERIVATIVES_2"] = storeDerivs2.str(); }
// replacements["DECLARE_TEMP_BUFFERS"] = declareTemps.str(); for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
// replacements["SET_TEMP_BUFFERS"] = setTemps.str(); CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
// map<string, string> defines; string valueName = "values"+cu.intToString(i+1);
// if (cu.getNonbondedUtilities().getForceBufferPerAtomBlock()) extraArgs << ", const " << buffer.getType() << "* __restrict__ " << valueName;
// defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1"; }
// if (useCutoff) for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
// defines["USE_CUTOFF"] = "1"; CudaNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
// if (usePeriodic) string index = cu.intToString(i+1);
// defines["USE_PERIODIC"] = "1"; extraArgs << ", " << buffer.getType() << "* __restrict__ derivBuffers" << index;
// if (anyExclusions) compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
// defines["USE_EXCLUSIONS"] = "1"; }
// if (cu.getSIMDWidth() == 32) extraArgs << ", const long long* __restrict__ derivBuffersIn";
// defines["WARPS_PER_GROUP"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize()/CudaContext::TileSize); for (int i = 0; i < energyDerivs->getNumParameters(); ++i)
// defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance()); load << "derivBuffers" << energyDerivs->getParameterSuffix(i, "[index]") <<
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); " = RECIP(0xFFFFFFFF)*derivBuffersIn[index+PADDED_NUM_ATOMS*" << cu.intToString(i) << "];\n";
// defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks()); // Compute the various expressions.
// string file;
// if (deviceIsCpu) map<string, string> variables;
// file = CudaKernelSources::customGBEnergyN2_cpu; variables["x"] = "pos.x";
// else if (cu.getSIMDWidth() == 32) variables["y"] = "pos.y";
// file = CudaKernelSources::customGBEnergyN2_nvidia; variables["z"] = "pos.z";
// else for (int i = 0; i < force.getNumPerParticleParameters(); i++)
// file = CudaKernelSources::customGBEnergyN2_default; variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
// CUmodule module = cu.createModule(cu.replaceStrings(file, replacements), defines); for (int i = 0; i < force.getNumGlobalParameters(); i++)
// pairEnergyKernel = cu.getKernel(module, "computeN2Energy"); variables[force.getGlobalParameterName(i)] = "globals["+cu.intToString(i)+"]";
// } for (int i = 0; i < force.getNumComputedValues(); i++)
// { variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]");
// // Create the kernel to reduce the derivatives and calculate per-particle energy terms. map<string, Lepton::ParsedExpression> expressions;
// for (int i = 0; i < force.getNumEnergyTerms(); i++) {
// stringstream compute, extraArgs, reduce; string expression;
// if (force.getNumGlobalParameters() > 0) CustomGBForce::ComputationType type;
// extraArgs << ", __global const float* globals"; force.getEnergyTermParameters(i, expression, type);
// for (int i = 0; i < (int) params->getBuffers().size(); i++) { if (type != CustomGBForce::SingleParticle)
// const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; continue;
// string paramName = "params"+cu.intToString(i+1); Lepton::ParsedExpression parsed = Lepton::Parser::parse(expression, functions).optimize();
// extraArgs << ", __global const " << buffer.getType() << "* restrict " << paramName; expressions["/*"+cu.intToString(i+1)+"*/ energy += "] = parsed;
// } for (int j = 0; j < force.getNumComputedValues(); j++)
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { expressions["/*"+cu.intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j)+" += "] = energyDerivExpressions[i][j];
// const CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i]; Lepton::ParsedExpression gradx = parsed.differentiate("x").optimize();
// string valueName = "values"+cu.intToString(i+1); Lepton::ParsedExpression grady = parsed.differentiate("y").optimize();
// extraArgs << ", __global const " << buffer.getType() << "* restrict " << valueName; Lepton::ParsedExpression gradz = parsed.differentiate("z").optimize();
// } if (!isZeroExpression(gradx))
// for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { expressions["/*"+cu.intToString(i+1)+"*/ force.x -= "] = gradx;
// const CudaNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i]; if (!isZeroExpression(grady))
// string index = cu.intToString(i+1); expressions["/*"+cu.intToString(i+1)+"*/ force.y -= "] = grady;
// extraArgs << ", __global " << buffer.getType() << "* restrict derivBuffers" << index; if (!isZeroExpression(gradz))
// compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n"; expressions["/*"+cu.intToString(i+1)+"*/ force.z -= "] = gradz;
// } }
// if (useLong) { for (int i = 1; i < force.getNumComputedValues(); i++)
// extraArgs << ", __global const long* restrict derivBuffersIn"; for (int j = 0; j < i; j++)
// for (int i = 0; i < energyDerivs->getNumParameters(); ++i) expressions["real dV"+cu.intToString(i)+"dV"+cu.intToString(j)+" = "] = valueDerivExpressions[i][j];
// reduce << "derivBuffers" << energyDerivs->getParameterSuffix(i, "[index]") << compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functionDefinitions, "temp", prefix+"functionParams");
// " = (1.0f/0xFFFFFFFF)*derivBuffersIn[index+PADDED_NUM_ATOMS*" << cu.intToString(i) << "];\n";
// } // Record values.
// else {
// for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) compute << "forceBuffers[index] += (long long) (force.x*0xFFFFFFFF);\n";
// reduce << "REDUCE_VALUE(derivBuffers" << cu.intToString(i+1) << ", " << energyDerivs->getBuffers()[i].getType() << ")\n"; compute << "forceBuffers[index+PADDED_NUM_ATOMS] += (long long) (force.y*0xFFFFFFFF);\n";
// } compute << "forceBuffers[index+PADDED_NUM_ATOMS*2] += (long long) (force.z*0xFFFFFFFF);\n";
// for (int i = 1; i < force.getNumComputedValues(); i++) {
// // Compute the various expressions. compute << "real totalDeriv"<<i<<" = dV"<<i<<"dV0";
// for (int j = 1; j < i; j++)
// map<string, string> variables; compute << " + totalDeriv"<<j<<"*dV"<<i<<"dV"<<j;
// variables["x"] = "pos.x"; compute << ";\n";
// variables["y"] = "pos.y"; compute << "deriv"<<(i+1)<<" *= totalDeriv"<<i<<";\n";
// variables["z"] = "pos.z"; }
// for (int i = 0; i < force.getNumPerParticleParameters(); i++) for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
// variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]"); string index = cu.intToString(i+1);
// for (int i = 0; i < force.getNumGlobalParameters(); i++) compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n";
// variables[force.getGlobalParameterName(i)] = "globals["+cu.intToString(i)+"]"; }
// for (int i = 0; i < force.getNumComputedValues(); i++) map<string, string> replacements;
// variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]"); replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
// map<string, Lepton::ParsedExpression> expressions; replacements["LOAD_DERIVATIVES"] = load.str();
// for (int i = 0; i < force.getNumEnergyTerms(); i++) { replacements["COMPUTE_ENERGY"] = compute.str();
// string expression; map<string, string> defines;
// CustomGBForce::ComputationType type; defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// force.getEnergyTermParameters(i, expression, type); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// if (type != CustomGBForce::SingleParticle) CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customGBEnergyPerParticle, replacements), defines);
// continue; perParticleEnergyKernel = cu.getKernel(module, "computePerParticleEnergy");
// Lepton::ParsedExpression parsed = Lepton::Parser::parse(expression, functions).optimize(); }
// expressions["/*"+cu.intToString(i+1)+"*/ energy += "] = parsed; if (needParameterGradient) {
// for (int j = 0; j < force.getNumComputedValues(); j++) // Create the kernel to compute chain rule terms for computed values that depend explicitly on particle coordinates.
// expressions["/*"+cu.intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j)+" += "] = energyDerivExpressions[i][j];
// Lepton::ParsedExpression gradx = parsed.differentiate("x").optimize(); stringstream compute, extraArgs;
// Lepton::ParsedExpression grady = parsed.differentiate("y").optimize(); if (force.getNumGlobalParameters() > 0)
// Lepton::ParsedExpression gradz = parsed.differentiate("z").optimize(); extraArgs << ", const float* globals";
// if (!isZeroExpression(gradx)) for (int i = 0; i < (int) params->getBuffers().size(); i++) {
// expressions["/*"+cu.intToString(i+1)+"*/ force.x -= "] = gradx; CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
// if (!isZeroExpression(grady)) string paramName = "params"+cu.intToString(i+1);
// expressions["/*"+cu.intToString(i+1)+"*/ force.y -= "] = grady; extraArgs << ", const " << buffer.getType() << "* __restrict__ " << paramName;
// if (!isZeroExpression(gradz)) }
// expressions["/*"+cu.intToString(i+1)+"*/ force.z -= "] = gradz; for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
// } CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
// for (int i = 1; i < force.getNumComputedValues(); i++) string valueName = "values"+cu.intToString(i+1);
// for (int j = 0; j < i; j++) extraArgs << ", const " << buffer.getType() << "* __restrict__ " << valueName;
// expressions["float dV"+cu.intToString(i)+"dV"+cu.intToString(j)+" = "] = valueDerivExpressions[i][j]; }
// compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functionDefinitions, "temp", prefix+"functionParams"); for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
// CudaNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
// // Record values. string index = cu.intToString(i+1);
// extraArgs << ", " << buffer.getType() << "* __restrict__ derivBuffers" << index;
// compute << "forceBuffers[index] = forceBuffers[index]+force;\n"; compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
// for (int i = 1; i < force.getNumComputedValues(); i++) { }
// compute << "float totalDeriv"<<i<<" = dV"<<i<<"dV0"; map<string, string> variables;
// for (int j = 1; j < i; j++) variables["x"] = "pos.x";
// compute << " + totalDeriv"<<j<<"*dV"<<i<<"dV"<<j; variables["y"] = "pos.y";
// compute << ";\n"; variables["z"] = "pos.z";
// compute << "deriv"<<(i+1)<<" *= totalDeriv"<<i<<";\n"; for (int i = 0; i < force.getNumPerParticleParameters(); i++)
// } variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
// for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++)
// string index = cu.intToString(i+1); variables[force.getGlobalParameterName(i)] = "globals["+cu.intToString(i)+"]";
// compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n"; for (int i = 0; i < force.getNumComputedValues(); i++)
// } variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]");
// map<string, string> replacements; for (int i = 1; i < force.getNumComputedValues(); i++) {
// replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); string is = cu.intToString(i);
// replacements["REDUCE_DERIVATIVES"] = reduce.str(); compute << "real3 dV"<<is<<"dR = make_real3(0);\n";
// replacements["COMPUTE_ENERGY"] = compute.str(); for (int j = 1; j < i; j++) {
// map<string, string> defines; if (!isZeroExpression(valueDerivExpressions[i][j])) {
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); map<string, Lepton::ParsedExpression> derivExpressions;
// defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); string js = cu.intToString(j);
// CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customGBEnergyPerParticle, replacements), defines); derivExpressions["real dV"+is+"dV"+js+" = "] = valueDerivExpressions[i][j];
// perParticleEnergyKernel = cu.getKernel(module, "computePerParticleEnergy"); compute << cu.getExpressionUtilities().createExpressions(derivExpressions, variables, functionDefinitions, "temp_"+is+"_"+js, prefix+"functionParams");
// } compute << "dV"<<is<<"dR += dV"<<is<<"dV"<<js<<"*dV"<<js<<"dR;\n";
// if (needParameterGradient) { }
// // Create the kernel to compute chain rule terms for computed values that depend explicitly on particle coordinates. }
// map<string, Lepton::ParsedExpression> gradientExpressions;
// stringstream compute, extraArgs; if (!isZeroExpression(valueGradientExpressions[i][0]))
// if (force.getNumGlobalParameters() > 0) gradientExpressions["dV"+is+"dR.x += "] = valueGradientExpressions[i][0];
// extraArgs << ", __global const float* globals"; if (!isZeroExpression(valueGradientExpressions[i][1]))
// for (int i = 0; i < (int) params->getBuffers().size(); i++) { gradientExpressions["dV"+is+"dR.y += "] = valueGradientExpressions[i][1];
// const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; if (!isZeroExpression(valueGradientExpressions[i][2]))
// string paramName = "params"+cu.intToString(i+1); gradientExpressions["dV"+is+"dR.z += "] = valueGradientExpressions[i][2];
// extraArgs << ", __global const " << buffer.getType() << "* restrict " << paramName; compute << cu.getExpressionUtilities().createExpressions(gradientExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
// } }
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { for (int i = 1; i < force.getNumComputedValues(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i]; string is = cu.intToString(i);
// string valueName = "values"+cu.intToString(i+1); compute << "force -= deriv"<<energyDerivs->getParameterSuffix(i)<<"*dV"<<is<<"dR;\n";
// extraArgs << ", __global const " << buffer.getType() << "* restrict " << valueName; }
// } map<string, string> replacements;
// for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
// const CudaNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i]; replacements["COMPUTE_FORCES"] = compute.str();
// string index = cu.intToString(i+1); map<string, string> defines;
// extraArgs << ", __global " << buffer.getType() << "* restrict derivBuffers" << index; defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n"; defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// } CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::customGBGradientChainRule, replacements), defines);
// map<string, string> variables; gradientChainRuleKernel = cu.getKernel(module, "computeGradientChainRuleTerms");
// variables["x"] = "pos.x"; }
// variables["y"] = "pos.y"; {
// variables["z"] = "pos.z"; // Create the code to calculate chain rules terms as part of the default nonbonded kernel.
// for (int i = 0; i < force.getNumPerParticleParameters(); i++)
// variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]"); vector<pair<ExpressionTreeNode, string> > globalVariables;
// for (int i = 0; i < force.getNumGlobalParameters(); i++) for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// variables[force.getGlobalParameterName(i)] = "globals["+cu.intToString(i)+"]"; const string& name = force.getGlobalParameterName(i);
// for (int i = 0; i < force.getNumComputedValues(); i++) string value = "globals["+cu.intToString(i)+"]";
// variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]"); globalVariables.push_back(makeVariable(name, prefix+value));
// for (int i = 1; i < force.getNumComputedValues(); i++) { }
// string is = cu.intToString(i); vector<pair<ExpressionTreeNode, string> > variables = globalVariables;
// compute << "float4 dV"<<is<<"dR = (float4) 0;\n"; map<string, string> rename;
// for (int j = 1; j < i; j++) { ExpressionTreeNode rnode(new Operation::Variable("r"));
// if (!isZeroExpression(valueDerivExpressions[i][j])) { variables.push_back(make_pair(rnode, "r"));
// map<string, Lepton::ParsedExpression> derivExpressions; variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
// string js = cu.intToString(j); variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
// derivExpressions["float dV"+is+"dV"+js+" = "] = valueDerivExpressions[i][j]; for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
// compute << cu.getExpressionUtilities().createExpressions(derivExpressions, variables, functionDefinitions, "temp_"+is+"_"+js, prefix+"functionParams"); const string& name = force.getPerParticleParameterName(i);
// compute << "dV"<<is<<"dR += dV"<<is<<"dV"<<js<<"*dV"<<js<<"dR;\n"; variables.push_back(makeVariable(name+"1", prefix+"params"+params->getParameterSuffix(i, "1")));
// } variables.push_back(makeVariable(name+"2", prefix+"params"+params->getParameterSuffix(i, "2")));
// } rename[name+"1"] = name+"2";
// map<string, Lepton::ParsedExpression> gradientExpressions; rename[name+"2"] = name+"1";
// if (!isZeroExpression(valueGradientExpressions[i][0])) }
// gradientExpressions["dV"+is+"dR.x += "] = valueGradientExpressions[i][0]; map<string, Lepton::ParsedExpression> derivExpressions;
// if (!isZeroExpression(valueGradientExpressions[i][1])) stringstream chainSource;
// gradientExpressions["dV"+is+"dR.y += "] = valueGradientExpressions[i][1]; Lepton::ParsedExpression dVdR = Lepton::Parser::parse(computedValueExpressions[0], functions).differentiate("r").optimize();
// if (!isZeroExpression(valueGradientExpressions[i][2])) derivExpressions["real dV0dR1 = "] = dVdR;
// gradientExpressions["dV"+is+"dR.z += "] = valueGradientExpressions[i][2]; derivExpressions["real dV0dR2 = "] = dVdR.renameVariables(rename);
// compute << cu.getExpressionUtilities().createExpressions(gradientExpressions, variables, functionDefinitions, "temp", prefix+"functionParams"); chainSource << cu.getExpressionUtilities().createExpressions(derivExpressions, variables, functionDefinitions, prefix+"temp0_", prefix+"functionParams");
// } if (needChainForValue[0]) {
// for (int i = 1; i < force.getNumComputedValues(); i++) { if (useExclusionsForValue)
// string is = cu.intToString(i); chainSource << "if (!isExcluded) {\n";
// compute << "force -= deriv"<<energyDerivs->getParameterSuffix(i)<<"*dV"<<is<<"dR;\n"; chainSource << "tempForce -= dV0dR1*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(0, "1") << ";\n";
// } chainSource << "tempForce -= dV0dR2*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(0, "2") << ";\n";
// map<string, string> replacements; if (useExclusionsForValue)
// replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); chainSource << "}\n";
// replacements["COMPUTE_FORCES"] = compute.str(); }
// map<string, string> defines; for (int i = 1; i < force.getNumComputedValues(); i++) {
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); if (needChainForValue[i]) {
// CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customGBGradientChainRule, replacements), defines); chainSource << "tempForce -= dV0dR1*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(i, "1") << ";\n";
// gradientChainRuleKernel = cu.getKernel(module, "computeGradientChainRuleTerms"); chainSource << "tempForce -= dV0dR2*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(i, "2") << ";\n";
// } }
// { }
// // Create the code to calculate chain rules terms as part of the default nonbonded kernel. map<string, string> replacements;
// string chainStr = chainSource.str();
// vector<pair<ExpressionTreeNode, string> > globalVariables; replacements["COMPUTE_FORCE"] = chainStr;
// for (int i = 0; i < force.getNumGlobalParameters(); i++) { string source = cu.replaceStrings(CudaKernelSources::customGBChainRule, replacements);
// const string& name = force.getGlobalParameterName(i); vector<CudaNonbondedUtilities::ParameterInfo> parameters;
// string value = "globals["+cu.intToString(i)+"]"; vector<CudaNonbondedUtilities::ParameterInfo> arguments;
// globalVariables.push_back(makeVariable(name, prefix+value)); for (int i = 0; i < (int) params->getBuffers().size(); i++) {
// } CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
// vector<pair<ExpressionTreeNode, string> > variables = globalVariables; string paramName = prefix+"params"+cu.intToString(i+1);
// map<string, string> rename; if (chainStr.find(paramName+"1") != chainStr.npos || chainStr.find(paramName+"2") != chainStr.npos)
// ExpressionTreeNode rnode(new Operation::Variable("r")); parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
// variables.push_back(make_pair(rnode, "r")); }
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2")); for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR")); CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
// for (int i = 0; i < force.getNumPerParticleParameters(); i++) { string paramName = prefix+"values"+cu.intToString(i+1);
// const string& name = force.getPerParticleParameterName(i); if (chainStr.find(paramName+"1") != chainStr.npos || chainStr.find(paramName+"2") != chainStr.npos)
// variables.push_back(makeVariable(name+"1", prefix+"params"+params->getParameterSuffix(i, "1"))); parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
// variables.push_back(makeVariable(name+"2", prefix+"params"+params->getParameterSuffix(i, "2"))); }
// rename[name+"1"] = name+"2"; for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
// rename[name+"2"] = name+"1"; if (needChainForValue[i]) {
// } CudaNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
// map<string, Lepton::ParsedExpression> derivExpressions; string paramName = prefix+"dEdV"+cu.intToString(i+1);
// stringstream chainSource; parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
// Lepton::ParsedExpression dVdR = Lepton::Parser::parse(computedValueExpressions[0], functions).differentiate("r").optimize(); }
// derivExpressions["float dV0dR1 = "] = dVdR; }
// derivExpressions["float dV0dR2 = "] = dVdR.renameVariables(rename); if (globals != NULL) {
// chainSource << cu.getExpressionUtilities().createExpressions(derivExpressions, variables, functionDefinitions, prefix+"temp0_", prefix+"functionParams"); globals->upload(globalParamValues);
// if (needChainForValue[0]) { arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer()));
// if (useExclusionsForValue) }
// chainSource << "if (!isExcluded) {\n"; cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
// chainSource << "tempForce -= dV0dR1*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(0, "1") << ";\n"; for (int i = 0; i < (int) parameters.size(); i++)
// chainSource << "tempForce -= dV0dR2*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(0, "2") << ";\n"; cu.getNonbondedUtilities().addParameter(parameters[i]);
// if (useExclusionsForValue) for (int i = 0; i < (int) arguments.size(); i++)
// chainSource << "}\n"; cu.getNonbondedUtilities().addArgument(arguments[i]);
// } }
// for (int i = 1; i < force.getNumComputedValues(); i++) { cu.addForce(new CudaCustomGBForceInfo(force));
// if (needChainForValue[i]) { cu.addAutoclearBuffer(longEnergyDerivs->getDevicePointer(), sizeof(long long)*longEnergyDerivs->getSize());
// chainSource << "tempForce -= dV0dR1*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(i, "1") << ";\n"; }
// chainSource << "tempForce -= dV0dR2*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(i, "2") << ";\n";
// } double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// } CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// map<string, string> replacements; if (!hasInitializedKernels) {
// string chainStr = chainSource.str(); hasInitializedKernels = true;
// replacements["COMPUTE_FORCE"] = chainStr; maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
// string source = cu.replaceStrings(CudaKernelSources::customGBChainRule, replacements); valueBuffers = CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "customGBValueBuffers");
// vector<CudaNonbondedUtilities::ParameterInfo> parameters; cu.addAutoclearBuffer(valueBuffers->getDevicePointer(), sizeof(long long)*valueBuffers->getSize());
// vector<CudaNonbondedUtilities::ParameterInfo> arguments; cu.clearBuffer(valueBuffers->getDevicePointer(), sizeof(long long)*valueBuffers->getSize());
// for (int i = 0; i < (int) params->getBuffers().size(); i++) { pairValueArgs.push_back(&cu.getPosq().getDevicePointer());
// const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer());
// string paramName = prefix+"params"+cu.intToString(i+1); pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusionIndices().getDevicePointer());
// if (chainStr.find(paramName+"1") != chainStr.npos || chainStr.find(paramName+"2") != chainStr.npos) pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusionRowIndices().getDevicePointer());
// parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory())); pairValueArgs.push_back(&valueBuffers->getDevicePointer());
// } if (nb.getUseCutoff()) {
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { pairValueArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
// const CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i]; pairValueArgs.push_back(&nb.getInteractionCount().getDevicePointer());
// string paramName = prefix+"values"+cu.intToString(i+1); pairValueArgs.push_back(cu.getPeriodicBoxSizePointer());
// if (chainStr.find(paramName+"1") != chainStr.npos || chainStr.find(paramName+"2") != chainStr.npos) pairValueArgs.push_back(cu.getInvPeriodicBoxSizePointer());
// parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory())); pairValueArgs.push_back(&maxTiles);
// } pairValueArgs.push_back(&nb.getInteractionFlags().getDevicePointer());
// for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { }
// if (needChainForValue[i]) { else
// const CudaNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i]; pairValueArgs.push_back(&maxTiles);
// string paramName = prefix+"dEdV"+cu.intToString(i+1); if (globals != NULL)
// parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory())); pairValueArgs.push_back(&globals->getDevicePointer());
// } for (int i = 0; i < (int) params->getBuffers().size(); i++) {
// } if (pairValueUsesParam[i])
// if (globals != NULL) { pairValueArgs.push_back(&params->getBuffers()[i].getMemory());
// globals->upload(globalParamValues); }
// arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDevicePointer())); if (tabulatedFunctionParams != NULL) {
// } for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, force.getCutoffDistance(), exclusionList, source, force.getForceGroup()); pairValueArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
// for (int i = 0; i < (int) parameters.size(); i++) pairValueArgs.push_back(&tabulatedFunctionParams->getDevicePointer());
// cu.getNonbondedUtilities().addParameter(parameters[i]); }
// for (int i = 0; i < (int) arguments.size(); i++) perParticleValueArgs.push_back(&cu.getPosq().getDevicePointer());
// cu.getNonbondedUtilities().addArgument(arguments[i]); perParticleValueArgs.push_back(&valueBuffers->getDevicePointer());
// } if (globals != NULL)
// cu.addForce(new CudaCustomGBForceInfo(cu.getNonbondedUtilities().getNumForceBuffers(), force)); perParticleValueArgs.push_back(&globals->getDevicePointer());
// if (useLong) for (int i = 0; i < (int) params->getBuffers().size(); i++)
// cu.addAutoclearBuffer(longEnergyDerivs->getDevicePointer(), 2*longEnergyDerivs->getSize()); perParticleValueArgs.push_back(&params->getBuffers()[i].getMemory());
// else { for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
// for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { perParticleValueArgs.push_back(&computedValues->getBuffers()[i].getMemory());
// const CudaNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i]; if (tabulatedFunctionParams != NULL) {
// cu.addAutoclearBuffer(buffer.getMemory(), buffer.getSize()*energyDerivs->getNumObjects()/sizeof(cl_float)); for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// } perParticleValueArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
// } perParticleValueArgs.push_back(&tabulatedFunctionParams->getDevicePointer());
//} }
// pairEnergyArgs.push_back(&cu.getForce().getDevicePointer());
//double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { pairEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
// bool deviceIsCpu = (cu.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU); pairEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities(); pairEnergyArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer());
// if (!hasInitializedKernels) { pairEnergyArgs.push_back(&cu.getNonbondedUtilities().getExclusionIndices().getDevicePointer());
// hasInitializedKernels = true; pairEnergyArgs.push_back(&cu.getNonbondedUtilities().getExclusionRowIndices().getDevicePointer());
// maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0); if (nb.getUseCutoff()) {
// bool useLong = (cu.getSupports64BitGlobalAtomics() && !deviceIsCpu); pairEnergyArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
// if (useLong) { pairEnergyArgs.push_back(&nb.getInteractionCount().getDevicePointer());
// longValueBuffers = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "customGBLongValueBuffers"); pairEnergyArgs.push_back(cu.getPeriodicBoxSizePointer());
// cu.addAutoclearBuffer(longValueBuffers->getDevicePointer(), 2*longValueBuffers->getSize()); pairEnergyArgs.push_back(cu.getInvPeriodicBoxSizePointer());
// cu.clearBuffer(longValueBuffers->getDevicePointer(), 2*longValueBuffers->getSize()); pairEnergyArgs.push_back(&maxTiles);
// } pairEnergyArgs.push_back(&nb.getInteractionFlags().getDevicePointer());
// else { }
// valueBuffers = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "customGBValueBuffers"); else
// cu.addAutoclearBuffer(valueBuffers->getDevicePointer(), valueBuffers->getSize()); pairEnergyArgs.push_back(&maxTiles);
// cu.clearBuffer(*valueBuffers); if (globals != NULL)
// } pairEnergyArgs.push_back(&globals->getDevicePointer());
// int index = 0; for (int i = 0; i < (int) params->getBuffers().size(); i++) {
// pairValueKernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer()); if (pairEnergyUsesParam[i])
// pairValueKernel.setArg(index++, (deviceIsCpu ? CudaContext::TileSize : nb.getForceThreadBlockSize())*sizeof(cl_float4), NULL); pairEnergyArgs.push_back(&params->getBuffers()[i].getMemory());
// pairValueKernel.setArg<cu::Buffer>(index++, cu.getNonbondedUtilities().getExclusions().getDevicePointer()); }
// pairValueKernel.setArg<cu::Buffer>(index++, cu.getNonbondedUtilities().getExclusionIndices().getDevicePointer()); for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
// pairValueKernel.setArg<cu::Buffer>(index++, cu.getNonbondedUtilities().getExclusionRowIndices().getDevicePointer()); if (pairEnergyUsesValue[i])
// pairValueKernel.setArg<cu::Buffer>(index++, useLong ? longValueBuffers->getDevicePointer() : valueBuffers->getDevicePointer()); pairEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory());
// pairValueKernel.setArg(index++, (deviceIsCpu ? CudaContext::TileSize : nb.getForceThreadBlockSize())*sizeof(cl_float), NULL); }
// /// \todo Eliminate this argument and make local to the kernel. For *_default.cu kernel can actually make it TileSize rather than getForceThreadBlockSize as only half the workgroup stores to it as was done with nonbonded_default.cu. pairEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer());
// /// \todo Also make the previous __local argument local as was done with nonbonded_default.cu. if (tabulatedFunctionParams != NULL) {
// pairValueKernel.setArg(index++, (deviceIsCpu ? CudaContext::TileSize : nb.getForceThreadBlockSize())*sizeof(cl_float), NULL); for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// if (nb.getUseCutoff()) { pairEnergyArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
// pairValueKernel.setArg<cu::Buffer>(index++, nb.getInteractingTiles().getDevicePointer()); pairEnergyArgs.push_back(&tabulatedFunctionParams->getDevicePointer());
// pairValueKernel.setArg<cu::Buffer>(index++, nb.getInteractionCount().getDevicePointer()); }
// index += 2; // Periodic box size arguments are set when the kernel is executed. perParticleEnergyArgs.push_back(&cu.getForce().getDevicePointer());
// pairValueKernel.setArg<cl_uint>(index++, maxTiles); perParticleEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
// if (cu.getSIMDWidth() == 32 || deviceIsCpu) perParticleEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
// pairValueKernel.setArg<cu::Buffer>(index++, nb.getInteractionFlags().getDevicePointer()); if (globals != NULL)
// } perParticleEnergyArgs.push_back(&globals->getDevicePointer());
// else for (int i = 0; i < (int) params->getBuffers().size(); i++)
// pairValueKernel.setArg<cl_uint>(index++, cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2); perParticleEnergyArgs.push_back(&params->getBuffers()[i].getMemory());
// if (globals != NULL) for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
// pairValueKernel.setArg<cu::Buffer>(index++, globals->getDevicePointer()); perParticleEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory());
// for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
// if (pairValueUsesParam[i]) { perParticleEnergyArgs.push_back(&energyDerivs->getBuffers()[i].getMemory());
// const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; perParticleEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer());
// pairValueKernel.setArg<cu::Memory>(index++, buffer.getMemory()); if (tabulatedFunctionParams != NULL) {
// pairValueKernel.setArg(index++, (deviceIsCpu ? CudaContext::TileSize : nb.getForceThreadBlockSize())*buffer.getSize(), NULL); for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// } perParticleEnergyArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
// } perParticleEnergyArgs.push_back(&tabulatedFunctionParams->getDevicePointer());
// if (tabulatedFunctionParams != NULL) { }
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++) if (needParameterGradient) {
// pairValueKernel.setArg<cu::Buffer>(index++, tabulatedFunctions[i]->getDevicePointer()); gradientChainRuleArgs.push_back(&cu.getForce().getDevicePointer());
// pairValueKernel.setArg<cu::Buffer>(index++, tabulatedFunctionParams->getDevicePointer()); gradientChainRuleArgs.push_back(&cu.getPosq().getDevicePointer());
// } if (globals != NULL)
// index = 0; gradientChainRuleArgs.push_back(&globals->getDevicePointer());
// perParticleValueKernel.setArg<cl_int>(index++, cu.getPaddedNumAtoms()); for (int i = 0; i < (int) params->getBuffers().size(); i++)
// perParticleValueKernel.setArg<cl_int>(index++, nb.getNumForceBuffers()); gradientChainRuleArgs.push_back(&params->getBuffers()[i].getMemory());
// perParticleValueKernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer()); for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
// perParticleValueKernel.setArg<cu::Buffer>(index++, useLong ? longValueBuffers->getDevicePointer() : valueBuffers->getDevicePointer()); gradientChainRuleArgs.push_back(&computedValues->getBuffers()[i].getMemory());
// if (globals != NULL) for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
// perParticleValueKernel.setArg<cu::Buffer>(index++, globals->getDevicePointer()); gradientChainRuleArgs.push_back(&energyDerivs->getBuffers()[i].getMemory());
// for (int i = 0; i < (int) params->getBuffers().size(); i++) }
// perParticleValueKernel.setArg<cu::Memory>(index++, params->getBuffers()[i].getMemory()); }
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) if (globals != NULL) {
// perParticleValueKernel.setArg<cu::Memory>(index++, computedValues->getBuffers()[i].getMemory()); bool changed = false;
// if (tabulatedFunctionParams != NULL) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++) float value = (float) context.getParameter(globalParamNames[i]);
// perParticleValueKernel.setArg<cu::Buffer>(index++, tabulatedFunctions[i]->getDevicePointer()); if (value != globalParamValues[i])
// perParticleValueKernel.setArg<cu::Buffer>(index++, tabulatedFunctionParams->getDevicePointer()); changed = true;
// } globalParamValues[i] = value;
// index = 0; }
// pairEnergyKernel.setArg<cu::Buffer>(index++, useLong ? cu.getLongForceBuffer().getDevicePointer() : cu.getForce().getDevicePointer()); if (changed)
// pairEnergyKernel.setArg<cu::Buffer>(index++, cu.getEnergyBuffer().getDevicePointer()); globals->upload(globalParamValues);
// pairEnergyKernel.setArg(index++, (deviceIsCpu ? CudaContext::TileSize : nb.getForceThreadBlockSize())*sizeof(cl_float4), NULL); }
// pairEnergyKernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer()); if (nb.getUseCutoff()) {
// pairEnergyKernel.setArg(index++, (deviceIsCpu ? CudaContext::TileSize : nb.getForceThreadBlockSize())*sizeof(cl_float4), NULL); if (maxTiles < nb.getInteractingTiles().getSize()) {
// pairEnergyKernel.setArg<cu::Buffer>(index++, cu.getNonbondedUtilities().getExclusions().getDevicePointer()); maxTiles = nb.getInteractingTiles().getSize();
// pairEnergyKernel.setArg<cu::Buffer>(index++, cu.getNonbondedUtilities().getExclusionIndices().getDevicePointer()); pairValueArgs[5] = &nb.getInteractingTiles().getDevicePointer();
// pairEnergyKernel.setArg<cu::Buffer>(index++, cu.getNonbondedUtilities().getExclusionRowIndices().getDevicePointer()); pairEnergyArgs[6] = &nb.getInteractingTiles().getDevicePointer();
// /// \todo Eliminate this argument and make local to the kernel. For *_default.cu kernel can actually make it TileSize rather than getForceThreadBlockSize as only half the workgroup stores to it as was done with nonbonded_default.cu. pairValueArgs[10] = &nb.getInteractionFlags().getDevicePointer();
// /// \todo Also make the previous __local argument local as was done with nonbonded_default.cu. pairEnergyArgs[11] = &nb.getInteractionFlags().getDevicePointer();
// pairEnergyKernel.setArg(index++, (deviceIsCpu ? CudaContext::TileSize : nb.getForceThreadBlockSize())*sizeof(cl_float4), NULL); }
// if (nb.getUseCutoff()) { }
// pairEnergyKernel.setArg<cu::Buffer>(index++, nb.getInteractingTiles().getDevicePointer()); cu.executeKernel(pairValueKernel, &pairValueArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
// pairEnergyKernel.setArg<cu::Buffer>(index++, nb.getInteractionCount().getDevicePointer()); cu.executeKernel(perParticleValueKernel, &perParticleValueArgs[0], cu.getPaddedNumAtoms());
// index += 2; // Periodic box size arguments are set when the kernel is executed. cu.executeKernel(pairEnergyKernel, &pairEnergyArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
// pairEnergyKernel.setArg<cl_uint>(index++, maxTiles); cu.executeKernel(perParticleEnergyKernel, &perParticleEnergyArgs[0], cu.getPaddedNumAtoms());
// if (cu.getSIMDWidth() == 32 || deviceIsCpu) if (needParameterGradient)
// pairEnergyKernel.setArg<cu::Buffer>(index++, nb.getInteractionFlags().getDevicePointer()); cu.executeKernel(gradientChainRuleKernel, &gradientChainRuleArgs[0], cu.getPaddedNumAtoms());
// } return 0.0;
// else }
// pairEnergyKernel.setArg<cl_uint>(index++, cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
// if (globals != NULL) void CudaCalcCustomGBForceKernel::copyParametersToContext(ContextImpl& context, const CustomGBForce& force) {
// pairEnergyKernel.setArg<cu::Buffer>(index++, globals->getDevicePointer()); cu.setAsCurrent();
// for (int i = 0; i < (int) params->getBuffers().size(); i++) { int numParticles = force.getNumParticles();
// if (pairEnergyUsesParam[i]) { if (numParticles != cu.getNumAtoms())
// const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; throw OpenMMException("updateParametersInContext: The number of particles has changed");
// pairEnergyKernel.setArg<cu::Memory>(index++, buffer.getMemory());
// pairEnergyKernel.setArg(index++, (deviceIsCpu ? CudaContext::TileSize : nb.getForceThreadBlockSize())*buffer.getSize(), NULL); // Record the per-particle parameters.
// }
// } vector<vector<float> > paramVector(numParticles);
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { vector<double> parameters;
// if (pairEnergyUsesValue[i]) { for (int i = 0; i < numParticles; i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i]; force.getParticleParameters(i, parameters);
// pairEnergyKernel.setArg<cu::Memory>(index++, buffer.getMemory()); paramVector[i].resize(parameters.size());
// pairEnergyKernel.setArg(index++, (deviceIsCpu ? CudaContext::TileSize : nb.getForceThreadBlockSize())*buffer.getSize(), NULL); for (int j = 0; j < (int) parameters.size(); j++)
// } paramVector[i][j] = (float) parameters[j];
// } }
// if (useLong) { params->setParameterValues(paramVector);
// pairEnergyKernel.setArg<cu::Memory>(index++, longEnergyDerivs->getDevicePointer());
// for (int i = 0; i < numComputedValues; ++i) // Mark that the current reordering may be invalid.
// pairEnergyKernel.setArg(index++, nb.getForceThreadBlockSize()*sizeof(cl_float), NULL);
// } cu.invalidateMolecules();
// else { }
// for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
// pairEnergyKernel.setArg<cu::Memory>(index++, buffer.getMemory());
// pairEnergyKernel.setArg(index++, (deviceIsCpu ? CudaContext::TileSize : nb.getForceThreadBlockSize())*buffer.getSize(), NULL);
// }
// }
// if (tabulatedFunctionParams != NULL) {
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// pairEnergyKernel.setArg<cu::Buffer>(index++, tabulatedFunctions[i]->getDevicePointer());
// pairEnergyKernel.setArg<cu::Buffer>(index++, tabulatedFunctionParams->getDevicePointer());
// }
// index = 0;
// perParticleEnergyKernel.setArg<cl_int>(index++, cu.getPaddedNumAtoms());
// perParticleEnergyKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
// perParticleEnergyKernel.setArg<cu::Buffer>(index++, cu.getForce().getDevicePointer());
// perParticleEnergyKernel.setArg<cu::Buffer>(index++, cu.getEnergyBuffer().getDevicePointer());
// perParticleEnergyKernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer());
// if (globals != NULL)
// perParticleEnergyKernel.setArg<cu::Buffer>(index++, globals->getDevicePointer());
// for (int i = 0; i < (int) params->getBuffers().size(); i++)
// perParticleEnergyKernel.setArg<cu::Memory>(index++, params->getBuffers()[i].getMemory());
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
// perParticleEnergyKernel.setArg<cu::Memory>(index++, computedValues->getBuffers()[i].getMemory());
// for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
// perParticleEnergyKernel.setArg<cu::Memory>(index++, energyDerivs->getBuffers()[i].getMemory());
// if (useLong)
// perParticleEnergyKernel.setArg<cu::Memory>(index++, longEnergyDerivs->getDevicePointer());
// if (tabulatedFunctionParams != NULL) {
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// perParticleEnergyKernel.setArg<cu::Buffer>(index++, tabulatedFunctions[i]->getDevicePointer());
// perParticleEnergyKernel.setArg<cu::Buffer>(index++, tabulatedFunctionParams->getDevicePointer());
// }
// if (needParameterGradient) {
// index = 0;
// gradientChainRuleKernel.setArg<cu::Buffer>(index++, cu.getForce().getDevicePointer());
// gradientChainRuleKernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer());
// if (globals != NULL)
// gradientChainRuleKernel.setArg<cu::Buffer>(index++, globals->getDevicePointer());
// for (int i = 0; i < (int) params->getBuffers().size(); i++)
// gradientChainRuleKernel.setArg<cu::Memory>(index++, params->getBuffers()[i].getMemory());
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
// gradientChainRuleKernel.setArg<cu::Memory>(index++, computedValues->getBuffers()[i].getMemory());
// for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
// gradientChainRuleKernel.setArg<cu::Memory>(index++, energyDerivs->getBuffers()[i].getMemory());
// }
// }
// if (globals != NULL) {
// bool changed = false;
// for (int i = 0; i < (int) globalParamNames.size(); i++) {
// cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
// if (value != globalParamValues[i])
// changed = true;
// globalParamValues[i] = value;
// }
// if (changed)
// globals->upload(globalParamValues);
// }
// if (nb.getUseCutoff()) {
// pairValueKernel.setArg<mm_float4>(10, cu.getPeriodicBoxSize());
// pairValueKernel.setArg<mm_float4>(11, cu.getInvPeriodicBoxSize());
// pairEnergyKernel.setArg<mm_float4>(11, cu.getPeriodicBoxSize());
// pairEnergyKernel.setArg<mm_float4>(12, cu.getInvPeriodicBoxSize());
// if (maxTiles < nb.getInteractingTiles().getSize()) {
// maxTiles = nb.getInteractingTiles().getSize();
// pairValueKernel.setArg<cu::Buffer>(8, nb.getInteractingTiles().getDevicePointer());
// pairValueKernel.setArg<cl_uint>(12, maxTiles);
// pairEnergyKernel.setArg<cu::Buffer>(9, nb.getInteractingTiles().getDevicePointer());
// pairEnergyKernel.setArg<cl_uint>(13, maxTiles);
// if (cu.getSIMDWidth() == 32 || deviceIsCpu) {
// pairValueKernel.setArg<cu::Buffer>(13, nb.getInteractionFlags().getDevicePointer());
// pairEnergyKernel.setArg<cu::Buffer>(14, nb.getInteractionFlags().getDevicePointer());
// }
// }
// }
// cu.executeKernel(pairValueKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
// cu.executeKernel(perParticleValueKernel, cu.getPaddedNumAtoms());
// cu.executeKernel(pairEnergyKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
// cu.executeKernel(perParticleEnergyKernel, cu.getPaddedNumAtoms());
// if (needParameterGradient)
// cu.executeKernel(gradientChainRuleKernel, cu.getPaddedNumAtoms());
// return 0.0;
//}
//
//void CudaCalcCustomGBForceKernel::copyParametersToContext(ContextImpl& context, const CustomGBForce& force) {
// cu.setAsCurrent();
// int numParticles = force.getNumParticles();
// if (numParticles != cu.getNumAtoms())
// throw OpenMMException("updateParametersInContext: The number of particles has changed");
//
// // Record the per-particle parameters.
//
// vector<vector<cl_float> > paramVector(numParticles);
// vector<double> parameters;
// for (int i = 0; i < numParticles; i++) {
// force.getParticleParameters(i, parameters);
// paramVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// paramVector[i][j] = (cl_float) parameters[j];
// }
// params->setParameterValues(paramVector);
//
// // Mark that the current reordering may be invalid.
//
// cu.invalidateMolecules();
//}
class CudaCustomExternalForceInfo : public CudaForceInfo { class CudaCustomExternalForceInfo : public CudaForceInfo {
public: public:
...@@ -3473,12 +3368,12 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust ...@@ -3473,12 +3368,12 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
extraArgs << ", const float* __restrict__ globals"; extraArgs << ", const float* __restrict__ globals";
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
const CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i]; CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
extraArgs << ", const "+buffer.getType()+"* __restrict__ donor"+buffer.getName(); extraArgs << ", const "+buffer.getType()+"* __restrict__ donor"+buffer.getName();
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+cu.intToString(i+1)+" = donor"+buffer.getName()+"[index];\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+cu.intToString(i+1)+" = donor"+buffer.getName()+"[index];\n");
} }
for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
const CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i]; CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
extraArgs << ", const "+buffer.getType()+"* __restrict__ acceptor"+buffer.getName(); extraArgs << ", const "+buffer.getType()+"* __restrict__ acceptor"+buffer.getName();
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+cu.intToString(i+1)+" = acceptor"+buffer.getName()+"[index];\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+cu.intToString(i+1)+" = acceptor"+buffer.getName()+"[index];\n");
} }
......
...@@ -715,58 +715,58 @@ private: ...@@ -715,58 +715,58 @@ private:
std::vector<void*> computeSumArgs, force1Args; std::vector<void*> computeSumArgs, force1Args;
}; };
///** /**
// * This kernel is invoked by CustomGBForce to calculate the forces acting on the system. * This kernel is invoked by CustomGBForce to calculate the forces acting on the system.
// */ */
//class CudaCalcCustomGBForceKernel : public CalcCustomGBForceKernel { class CudaCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
//public: public:
// CudaCalcCustomGBForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomGBForceKernel(name, platform), CudaCalcCustomGBForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomGBForceKernel(name, platform),
// hasInitializedKernels(false), cu(cu), params(NULL), computedValues(NULL), energyDerivs(NULL), longEnergyDerivs(NULL), globals(NULL), hasInitializedKernels(false), cu(cu), params(NULL), computedValues(NULL), energyDerivs(NULL), longEnergyDerivs(NULL), globals(NULL),
// valueBuffers(NULL), longValueBuffers(NULL), tabulatedFunctionParams(NULL), system(system) { valueBuffers(NULL), tabulatedFunctionParams(NULL), system(system) {
// } }
// ~CudaCalcCustomGBForceKernel(); ~CudaCalcCustomGBForceKernel();
// /** /**
// * Initialize the kernel. * Initialize the kernel.
// * *
// * @param system the System this kernel will be applied to * @param system the System this kernel will be applied to
// * @param force the CustomGBForce this kernel will be used for * @param force the CustomGBForce this kernel will be used for
// */ */
// void initialize(const System& system, const CustomGBForce& force); void initialize(const System& system, const CustomGBForce& force);
// /** /**
// * Execute the kernel to calculate the forces and/or energy. * Execute the kernel to calculate the forces and/or energy.
// * *
// * @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
// * @param includeForces true if forces should be calculated * @param includeForces true if forces should be calculated
// * @param includeEnergy true if the energy should be calculated * @param includeEnergy true if the energy should be calculated
// * @return the potential energy due to the force * @return the potential energy due to the force
// */ */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /** /**
// * Copy changed parameters over to a context. * Copy changed parameters over to a context.
// * *
// * @param context the context to copy parameters to * @param context the context to copy parameters to
// * @param force the CustomGBForce to copy the parameters from * @param force the CustomGBForce to copy the parameters from
// */ */
// void copyParametersToContext(ContextImpl& context, const CustomGBForce& force); void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
//private: private:
// bool hasInitializedKernels, needParameterGradient; bool hasInitializedKernels, needParameterGradient;
// int maxTiles, numComputedValues; int maxTiles, numComputedValues;
// CudaContext& cu; CudaContext& cu;
// CudaParameterSet* params; CudaParameterSet* params;
// CudaParameterSet* computedValues; CudaParameterSet* computedValues;
// CudaParameterSet* energyDerivs; CudaParameterSet* energyDerivs;
// CudaArray<cl_long>* longEnergyDerivs; CudaArray* longEnergyDerivs;
// CudaArray<cl_float>* globals; CudaArray* globals;
// CudaArray<cl_float>* valueBuffers; CudaArray* valueBuffers;
// CudaArray<cl_long>* longValueBuffers; CudaArray* tabulatedFunctionParams;
// CudaArray<mm_float4>* tabulatedFunctionParams; std::vector<std::string> globalParamNames;
// std::vector<std::string> globalParamNames; std::vector<float> globalParamValues;
// std::vector<cl_float> globalParamValues; std::vector<CudaArray*> tabulatedFunctions;
// std::vector<CudaArray<mm_float4>*> tabulatedFunctions; std::vector<bool> pairValueUsesParam, pairEnergyUsesParam, pairEnergyUsesValue;
// std::vector<bool> pairValueUsesParam, pairEnergyUsesParam, pairEnergyUsesValue; System& system;
// System& system; CUfunction pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel;
// CUfunction pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel; std::vector<void*> pairValueArgs, perParticleValueArgs, pairEnergyArgs, perParticleEnergyArgs, gradientChainRuleArgs;
//}; };
/** /**
* This kernel is invoked by CustomExternalForce to calculate the forces acting on the system and the energy of the system. * This kernel is invoked by CustomExternalForce to calculate the forces acting on the system and the energy of the system.
......
...@@ -52,7 +52,7 @@ CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(c ...@@ -52,7 +52,7 @@ CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(c
int multiprocessors; int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice())); CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice()));
numForceThreadBlocks = 2*multiprocessors; numForceThreadBlocks = 2*multiprocessors;
forceThreadBlockSize = 256; forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256);
} }
CudaNonbondedUtilities::~CudaNonbondedUtilities() { CudaNonbondedUtilities::~CudaNonbondedUtilities() {
...@@ -441,8 +441,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -441,8 +441,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks()); defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
if ((localDataSize/4)%2 == 0 && !context.getUseDoublePrecision()) if ((localDataSize/4)%2 == 0 && !context.getUseDoublePrecision())
defines["PARAMETER_SIZE_IS_EVEN"] = "1"; defines["PARAMETER_SIZE_IS_EVEN"] = "1";
string file; CUmodule program = context.createModule(CudaKernelSources::vectorOps+context.replaceStrings(CudaKernelSources::nonbonded, replacements), defines);
CUmodule program = context.createModule(context.replaceStrings(CudaKernelSources::vectorOps+CudaKernelSources::nonbonded, replacements), defines);
CUfunction kernel = context.getKernel(program, "computeNonbonded"); CUfunction kernel = context.getKernel(program, "computeNonbonded");
// Set arguments to the Kernel. // Set arguments to the Kernel.
......
#ifdef USE_CUTOFF
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2 && r2 < CUTOFF_SQUARED) {
#else
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
#endif
#ifdef USE_SYMMETRIC
real tempForce = 0;
#else
real3 tempForce1 = make_real3(0);
real3 tempForce2 = make_real3(0);
#endif
COMPUTE_FORCE
#ifdef USE_SYMMETRIC
dEdR += tempForce*invR;
#else
dEdR1 += tempForce1;
dEdR2 += tempForce2;
#endif
}
#define STORE_DERIVATIVE_1(INDEX) atomicAdd(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (deriv##INDEX##_1*0xFFFFFFFF)));
#define STORE_DERIVATIVE_2(INDEX) atomicAdd(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].deriv##INDEX*0xFFFFFFFF)));
#define TILE_SIZE 32
typedef struct {
real4 posq;
real3 force;
ATOM_PARAMETER_DATA
#ifdef NEED_PADDING
float padding;
#endif
} AtomData;
/**
* Compute a force based on pair interactions.
*/
extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer,
const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, const unsigned int* __restrict__ exclusionIndices,
const unsigned int* __restrict__ exclusionRowIndices,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags
#else
unsigned int numTiles
#endif
PARAMETER_ARGUMENTS) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
unsigned int pos = warp*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps;
unsigned int end = (warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps;
#else
unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps;
#endif
real energy = 0;
unsigned int lasty = 0xFFFFFFFF;
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
__shared__ unsigned int exclusionRange[2*WARPS_PER_GROUP];
__shared__ int exclusionIndex[WARPS_PER_GROUP];
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
const unsigned int localGroupIndex = threadIdx.x/TILE_SIZE;
unsigned int x, y;
real3 force = make_real3(0);
DECLARE_ATOM1_DERIVATIVES
if (pos < end) {
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
y = tileIndices.y;
}
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
}
unsigned int atom1 = x*TILE_SIZE + tgx;
real4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
// Locate the exclusion data for this tile.
#ifdef USE_EXCLUSIONS
if (tgx < 2)
exclusionRange[2*localGroupIndex+tgx] = exclusionRowIndices[x+tgx];
if (tgx == 0)
exclusionIndex[localGroupIndex] = -1;
for (unsigned int i = exclusionRange[2*localGroupIndex]+tgx; i < exclusionRange[2*localGroupIndex+1]; i += TILE_SIZE)
if (exclusionIndices[i] == y)
exclusionIndex[localGroupIndex] = i*TILE_SIZE;
bool hasExclusions = (exclusionIndex[localGroupIndex] > -1);
#else
bool hasExclusions = false;
#endif
if (pos >= end)
; // This warp is done.
else if (x == y) {
// This tile is on the diagonal.
const unsigned int localAtomIndex = threadIdx.x;
localData[localAtomIndex].posq = posq1;
LOAD_LOCAL_PARAMETERS_FROM_1
#ifdef USE_EXCLUSIONS
unsigned int excl = exclusions[exclusionIndex[localGroupIndex]+tgx];
#endif
for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
int atom2 = tbx+j;
real4 posq2 = localData[atom2].posq;
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
real dEdR = 0;
real tempEnergy = 0;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
COMPUTE_INTERACTION
dEdR /= -r;
}
energy += 0.5f*tempEnergy;
delta *= dEdR;
force -= delta;
#ifdef USE_CUTOFF
}
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
}
}
else {
// This is an off-diagonal tile.
const unsigned int localAtomIndex = threadIdx.x;
if (lasty != y) {
unsigned int j = y*TILE_SIZE + tgx;
localData[localAtomIndex].posq = posq[j];
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
localData[localAtomIndex].force = make_real3(0);
CLEAR_LOCAL_DERIVATIVES
#ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (!hasExclusions && flags == 0) {
// No interactions in this tile.
}
else
#endif
{
// Compute the full set of interactions in this tile.
#ifdef USE_EXCLUSIONS
unsigned int excl = (hasExclusions ? exclusions[exclusionIndex[localGroupIndex]+tgx] : 0xFFFFFFFF);
excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
#endif
unsigned int tj = tgx;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
int atom2 = tbx+tj;
real4 posq2 = localData[atom2].posq;
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj;
real dEdR = 0;
real tempEnergy = 0;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION
dEdR /= -r;
}
energy += tempEnergy;
delta *= dEdR;
force -= delta;
atom2 = tbx+tj;
localData[atom2].force += delta;
RECORD_DERIVATIVE_2
#ifdef USE_CUTOFF
}
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
}
}
}
}
lasty = y;
// Write results.
if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0xFFFFFFFF)));
STORE_DERIVATIVES_1
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
STORE_DERIVATIVES_2
}
pos++;
} while (pos < end);
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
/**
* Reduce the derivatives computed in the N^2 energy kernel, and compute all per-particle energy terms.
*/
extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) {
real energy = 0;
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Load the derivatives
LOAD_DERIVATIVES
// Now calculate the per-particle energy terms.
real4 pos = posq[index];
real3 force = make_real3(0, 0, 0);
COMPUTE_ENERGY
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
/**
* Compute chain rule terms for computed values that depend explicitly on particle coordinates.
*/
extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__ forceBuffers, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) {
const real scale = RECIP((real) 0xFFFFFFFF);
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
real4 pos = posq[index];
real3 force = make_real3(scale*forceBuffers[index], scale*forceBuffers[index+PADDED_NUM_ATOMS], scale*forceBuffers[index+PADDED_NUM_ATOMS*2]);
COMPUTE_FORCES
forceBuffers[index] = (long long) (force.x*0xFFFFFFFF);
forceBuffers[index+PADDED_NUM_ATOMS] = (long long) (force.y*0xFFFFFFFF);
forceBuffers[index+PADDED_NUM_ATOMS*2] = (long long) (force.z*0xFFFFFFFF);
}
}
#define TILE_SIZE 32
typedef struct {
real4 posq;
real value, temp;
ATOM_PARAMETER_DATA
#ifdef NEED_PADDING
float padding;
#endif
} AtomData;
/**
* Compute a value based on pair interactions.
*/
extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions,
const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices, unsigned long long* __restrict__ global_value,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags
#else
unsigned int numTiles
#endif
PARAMETER_ARGUMENTS) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
unsigned int pos = warp*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps;
unsigned int end = (warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps;
#else
unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps;
#endif
real energy = 0;
unsigned int lasty = 0xFFFFFFFF;
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
__shared__ unsigned int exclusionRange[2*WARPS_PER_GROUP];
__shared__ int exclusionIndex[WARPS_PER_GROUP];
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
const unsigned int localGroupIndex = threadIdx.x/TILE_SIZE;
unsigned int x, y;
real value = 0;
if (pos < end) {
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
y = tileIndices.y;
}
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
}
unsigned int atom1 = x*TILE_SIZE + tgx;
real4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
// Locate the exclusion data for this tile.
#ifdef USE_EXCLUSIONS
if (tgx < 2)
exclusionRange[2*localGroupIndex+tgx] = exclusionRowIndices[x+tgx];
if (tgx == 0)
exclusionIndex[localGroupIndex] = -1;
for (unsigned int i = exclusionRange[2*localGroupIndex]+tgx; i < exclusionRange[2*localGroupIndex+1]; i += TILE_SIZE)
if (exclusionIndices[i] == y)
exclusionIndex[localGroupIndex] = i*TILE_SIZE;
bool hasExclusions = (exclusionIndex[localGroupIndex] > -1);
#else
bool hasExclusions = false;
#endif
if (pos >= end)
; // This warp is done.
else if (x == y) {
// This tile is on the diagonal.
const unsigned int localAtomIndex = threadIdx.x;
localData[localAtomIndex].posq = posq1;
LOAD_LOCAL_PARAMETERS_FROM_1
#ifdef USE_EXCLUSIONS
unsigned int excl = exclusions[exclusionIndex[localGroupIndex]+tgx];
#endif
for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
int atom2 = tbx+j;
real4 posq2 = localData[atom2].posq;
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
real tempValue1 = 0;
real tempValue2 = 0;
#ifdef USE_EXCLUSIONS
if (!isExcluded && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
#else
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
#endif
COMPUTE_VALUE
}
value += tempValue1;
#ifdef USE_CUTOFF
}
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
}
}
else {
// This is an off-diagonal tile.
if (lasty != y) {
unsigned int j = y*TILE_SIZE + tgx;
localData[threadIdx.x].posq = posq[j];
const unsigned int localAtomIndex = threadIdx.x;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
localData[threadIdx.x].value = 0;
#ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (!hasExclusions && flags != 0xFFFFFFFF) {
if (flags == 0) {
// No interactions in this tile.
}
else {
// Compute only a subset of the interactions in this tile.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) {
int atom2 = tbx+j;
real4 posq2 = localData[atom2].posq;
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real tempValue1 = 0;
real tempValue2 = 0;
if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
real r = RECIP(invR);
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_VALUE
}
value += tempValue1;
}
localData[threadIdx.x].temp = tempValue2;
// Sum the forces on atom2.
if (tgx % 4 == 0)
localData[threadIdx.x].temp += localData[threadIdx.x+1].temp+localData[threadIdx.x+2].temp+localData[threadIdx.x+3].temp;
if (tgx == 0)
localData[tbx+j].value += localData[threadIdx.x].temp+localData[threadIdx.x+4].temp+localData[threadIdx.x+8].temp+localData[threadIdx.x+12].temp+localData[threadIdx.x+16].temp+localData[threadIdx.x+20].temp+localData[threadIdx.x+24].temp+localData[threadIdx.x+28].temp;
}
}
}
}
else
#endif
{
// Compute the full set of interactions in this tile.
#ifdef USE_EXCLUSIONS
unsigned int excl = (hasExclusions ? exclusions[exclusionIndex[localGroupIndex]+tgx] : 0xFFFFFFFF);
excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
#endif
unsigned int tj = tgx;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
int atom2 = tbx+tj;
real4 posq2 = localData[atom2].posq;
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj;
real tempValue1 = 0;
real tempValue2 = 0;
#ifdef USE_EXCLUSIONS
if (!isExcluded && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#else
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif
COMPUTE_VALUE
}
value += tempValue1;
localData[tbx+tj].value += tempValue2;
#ifdef USE_CUTOFF
}
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
}
}
}
}
// Write results.
if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&global_value[offset], static_cast<unsigned long long>((long long) (value*0xFFFFFFFF)));
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
atomicAdd(&global_value[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0xFFFFFFFF)));
}
lasty = y;
pos++;
} while (pos < end);
}
/**
* Reduce a pairwise computed value, and compute per-particle values.
*/
extern "C" __global__ void computePerParticleValues(real4* posq, long long* valueBuffers
PARAMETER_ARGUMENTS) {
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Load the pairwise value
real sum = valueBuffers[index]/(real) 0xFFFFFFFF;
// Now calculate other values
real4 pos = posq[index];
COMPUTE_VALUES
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of CustomGBForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "sfmt/SFMT.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/CustomGBForce.h"
#include "openmm/GBSAOBCForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
void testOBC(GBSAOBCForce::NonbondedMethod obcMethod, CustomGBForce::NonbondedMethod customMethod) {
const int numMolecules = 70;
const int numParticles = numMolecules*2;
const double boxSize = 10.0;
CudaPlatform platform;
// Create two systems: one with a GBSAOBCForce, and one using a CustomGBForce to implement the same interaction.
System standardSystem;
System customSystem;
for (int i = 0; i < numParticles; i++) {
standardSystem.addParticle(1.0);
customSystem.addParticle(1.0);
}
standardSystem.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0.0, 0.0), Vec3(0.0, boxSize, 0.0), Vec3(0.0, 0.0, boxSize));
customSystem.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0.0, 0.0), Vec3(0.0, boxSize, 0.0), Vec3(0.0, 0.0, boxSize));
GBSAOBCForce* obc = new GBSAOBCForce();
CustomGBForce* custom = new CustomGBForce();
obc->setCutoffDistance(2.0);
custom->setCutoffDistance(2.0);
custom->addPerParticleParameter("q");
custom->addPerParticleParameter("radius");
custom->addPerParticleParameter("scale");
custom->addGlobalParameter("solventDielectric", obc->getSolventDielectric());
custom->addGlobalParameter("soluteDielectric", obc->getSoluteDielectric());
custom->addComputedValue("I", "step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(1/U^2-1/L^2)*(r-sr2*sr2/r)+0.5*log(L/U)/r+C);"
"U=r+sr2;"
"C=2*(1/or1-1/L)*step(sr2-r-or1);"
"L=max(or1, D);"
"D=abs(r-sr2);"
"sr2 = scale2*or2;"
"or1 = radius1-0.009; or2 = radius2-0.009", CustomGBForce::ParticlePairNoExclusions);
custom->addComputedValue("B", "1/(1/or-tanh(1*psi-0.8*psi^2+4.85*psi^3)/radius);"
"psi=I*or; or=radius-0.009", CustomGBForce::SingleParticle);
custom->addEnergyTerm("28.3919551*(radius+0.14)^2*(radius/B)^6-0.5*138.935456*(1/soluteDielectric-1/solventDielectric)*q^2/B", CustomGBForce::SingleParticle);
custom->addEnergyTerm("-138.935456*(1/soluteDielectric-1/solventDielectric)*q1*q2/f;"
"f=sqrt(r^2+B1*B2*exp(-r^2/(4*B1*B2)))", CustomGBForce::ParticlePairNoExclusions);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<double> params(3);
for (int i = 0; i < numMolecules; i++) {
if (i < numMolecules/2) {
obc->addParticle(1.0, 0.2, 0.5);
params[0] = 1.0;
params[1] = 0.2;
params[2] = 0.5;
custom->addParticle(params);
obc->addParticle(-1.0, 0.1, 0.5);
params[0] = -1.0;
params[1] = 0.1;
custom->addParticle(params);
}
else {
obc->addParticle(1.0, 0.2, 0.8);
params[0] = 1.0;
params[1] = 0.2;
params[2] = 0.8;
custom->addParticle(params);
obc->addParticle(-1.0, 0.1, 0.8);
params[0] = -1.0;
params[1] = 0.1;
custom->addParticle(params);
}
positions[2*i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
positions[2*i+1] = Vec3(positions[2*i][0]+1.0, positions[2*i][1], positions[2*i][2]);
velocities[2*i] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt));
velocities[2*i+1] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt));
}
obc->setNonbondedMethod(obcMethod);
custom->setNonbondedMethod(customMethod);
standardSystem.addForce(obc);
customSystem.addForce(custom);
VerletIntegrator integrator1(0.01);
VerletIntegrator integrator2(0.01);
Context context1(standardSystem, integrator1, platform);
context1.setPositions(positions);
context1.setVelocities(velocities);
State state1 = context1.getState(State::Forces | State::Energy);
Context context2(customSystem, integrator2, platform);
context2.setPositions(positions);
context2.setVelocities(velocities);
State state2 = context2.getState(State::Forces | State::Energy);
ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-4);
for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-4);
}
// Try changing the particle parameters and make sure it's still correct.
for (int i = 0; i < numMolecules/2; i++) {
obc->setParticleParameters(2*i, 1.1, 0.3, 0.6);
params[0] = 1.1;
params[1] = 0.3;
params[2] = 0.6;
custom->setParticleParameters(2*i, params);
obc->setParticleParameters(2*i+1, -1.1, 0.2, 0.4);
params[0] = -1.1;
params[1] = 0.2;
params[2] = 0.4;
custom->setParticleParameters(2*i+1, params);
}
obc->updateParametersInContext(context1);
custom->updateParametersInContext(context2);
state1 = context1.getState(State::Forces | State::Energy);
state2 = context2.getState(State::Forces | State::Energy);
ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-4);
for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-4);
}
}
void testMembrane() {
const int numMolecules = 70;
const int numParticles = numMolecules*2;
const double boxSize = 10.0;
CudaPlatform platform;
// Create a system with an implicit membrane.
System system;
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
}
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0.0, 0.0), Vec3(0.0, boxSize, 0.0), Vec3(0.0, 0.0, boxSize));
CustomGBForce* custom = new CustomGBForce();
custom->setCutoffDistance(2.0);
custom->addPerParticleParameter("q");
custom->addPerParticleParameter("radius");
custom->addPerParticleParameter("scale");
custom->addGlobalParameter("thickness", 3);
custom->addGlobalParameter("solventDielectric", 78.3);
custom->addGlobalParameter("soluteDielectric", 1);
custom->addComputedValue("Imol", "step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(1/U^2-1/L^2)*(r-sr2*sr2/r)+0.5*log(L/U)/r+C);"
"U=r+sr2;"
"C=2*(1/or1-1/L)*step(sr2-r-or1);"
"L=max(or1, D);"
"D=abs(r-sr2);"
"sr2 = scale2*or2;"
"or1 = radius1-0.009; or2 = radius2-0.009", CustomGBForce::ParticlePairNoExclusions);
custom->addComputedValue("Imem", "(1/radius+2*log(2)/thickness)/(1+exp(7.2*(abs(z)+radius-0.5*thickness)))", CustomGBForce::SingleParticle);
custom->addComputedValue("B", "1/(1/or-tanh(1*psi-0.8*psi^2+4.85*psi^3)/radius);"
"psi=max(Imol,Imem)*or; or=radius-0.009", CustomGBForce::SingleParticle);
custom->addEnergyTerm("28.3919551*(radius+0.14)^2*(radius/B)^6-0.5*138.935456*(1/soluteDielectric-1/solventDielectric)*q^2/B", CustomGBForce::SingleParticle);
custom->addEnergyTerm("-138.935456*(1/soluteDielectric-1/solventDielectric)*q1*q2/f;"
"f=sqrt(r^2+B1*B2*exp(-r^2/(4*B1*B2)))", CustomGBForce::ParticlePairNoExclusions);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<double> params(3);
for (int i = 0; i < numMolecules; i++) {
if (i < numMolecules/2) {
params[0] = 1.0;
params[1] = 0.2;
params[2] = 0.5;
custom->addParticle(params);
params[0] = -1.0;
params[1] = 0.1;
custom->addParticle(params);
}
else {
params[0] = 1.0;
params[1] = 0.2;
params[2] = 0.8;
custom->addParticle(params);
params[0] = -1.0;
params[1] = 0.1;
custom->addParticle(params);
}
positions[2*i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
positions[2*i+1] = Vec3(positions[2*i][0]+1.0, positions[2*i][1], positions[2*i][2]);
velocities[2*i] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt));
velocities[2*i+1] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt));
}
system.addForce(custom);
VerletIntegrator integrator(0.01);
Context context(system, integrator, platform);
context.setPositions(positions);
context.setVelocities(velocities);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
// Take a small step in the direction of the energy gradient and see whether the potential energy changes by the expected amount.
double norm = 0.0;
for (int i = 0; i < (int) forces.size(); ++i)
norm += forces[i].dot(forces[i]);
norm = std::sqrt(norm);
const double stepSize = 1e-3;
double step = stepSize/norm;
for (int i = 0; i < (int) positions.size(); ++i) {
Vec3 p = positions[i];
Vec3 f = forces[i];
positions[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step);
}
context.setPositions(positions);
State state2 = context.getState(State::Energy);
ASSERT_EQUAL_TOL(norm, (state2.getPotentialEnergy()-state.getPotentialEnergy())/stepSize, 1e-2);
}
void testTabulatedFunction() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomGBForce* force = new CustomGBForce();
force->addComputedValue("a", "0", CustomGBForce::ParticlePair);
force->addEnergyTerm("fn(r)+1", CustomGBForce::ParticlePair);
force->addParticle(vector<double>());
force->addParticle(vector<double>());
vector<double> table;
for (int i = 0; i < 21; i++)
table.push_back(std::sin(0.25*i));
force->addFunction("fn", table, 1.0, 6.0);
system.addForce(force);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
for (int i = 1; i < 30; i++) {
double x = (7.0/30.0)*i;
positions[1] = Vec3(x, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
double force = (x < 1.0 || x > 6.0 ? 0.0 : -std::cos(x-1.0));
double energy = (x < 1.0 || x > 6.0 ? 0.0 : std::sin(x-1.0))+1.0;
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], 0.1);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], 0.1);
ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), 0.02);
}
for (int i = 1; i < 20; i++) {
double x = 0.25*i+1.0;
positions[1] = Vec3(x, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Energy);
double energy = (x < 1.0 || x > 6.0 ? 0.0 : std::sin(x-1.0))+1.0;
ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), 1e-4);
}
}
void testMultipleChainRules() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomGBForce* force = new CustomGBForce();
force->addComputedValue("a", "2*r", CustomGBForce::ParticlePair);
force->addComputedValue("b", "a+1", CustomGBForce::SingleParticle);
force->addComputedValue("c", "2*b+a", CustomGBForce::SingleParticle);
force->addEnergyTerm("0.1*a+1*b+10*c", CustomGBForce::SingleParticle); // 0.1*(2*r) + 2*r+1 + 10*(3*a+2) = 0.2*r + 2*r+1 + 40*r+20+20*r = 62.2*r+21
force->addParticle(vector<double>());
force->addParticle(vector<double>());
system.addForce(force);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
for (int i = 1; i < 5; i++) {
positions[1] = Vec3(i, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_VEC(Vec3(124.4, 0, 0), forces[0], 1e-4);
ASSERT_EQUAL_VEC(Vec3(-124.4, 0, 0), forces[1], 1e-4);
ASSERT_EQUAL_TOL(2*(62.2*i+21), state.getPotentialEnergy(), 0.02);
}
}
void testPositionDependence() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomGBForce* force = new CustomGBForce();
force->addComputedValue("a", "r", CustomGBForce::ParticlePair);
force->addComputedValue("b", "a+x*y", CustomGBForce::SingleParticle);
force->addEnergyTerm("b*z", CustomGBForce::SingleParticle);
force->addEnergyTerm("b1+b2", CustomGBForce::ParticlePair); // = 2*r+x1*y1+x2*y2
force->addParticle(vector<double>());
force->addParticle(vector<double>());
system.addForce(force);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
vector<Vec3> forces(2);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < 5; i++) {
positions[0] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt));
positions[1] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt));
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
Vec3 delta = positions[0]-positions[1];
double r = sqrt(delta.dot(delta));
double energy = 2*r+positions[0][0]*positions[0][1]+positions[1][0]*positions[1][1];
for (int j = 0; j < 2; j++)
energy += positions[j][2]*(r+positions[j][0]*positions[j][1]);
Vec3 force1(-(1+positions[0][2])*delta[0]/r-(1+positions[0][2])*positions[0][1]-(1+positions[1][2])*delta[0]/r,
-(1+positions[0][2])*delta[1]/r-(1+positions[0][2])*positions[0][0]-(1+positions[1][2])*delta[1]/r,
-(1+positions[0][2])*delta[2]/r-(r+positions[0][0]*positions[0][1])-(1+positions[1][2])*delta[2]/r);
Vec3 force2((1+positions[0][2])*delta[0]/r+(1+positions[1][2])*delta[0]/r-(1+positions[1][2])*positions[1][1],
(1+positions[0][2])*delta[1]/r+(1+positions[1][2])*delta[1]/r-(1+positions[1][2])*positions[1][0],
(1+positions[0][2])*delta[2]/r+(1+positions[1][2])*delta[2]/r-(r+positions[1][0]*positions[1][1]));
ASSERT_EQUAL_VEC(force1, forces[0], 1e-4);
ASSERT_EQUAL_VEC(force2, forces[1], 1e-4);
ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), 0.02);
// Take a small step in the direction of the energy gradient and see whether the potential energy changes by the expected amount.
double norm = 0.0;
for (int i = 0; i < (int) forces.size(); ++i)
norm += forces[i].dot(forces[i]);
norm = std::sqrt(norm);
const double stepSize = 1e-3;
double step = stepSize/norm;
for (int i = 0; i < (int) positions.size(); ++i) {
Vec3 p = positions[i];
Vec3 f = forces[i];
positions[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step);
}
context.setPositions(positions);
State state2 = context.getState(State::Energy);
ASSERT_EQUAL_TOL(norm, (state2.getPotentialEnergy()-state.getPotentialEnergy())/stepSize, 1e-3*abs(state.getPotentialEnergy()));
}
}
void testExclusions() {
CudaPlatform platform;
for (int i = 0; i < 4; i++) {
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomGBForce* force = new CustomGBForce();
force->addComputedValue("a", "r", i < 2 ? CustomGBForce::ParticlePair : CustomGBForce::ParticlePairNoExclusions);
force->addEnergyTerm("a", CustomGBForce::SingleParticle);
force->addEnergyTerm("(1+a1+a2)*r", i%2 == 0 ? CustomGBForce::ParticlePair : CustomGBForce::ParticlePairNoExclusions);
force->addParticle(vector<double>());
force->addParticle(vector<double>());
force->addExclusion(0, 1);
system.addForce(force);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(1, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
double f, energy;
switch (i)
{
case 0: // e = 0
f = 0;
energy = 0;
break;
case 1: // e = r
f = 1;
energy = 1;
break;
case 2: // e = 2r
f = 2;
energy = 2;
break;
case 3: // e = 3r + 2r^2
f = 7;
energy = 5;
break;
default:
ASSERT(false);
}
ASSERT_EQUAL_VEC(Vec3(f, 0, 0), forces[0], 1e-4);
ASSERT_EQUAL_VEC(Vec3(-f, 0, 0), forces[1], 1e-4);
ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), 1e-4);
// Take a small step in the direction of the energy gradient and see whether the potential energy changes by the expected amount.
double norm = 0.0;
for (int i = 0; i < (int) forces.size(); ++i)
norm += forces[i].dot(forces[i]);
norm = std::sqrt(norm);
const double stepSize = 1e-3;
double step = stepSize/norm;
for (int i = 0; i < (int) positions.size(); ++i) {
Vec3 p = positions[i];
Vec3 f = forces[i];
positions[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step);
}
context.setPositions(positions);
State state2 = context.getState(State::Energy);
ASSERT_EQUAL_TOL(norm, (state2.getPotentialEnergy()-state.getPotentialEnergy())/stepSize, 1e-3*abs(state.getPotentialEnergy()));
}
}
int main() {
try {
testOBC(GBSAOBCForce::NoCutoff, CustomGBForce::NoCutoff);
testOBC(GBSAOBCForce::CutoffNonPeriodic, CustomGBForce::CutoffNonPeriodic);
testOBC(GBSAOBCForce::CutoffPeriodic, CustomGBForce::CutoffPeriodic);
testMembrane();
testTabulatedFunction();
testMultipleChainRules();
testPositionDependence();
testExclusions();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment