Commit bd22eada authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to implement new CUDA platform: CustomNonbondedForce, CustomHbondForce, CustomIntegrator

parent 8eb6850d
...@@ -53,7 +53,7 @@ CudaArray::~CudaArray() { ...@@ -53,7 +53,7 @@ CudaArray::~CudaArray() {
} }
} }
void CudaArray::upload(void* data, bool blocking) { void CudaArray::upload(const void* data, bool blocking) {
CUresult result; CUresult result;
if (blocking) if (blocking)
result = cuMemcpyHtoD(pointer, data, size*elementSize); result = cuMemcpyHtoD(pointer, data, size*elementSize);
......
...@@ -94,7 +94,7 @@ public: ...@@ -94,7 +94,7 @@ public:
* Copy the values in a vector to the device memory. * Copy the values in a vector to the device memory.
*/ */
template <class T> template <class T>
void upload(std::vector<T>& data) { void upload(const std::vector<T>& data) {
if (sizeof(T) != elementSize || data.size() != size) if (sizeof(T) != elementSize || data.size() != size)
throw OpenMMException("Error uploading array "+name+": The specified vector does not match the size of the array"); throw OpenMMException("Error uploading array "+name+": The specified vector does not match the size of the array");
upload(&data[0], true); upload(&data[0], true);
...@@ -117,7 +117,7 @@ public: ...@@ -117,7 +117,7 @@ public:
* @param blocking if true, this call will block until the transfer is complete. If false, * @param blocking if true, this call will block until the transfer is complete. If false,
* the source array must be in page-locked memory. * the source array must be in page-locked memory.
*/ */
void upload(void* data, bool blocking = true); void upload(const void* data, bool blocking = true);
/** /**
* Copy the values in the device memory to an array. * Copy the values in the device memory to an array.
* *
......
...@@ -945,6 +945,10 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) { ...@@ -945,6 +945,10 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
reorderListeners[i]->execute(); reorderListeners[i]->execute();
} }
void CudaContext::addReorderListener(ReorderListener* listener) {
reorderListeners.push_back(listener);
}
struct CudaContext::WorkThread::ThreadData { struct CudaContext::WorkThread::ThreadData {
ThreadData(std::queue<CudaContext::WorkTask*>& tasks, bool& waiting, bool& finished, ThreadData(std::queue<CudaContext::WorkTask*>& tasks, bool& waiting, bool& finished,
pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) : pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) :
......
...@@ -94,16 +94,16 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform ...@@ -94,16 +94,16 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return new CudaCalcCustomTorsionForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcCustomTorsionForceKernel(name, platform, cu, context.getSystem());
if (name == CalcNonbondedForceKernel::Name()) if (name == CalcNonbondedForceKernel::Name())
return new CudaCalcNonbondedForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcNonbondedForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcCustomNonbondedForceKernel::Name()) if (name == CalcCustomNonbondedForceKernel::Name())
// return new CudaCalcCustomNonbondedForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcCustomNonbondedForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcGBSAOBCForceKernel::Name()) // if (name == CalcGBSAOBCForceKernel::Name())
// return new CudaCalcGBSAOBCForceKernel(name, platform, cu); // return new CudaCalcGBSAOBCForceKernel(name, platform, cu);
// if (name == CalcCustomGBForceKernel::Name()) // if (name == CalcCustomGBForceKernel::Name())
// return new CudaCalcCustomGBForceKernel(name, platform, cu, context.getSystem()); // return new CudaCalcCustomGBForceKernel(name, platform, cu, context.getSystem());
if (name == CalcCustomExternalForceKernel::Name()) if (name == CalcCustomExternalForceKernel::Name())
return new CudaCalcCustomExternalForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcCustomExternalForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcCustomHbondForceKernel::Name()) if (name == CalcCustomHbondForceKernel::Name())
// return new CudaCalcCustomHbondForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcCustomHbondForceKernel(name, platform, cu, context.getSystem());
if (name == CalcCustomCompoundBondForceKernel::Name()) if (name == CalcCustomCompoundBondForceKernel::Name())
return new CudaCalcCustomCompoundBondForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcCustomCompoundBondForceKernel(name, platform, cu, context.getSystem());
if (name == IntegrateVerletStepKernel::Name()) if (name == IntegrateVerletStepKernel::Name())
...@@ -116,8 +116,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform ...@@ -116,8 +116,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return new CudaIntegrateVariableVerletStepKernel(name, platform, cu); return new CudaIntegrateVariableVerletStepKernel(name, platform, cu);
if (name == IntegrateVariableLangevinStepKernel::Name()) if (name == IntegrateVariableLangevinStepKernel::Name())
return new CudaIntegrateVariableLangevinStepKernel(name, platform, cu); return new CudaIntegrateVariableLangevinStepKernel(name, platform, cu);
// if (name == IntegrateCustomStepKernel::Name()) if (name == IntegrateCustomStepKernel::Name())
// return new CudaIntegrateCustomStepKernel(name, platform, cu); return new CudaIntegrateCustomStepKernel(name, platform, cu);
if (name == ApplyAndersenThermostatKernel::Name()) if (name == ApplyAndersenThermostatKernel::Name())
return new CudaApplyAndersenThermostatKernel(name, platform, cu); return new CudaApplyAndersenThermostatKernel(name, platform, cu);
if (name == ApplyMonteCarloBarostatKernel::Name()) if (name == ApplyMonteCarloBarostatKernel::Name())
......
...@@ -1669,277 +1669,277 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1669,277 +1669,277 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
cu.invalidateMolecules(); cu.invalidateMolecules();
} }
//class CudaCustomNonbondedForceInfo : public CudaForceInfo { class CudaCustomNonbondedForceInfo : public CudaForceInfo {
public:
CudaCustomNonbondedForceInfo(const CustomNonbondedForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
vector<double> params1;
vector<double> params2;
force.getParticleParameters(particle1, params1);
force.getParticleParameters(particle2, params2);
for (int i = 0; i < (int) params1.size(); i++)
if (params1[i] != params2[i])
return false;
return true;
}
int getNumParticleGroups() {
return force.getNumExclusions();
}
void getParticlesInGroup(int index, vector<int>& particles) {
int particle1, particle2;
force.getExclusionParticles(index, particle1, particle2);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
return true;
}
private:
const CustomNonbondedForce& force;
};
CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (tabulatedFunctionParams != NULL)
delete tabulatedFunctionParams;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i];
}
void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
cu.setAsCurrent();
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
string prefix = "custom"+cu.intToString(forceIndex)+"_";
// Record parameters and exclusions.
int numParticles = force.getNumParticles();
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0)
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals");
vector<vector<float> > paramVector(numParticles);
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
vector<double> parameters;
force.getParticleParameters(i, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
exclusionList[i].push_back(i);
}
for (int i = 0; i < force.getNumExclusions(); i++) {
int particle1, particle2;
force.getExclusionParticles(i, particle1, particle2);
exclusionList[particle1].push_back(particle2);
exclusionList[particle2].push_back(particle1);
}
params->setParameterValues(paramVector);
// Record the tabulated functions.
CudaExpressionUtilities::FunctionPlaceholder fp;
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions;
vector<float4> tabulatedFunctionParamsVec(force.getNumFunctions());
for (int i = 0; i < force.getNumFunctions(); i++) {
string name;
vector<double> values;
double min, max;
force.getFunctionParameters(i, name, values, min, max);
string arrayName = prefix+"table"+cu.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = &fp;
tabulatedFunctionParamsVec[i] = make_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector<float4> f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
tabulatedFunctions.push_back(CudaArray::create<float4>(cu, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer()));
}
if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = CudaArray::create<float4>(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters");
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(float4), tabulatedFunctionParams->getDevicePointer()));
}
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
map<string, Lepton::ParsedExpression> forceExpressions;
forceExpressions["tempEnergy += "] = energyExpression;
forceExpressions["tempForce -= "] = forceExpression;
// Create the kernels.
vector<pair<ExpressionTreeNode, string> > variables;
ExpressionTreeNode rnode(new Operation::Variable("r"));
variables.push_back(make_pair(rnode, "r"));
variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables.push_back(makeVariable(name+"1", prefix+"params"+params->getParameterSuffix(i, "1")));
variables.push_back(makeVariable(name+"2", prefix+"params"+params->getParameterSuffix(i, "2")));
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]";
variables.push_back(makeVariable(name, prefix+value));
}
stringstream compute;
compute << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, prefix+"temp", prefix+"functionParams");
map<string, string> replacements;
replacements["COMPUTE_FORCE"] = compute.str();
string source = cu.replaceStrings(CudaKernelSources::customNonbonded, replacements);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
if (globals != NULL) {
globals->upload(globalParamValues);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer()));
}
cu.addForce(new CudaCustomNonbondedForceInfo(force));
}
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
return 0.0;
}
void CudaCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) {
cu.setAsCurrent();
int numParticles = force.getNumParticles();
if (numParticles != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
// Record the per-particle parameters.
vector<vector<float> > paramVector(numParticles);
vector<double> parameters;
for (int i = 0; i < numParticles; i++) {
force.getParticleParameters(i, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
//class CudaGBSAOBCForceInfo : public CudaForceInfo {
//public: //public:
// CudaCustomNonbondedForceInfo(int requiredBuffers, const CustomNonbondedForce& force) : CudaForceInfo(requiredBuffers), force(force) { // CudaGBSAOBCForceInfo(int requiredBuffers, const GBSAOBCForce& force) : CudaForceInfo(requiredBuffers), force(force) {
// } // }
// bool areParticlesIdentical(int particle1, int particle2) { // bool areParticlesIdentical(int particle1, int particle2) {
// vector<double> params1; // double charge1, charge2, radius1, radius2, scale1, scale2;
// vector<double> params2; // force.getParticleParameters(particle1, charge1, radius1, scale1);
// force.getParticleParameters(particle1, params1); // force.getParticleParameters(particle2, charge2, radius2, scale2);
// force.getParticleParameters(particle2, params2); // return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
// for (int i = 0; i < (int) params1.size(); i++)
// if (params1[i] != params2[i])
// return false;
// return true;
// }
// int getNumParticleGroups() {
// return force.getNumExclusions();
// }
// void getParticlesInGroup(int index, vector<int>& particles) {
// int particle1, particle2;
// force.getExclusionParticles(index, particle1, particle2);
// particles.resize(2);
// particles[0] = particle1;
// particles[1] = particle2;
// }
// bool areGroupsIdentical(int group1, int group2) {
// return true;
// } // }
//private: //private:
// const CustomNonbondedForce& force; // const GBSAOBCForce& force;
//}; //};
// //
//CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() { //CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
// cu.setAsCurrent(); // cu.setAsCurrent();
// if (params != NULL) // if (params != NULL)
// delete params; // delete params;
// if (globals != NULL) // if (bornSum != NULL)
// delete globals; // delete bornSum;
// if (tabulatedFunctionParams != NULL) // if (longBornSum != NULL)
// delete tabulatedFunctionParams; // delete longBornSum;
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++) // if (bornRadii != NULL)
// delete tabulatedFunctions[i]; // delete bornRadii;
// if (bornForce != NULL)
// delete bornForce;
// if (longBornForce != NULL)
// delete longBornForce;
// if (obcChain != NULL)
// delete obcChain;
//} //}
// //
//void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) { //void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
// cu.setAsCurrent(); // cu.setAsCurrent();
// int forceIndex; // if (cu.getPlatformData().contexts.size() > 1)
// for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex) // throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
// ; // CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// string prefix = "custom"+cu.intToString(forceIndex)+"_"; // params = new CudaArray<mm_float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
// // bornRadii = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
// // Record parameters and exclusions. // obcChain = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "obcChain");
// // if (cu.getSupports64BitGlobalAtomics()) {
// longBornSum = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "longBornSum");
// longBornForce = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "longBornForce");
// bornForce = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "bornForce");
// cu.addAutoclearBuffer(longBornSum->getDevicePointer(), 2*longBornSum->getSize());
// cu.addAutoclearBuffer(longBornForce->getDevicePointer(), 2*longBornForce->getSize());
// }
// else {
// bornSum = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornSum");
// bornForce = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornForce");
// cu.addAutoclearBuffer(bornSum->getDevicePointer(), bornSum->getSize());
// cu.addAutoclearBuffer(bornForce->getDevicePointer(), bornForce->getSize());
// }
// CudaArray<mm_float4>& posq = cu.getPosq();
// int numParticles = force.getNumParticles(); // int numParticles = force.getNumParticles();
// params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters"); // vector<mm_float2> paramsVector(numParticles);
// if (force.getNumGlobalParameters() > 0) // const double dielectricOffset = 0.009;
// globals = new CudaArray<cl_float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals", false, CL_MEM_READ_ONLY);
// vector<vector<cl_float> > paramVector(numParticles);
// vector<vector<int> > exclusionList(numParticles);
// for (int i = 0; i < numParticles; i++) { // for (int i = 0; i < numParticles; i++) {
// vector<double> parameters; // double charge, radius, scalingFactor;
// force.getParticleParameters(i, parameters); // force.getParticleParameters(i, charge, radius, scalingFactor);
// paramVector[i].resize(parameters.size()); // radius -= dielectricOffset;
// for (int j = 0; j < (int) parameters.size(); j++) // paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
// paramVector[i][j] = (cl_float) parameters[j]; // posq[i].w = (float) charge;
// exclusionList[i].push_back(i);
// }
// for (int i = 0; i < force.getNumExclusions(); i++) {
// int particle1, particle2;
// force.getExclusionParticles(i, particle1, particle2);
// exclusionList[particle1].push_back(particle2);
// exclusionList[particle2].push_back(particle1);
// } // }
// params->setParameterValues(paramVector); // posq.upload();
// params->upload(paramsVector);
// prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
// bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
// bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
// string source = CudaKernelSources::gbsaObc2;
// nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector<vector<int> >(), source, force.getForceGroup());
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params->getDevicePointer()));;
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "float", 1, sizeof(cl_float), bornForce->getDevicePointer()));;
// cu.addForce(new CudaGBSAOBCForceInfo(nb.getNumForceBuffers(), force));
//}
// //
// // Record the tabulated functions. //double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// // CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// CudaExpressionUtilities::FunctionPlaceholder fp; // bool deviceIsCpu = (cu.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
// map<string, Lepton::CustomFunction*> functions; // if (!hasCreatedKernels) {
// vector<pair<string, string> > functionDefinitions; // // These Kernels cannot be created in initialize(), because the CudaNonbondedUtilities has not been initialized yet then.
// vector<mm_float4> tabulatedFunctionParamsVec(force.getNumFunctions());
// for (int i = 0; i < force.getNumFunctions(); i++) {
// string name;
// vector<double> values;
// double min, max;
// force.getFunctionParameters(i, name, values, min, max);
// string arrayName = prefix+"table"+cu.intToString(i);
// functionDefinitions.push_back(make_pair(name, arrayName));
// functions[name] = &fp;
// tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
// vector<mm_float4> f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
// tabulatedFunctions.push_back(new CudaArray<mm_float4>(cu, values.size()-1, "TabulatedFunction"));
// tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer()));
// }
// if (force.getNumFunctions() > 0) {
// tabulatedFunctionParams = new CudaArray<mm_float4>(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
// tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(cl_float4), tabulatedFunctionParams->getDevicePointer()));
// }
//
// // Record information for the expressions.
//
// globalParamNames.resize(force.getNumGlobalParameters());
// globalParamValues.resize(force.getNumGlobalParameters());
// for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// globalParamNames[i] = force.getGlobalParameterName(i);
// globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
// }
// if (globals != NULL)
// globals->upload(globalParamValues);
// bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
// bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
// Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
// Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
// map<string, Lepton::ParsedExpression> forceExpressions;
// forceExpressions["tempEnergy += "] = energyExpression;
// forceExpressions["tempForce -= "] = forceExpression;
//
// // Create the kernels.
//
// vector<pair<ExpressionTreeNode, string> > variables;
// ExpressionTreeNode rnode(new Operation::Variable("r"));
// variables.push_back(make_pair(rnode, "r"));
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
// for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
// const string& name = force.getPerParticleParameterName(i);
// variables.push_back(makeVariable(name+"1", prefix+"params"+params->getParameterSuffix(i, "1")));
// variables.push_back(makeVariable(name+"2", prefix+"params"+params->getParameterSuffix(i, "2")));
// }
// for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// const string& name = force.getGlobalParameterName(i);
// string value = "globals["+cu.intToString(i)+"]";
// variables.push_back(makeVariable(name, prefix+value));
// }
// stringstream compute;
// compute << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, prefix+"temp", prefix+"functionParams");
// map<string, string> replacements;
// replacements["COMPUTE_FORCE"] = compute.str();
// string source = cu.replaceStrings(CudaKernelSources::customNonbonded, replacements);
// cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
// for (int i = 0; i < (int) params->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
// cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
// }
// if (globals != NULL) {
// globals->upload(globalParamValues);
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDevicePointer()));
// }
// cu.addForce(new CudaCustomNonbondedForceInfo(cu.getNonbondedUtilities().getNumForceBuffers(), force));
//}
//
//double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// if (globals != NULL) {
// bool changed = false;
// for (int i = 0; i < (int) globalParamNames.size(); i++) {
// cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
// if (value != globalParamValues[i])
// changed = true;
// globalParamValues[i] = value;
// }
// if (changed)
// globals->upload(globalParamValues);
// }
// return 0.0;
//}
//
//void CudaCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) {
// cu.setAsCurrent();
// int numParticles = force.getNumParticles();
// if (numParticles != cu.getNumAtoms())
// throw OpenMMException("updateParametersInContext: The number of particles has changed");
//
// // Record the per-particle parameters.
//
// vector<vector<cl_float> > paramVector(numParticles);
// vector<double> parameters;
// for (int i = 0; i < numParticles; i++) {
// force.getParticleParameters(i, parameters);
// paramVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// paramVector[i][j] = (cl_float) parameters[j];
// }
// params->setParameterValues(paramVector);
//
// // Mark that the current reordering may be invalid.
//
// cu.invalidateMolecules();
//}
//
//class CudaGBSAOBCForceInfo : public CudaForceInfo {
//public:
// CudaGBSAOBCForceInfo(int requiredBuffers, const GBSAOBCForce& force) : CudaForceInfo(requiredBuffers), force(force) {
// }
// bool areParticlesIdentical(int particle1, int particle2) {
// double charge1, charge2, radius1, radius2, scale1, scale2;
// force.getParticleParameters(particle1, charge1, radius1, scale1);
// force.getParticleParameters(particle2, charge2, radius2, scale2);
// return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
// }
//private:
// const GBSAOBCForce& force;
//};
//
//CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
// cu.setAsCurrent();
// if (params != NULL)
// delete params;
// if (bornSum != NULL)
// delete bornSum;
// if (longBornSum != NULL)
// delete longBornSum;
// if (bornRadii != NULL)
// delete bornRadii;
// if (bornForce != NULL)
// delete bornForce;
// if (longBornForce != NULL)
// delete longBornForce;
// if (obcChain != NULL)
// delete obcChain;
//}
//
//void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
// cu.setAsCurrent();
// if (cu.getPlatformData().contexts.size() > 1)
// throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// params = new CudaArray<mm_float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
// bornRadii = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
// obcChain = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "obcChain");
// if (cu.getSupports64BitGlobalAtomics()) {
// longBornSum = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "longBornSum");
// longBornForce = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "longBornForce");
// bornForce = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "bornForce");
// cu.addAutoclearBuffer(longBornSum->getDevicePointer(), 2*longBornSum->getSize());
// cu.addAutoclearBuffer(longBornForce->getDevicePointer(), 2*longBornForce->getSize());
// }
// else {
// bornSum = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornSum");
// bornForce = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornForce");
// cu.addAutoclearBuffer(bornSum->getDevicePointer(), bornSum->getSize());
// cu.addAutoclearBuffer(bornForce->getDevicePointer(), bornForce->getSize());
// }
// CudaArray<mm_float4>& posq = cu.getPosq();
// int numParticles = force.getNumParticles();
// vector<mm_float2> paramsVector(numParticles);
// const double dielectricOffset = 0.009;
// for (int i = 0; i < numParticles; i++) {
// double charge, radius, scalingFactor;
// force.getParticleParameters(i, charge, radius, scalingFactor);
// radius -= dielectricOffset;
// paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
// posq[i].w = (float) charge;
// }
// posq.upload();
// params->upload(paramsVector);
// prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
// bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
// bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
// string source = CudaKernelSources::gbsaObc2;
// nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector<vector<int> >(), source, force.getForceGroup());
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params->getDevicePointer()));;
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "float", 1, sizeof(cl_float), bornForce->getDevicePointer()));;
// cu.addForce(new CudaGBSAOBCForceInfo(nb.getNumForceBuffers(), force));
//}
//
//double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// bool deviceIsCpu = (cu.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
// if (!hasCreatedKernels) {
// // These Kernels cannot be created in initialize(), because the CudaNonbondedUtilities has not been initialized yet then.
// //
// hasCreatedKernels = true; // hasCreatedKernels = true;
// maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0); // maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0);
...@@ -3172,557 +3172,524 @@ void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& con ...@@ -3172,557 +3172,524 @@ void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& con
cu.invalidateMolecules(); cu.invalidateMolecules();
} }
//class CudaCustomHbondForceInfo : public CudaForceInfo { class CudaCustomHbondForceInfo : public CudaForceInfo {
//public: public:
// CudaCustomHbondForceInfo(int requiredBuffers, const CustomHbondForce& force) : CudaForceInfo(requiredBuffers), force(force) { CudaCustomHbondForceInfo(const CustomHbondForce& force) : force(force) {
// } }
// bool areParticlesIdentical(int particle1, int particle2) { bool areParticlesIdentical(int particle1, int particle2) {
// return true; return true;
// } }
// int getNumParticleGroups() { int getNumParticleGroups() {
// return force.getNumDonors()+force.getNumAcceptors()+force.getNumExclusions(); return force.getNumDonors()+force.getNumAcceptors()+force.getNumExclusions();
// } }
// void getParticlesInGroup(int index, vector<int>& particles) { void getParticlesInGroup(int index, vector<int>& particles) {
// int p1, p2, p3; int p1, p2, p3;
// vector<double> parameters; vector<double> parameters;
// if (index < force.getNumDonors()) { if (index < force.getNumDonors()) {
// force.getDonorParameters(index, p1, p2, p3, parameters); force.getDonorParameters(index, p1, p2, p3, parameters);
// particles.clear(); particles.clear();
// particles.push_back(p1); particles.push_back(p1);
// if (p2 > -1) if (p2 > -1)
// particles.push_back(p2); particles.push_back(p2);
// if (p3 > -1) if (p3 > -1)
// particles.push_back(p3); particles.push_back(p3);
// return; return;
// } }
// index -= force.getNumDonors(); index -= force.getNumDonors();
// if (index < force.getNumAcceptors()) { if (index < force.getNumAcceptors()) {
// force.getAcceptorParameters(index, p1, p2, p3, parameters); force.getAcceptorParameters(index, p1, p2, p3, parameters);
// particles.clear(); particles.clear();
// particles.push_back(p1); particles.push_back(p1);
// if (p2 > -1) if (p2 > -1)
// particles.push_back(p2); particles.push_back(p2);
// if (p3 > -1) if (p3 > -1)
// particles.push_back(p3); particles.push_back(p3);
// return; return;
// } }
// index -= force.getNumAcceptors(); index -= force.getNumAcceptors();
// int donor, acceptor; int donor, acceptor;
// force.getExclusionParticles(index, donor, acceptor); force.getExclusionParticles(index, donor, acceptor);
// particles.clear(); particles.clear();
// force.getDonorParameters(donor, p1, p2, p3, parameters); force.getDonorParameters(donor, p1, p2, p3, parameters);
// particles.push_back(p1); particles.push_back(p1);
// if (p2 > -1) if (p2 > -1)
// particles.push_back(p2); particles.push_back(p2);
// if (p3 > -1) if (p3 > -1)
// particles.push_back(p3); particles.push_back(p3);
// force.getAcceptorParameters(acceptor, p1, p2, p3, parameters); force.getAcceptorParameters(acceptor, p1, p2, p3, parameters);
// particles.push_back(p1); particles.push_back(p1);
// if (p2 > -1) if (p2 > -1)
// particles.push_back(p2); particles.push_back(p2);
// if (p3 > -1) if (p3 > -1)
// particles.push_back(p3); particles.push_back(p3);
// } }
// bool areGroupsIdentical(int group1, int group2) { bool areGroupsIdentical(int group1, int group2) {
// int p1, p2, p3; int p1, p2, p3;
// vector<double> params1, params2; vector<double> params1, params2;
// if (group1 < force.getNumDonors() && group2 < force.getNumDonors()) { if (group1 < force.getNumDonors() && group2 < force.getNumDonors()) {
// force.getDonorParameters(group1, p1, p2, p3, params1); force.getDonorParameters(group1, p1, p2, p3, params1);
// force.getDonorParameters(group2, p1, p2, p3, params2); force.getDonorParameters(group2, p1, p2, p3, params2);
// return (params1 == params2 && params1 == params2); return (params1 == params2 && params1 == params2);
// } }
// if (group1 < force.getNumDonors() || group2 < force.getNumDonors()) if (group1 < force.getNumDonors() || group2 < force.getNumDonors())
// return false; return false;
// group1 -= force.getNumDonors(); group1 -= force.getNumDonors();
// group2 -= force.getNumDonors(); group2 -= force.getNumDonors();
// if (group1 < force.getNumAcceptors() && group2 < force.getNumAcceptors()) { if (group1 < force.getNumAcceptors() && group2 < force.getNumAcceptors()) {
// force.getAcceptorParameters(group1, p1, p2, p3, params1); force.getAcceptorParameters(group1, p1, p2, p3, params1);
// force.getAcceptorParameters(group2, p1, p2, p3, params2); force.getAcceptorParameters(group2, p1, p2, p3, params2);
// return (params1 == params2 && params1 == params2); return (params1 == params2 && params1 == params2);
// } }
// if (group1 < force.getNumAcceptors() || group2 < force.getNumAcceptors()) if (group1 < force.getNumAcceptors() || group2 < force.getNumAcceptors())
// return false; return false;
// return true; return true;
// } }
//private: private:
// const CustomHbondForce& force; const CustomHbondForce& force;
//}; };
//
//CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() { CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() {
// cu.setAsCurrent(); cu.setAsCurrent();
// if (donorParams != NULL) if (donorParams != NULL)
// delete donorParams; delete donorParams;
// if (acceptorParams != NULL) if (acceptorParams != NULL)
// delete acceptorParams; delete acceptorParams;
// if (donors != NULL) if (donors != NULL)
// delete donors; delete donors;
// if (acceptors != NULL) if (acceptors != NULL)
// delete acceptors; delete acceptors;
// if (donorBufferIndices != NULL) if (globals != NULL)
// delete donorBufferIndices; delete globals;
// if (acceptorBufferIndices != NULL) if (donorExclusions != NULL)
// delete acceptorBufferIndices; delete donorExclusions;
// if (globals != NULL) if (acceptorExclusions != NULL)
// delete globals; delete acceptorExclusions;
// if (donorExclusions != NULL) if (tabulatedFunctionParams != NULL)
// delete donorExclusions; delete tabulatedFunctionParams;
// if (acceptorExclusions != NULL) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// delete acceptorExclusions; delete tabulatedFunctions[i];
// if (tabulatedFunctionParams != NULL) }
// delete tabulatedFunctionParams;
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++) static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
// delete tabulatedFunctions[i]; computeDonor << value;
//} computeAcceptor << value;
// }
//static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
// computeDonor << value; static void applyDonorAndAcceptorForces(stringstream& applyToDonor, stringstream& applyToAcceptor, int atom, const string& value) {
// computeAcceptor << value; string forceNames[] = {"f1", "f2", "f3"};
//} if (atom < 3)
// applyToAcceptor << forceNames[atom]<<" += trim("<<value<<");\n";
//static void applyDonorAndAcceptorForces(stringstream& applyToDonor, stringstream& applyToAcceptor, int atom, const string& value) { else
// string forceNames[] = {"f1", "f2", "f3"}; applyToDonor << forceNames[atom-3]<<" += trim("<<value<<");\n";
// if (atom < 3) }
// applyToAcceptor << forceNames[atom]<<".xyz += "<<value<<";\n";
// else void CudaCalcCustomHbondForceKernel::initialize(const System& system, const CustomHbondForce& force) {
// applyToDonor << forceNames[atom-3]<<".xyz += "<<value<<";\n"; // Record the lists of donors and acceptors, and the parameters for each one.
//}
// cu.setAsCurrent();
//void CudaCalcCustomHbondForceKernel::initialize(const System& system, const CustomHbondForce& force) { int numContexts = cu.getPlatformData().contexts.size();
// // Record the lists of donors and acceptors, and the parameters for each one. int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts;
// int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts;
// cu.setAsCurrent(); numDonors = endIndex-startIndex;
// int numContexts = cu.getPlatformData().contexts.size(); numAcceptors = force.getNumAcceptors();
// int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts; if (numDonors == 0 || numAcceptors == 0)
// int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts; return;
// numDonors = endIndex-startIndex; int numParticles = system.getNumParticles();
// numAcceptors = force.getNumAcceptors(); donors = CudaArray::create<int4>(cu, numDonors, "customHbondDonors");
// if (numDonors == 0 || numAcceptors == 0) acceptors = CudaArray::create<int4>(cu, numAcceptors, "customHbondAcceptors");
// return; donorParams = new CudaParameterSet(cu, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
// int numParticles = system.getNumParticles(); acceptorParams = new CudaParameterSet(cu, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
// donors = new CudaArray<mm_int4>(cu, numDonors, "customHbondDonors"); if (force.getNumGlobalParameters() > 0)
// acceptors = new CudaArray<mm_int4>(cu, numAcceptors, "customHbondAcceptors"); globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customHbondGlobals");
// donorParams = new CudaParameterSet(cu, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters"); vector<vector<float> > donorParamVector(numDonors);
// acceptorParams = new CudaParameterSet(cu, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters"); vector<int4> donorVector(numDonors);
// if (force.getNumGlobalParameters() > 0) for (int i = 0; i < numDonors; i++) {
// globals = new CudaArray<cl_float>(cu, force.getNumGlobalParameters(), "customHbondGlobals", false, CL_MEM_READ_ONLY); vector<double> parameters;
// vector<vector<cl_float> > donorParamVector(numDonors); force.getDonorParameters(startIndex+i, donorVector[i].x, donorVector[i].y, donorVector[i].z, parameters);
// vector<mm_int4> donorVector(numDonors); donorParamVector[i].resize(parameters.size());
// for (int i = 0; i < numDonors; i++) { for (int j = 0; j < (int) parameters.size(); j++)
// vector<double> parameters; donorParamVector[i][j] = (float) parameters[j];
// force.getDonorParameters(startIndex+i, donorVector[i].x, donorVector[i].y, donorVector[i].z, parameters); }
// donorParamVector[i].resize(parameters.size()); donors->upload(donorVector);
// for (int j = 0; j < (int) parameters.size(); j++) donorParams->setParameterValues(donorParamVector);
// donorParamVector[i][j] = (cl_float) parameters[j]; vector<vector<float> > acceptorParamVector(numAcceptors);
// } vector<int4> acceptorVector(numAcceptors);
// donors->upload(donorVector); for (int i = 0; i < numAcceptors; i++) {
// donorParams->setParameterValues(donorParamVector); vector<double> parameters;
// vector<vector<cl_float> > acceptorParamVector(numAcceptors); force.getAcceptorParameters(i, acceptorVector[i].x, acceptorVector[i].y, acceptorVector[i].z, parameters);
// vector<mm_int4> acceptorVector(numAcceptors); acceptorParamVector[i].resize(parameters.size());
// for (int i = 0; i < numAcceptors; i++) { for (int j = 0; j < (int) parameters.size(); j++)
// vector<double> parameters; acceptorParamVector[i][j] = (float) parameters[j];
// force.getAcceptorParameters(i, acceptorVector[i].x, acceptorVector[i].y, acceptorVector[i].z, parameters); }
// acceptorParamVector[i].resize(parameters.size()); acceptors->upload(acceptorVector);
// for (int j = 0; j < (int) parameters.size(); j++) acceptorParams->setParameterValues(acceptorParamVector);
// acceptorParamVector[i][j] = (cl_float) parameters[j]; cu.addForce(new CudaCustomHbondForceInfo(force));
// }
// acceptors->upload(acceptorVector); // Record exclusions.
// acceptorParams->setParameterValues(acceptorParamVector);
// vector<int4> donorExclusionVector(numDonors, make_int4(-1, -1, -1, -1));
// // Select an output buffer index for each donor and acceptor. vector<int4> acceptorExclusionVector(numAcceptors, make_int4(-1, -1, -1, -1));
// for (int i = 0; i < force.getNumExclusions(); i++) {
// donorBufferIndices = new CudaArray<mm_int4>(cu, numDonors, "customHbondDonorBuffers"); int donor, acceptor;
// acceptorBufferIndices = new CudaArray<mm_int4>(cu, numAcceptors, "customHbondAcceptorBuffers"); force.getExclusionParticles(i, donor, acceptor);
// vector<mm_int4> donorBufferVector(numDonors); if (donor < startIndex || donor >= endIndex)
// vector<mm_int4> acceptorBufferVector(numAcceptors); continue;
// vector<int> donorBufferCounter(numParticles, 0); donor -= startIndex;
// for (int i = 0; i < numDonors; i++) if (donorExclusionVector[donor].x == -1)
// donorBufferVector[i] = mm_int4(donorVector[i].x > -1 ? donorBufferCounter[donorVector[i].x]++ : 0, donorExclusionVector[donor].x = acceptor;
// donorVector[i].y > -1 ? donorBufferCounter[donorVector[i].y]++ : 0, else if (donorExclusionVector[donor].y == -1)
// donorVector[i].z > -1 ? donorBufferCounter[donorVector[i].z]++ : 0, 0); donorExclusionVector[donor].y = acceptor;
// vector<int> acceptorBufferCounter(numParticles, 0); else if (donorExclusionVector[donor].z == -1)
// for (int i = 0; i < numAcceptors; i++) donorExclusionVector[donor].z = acceptor;
// acceptorBufferVector[i] = mm_int4(acceptorVector[i].x > -1 ? acceptorBufferCounter[acceptorVector[i].x]++ : 0, else if (donorExclusionVector[donor].w == -1)
// acceptorVector[i].y > -1 ? acceptorBufferCounter[acceptorVector[i].y]++ : 0, donorExclusionVector[donor].w = acceptor;
// acceptorVector[i].z > -1 ? acceptorBufferCounter[acceptorVector[i].z]++ : 0, 0); else
// donorBufferIndices->upload(donorBufferVector); throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per donor");
// acceptorBufferIndices->upload(acceptorBufferVector); if (acceptorExclusionVector[acceptor].x == -1)
// int maxBuffers = 1; acceptorExclusionVector[acceptor].x = donor;
// for (int i = 0; i < (int) donorBufferCounter.size(); i++) else if (acceptorExclusionVector[acceptor].y == -1)
// maxBuffers = max(maxBuffers, donorBufferCounter[i]); acceptorExclusionVector[acceptor].y = donor;
// for (int i = 0; i < (int) acceptorBufferCounter.size(); i++) else if (acceptorExclusionVector[acceptor].z == -1)
// maxBuffers = max(maxBuffers, acceptorBufferCounter[i]); acceptorExclusionVector[acceptor].z = donor;
// cu.addForce(new CudaCustomHbondForceInfo(maxBuffers, force)); else if (acceptorExclusionVector[acceptor].w == -1)
// acceptorExclusionVector[acceptor].w = donor;
// // Record exclusions. else
// throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor");
// vector<mm_int4> donorExclusionVector(numDonors, mm_int4(-1, -1, -1, -1)); }
// vector<mm_int4> acceptorExclusionVector(numAcceptors, mm_int4(-1, -1, -1, -1)); donorExclusions = CudaArray::create<int4>(cu, numDonors, "customHbondDonorExclusions");
// for (int i = 0; i < force.getNumExclusions(); i++) { acceptorExclusions = CudaArray::create<int4>(cu, numAcceptors, "customHbondAcceptorExclusions");
// int donor, acceptor; donorExclusions->upload(donorExclusionVector);
// force.getExclusionParticles(i, donor, acceptor); acceptorExclusions->upload(acceptorExclusionVector);
// if (donor < startIndex || donor >= endIndex)
// continue; // Record the tabulated functions.
// donor -= startIndex;
// if (donorExclusionVector[donor].x == -1) CudaExpressionUtilities::FunctionPlaceholder fp;
// donorExclusionVector[donor].x = acceptor; map<string, Lepton::CustomFunction*> functions;
// else if (donorExclusionVector[donor].y == -1) vector<pair<string, string> > functionDefinitions;
// donorExclusionVector[donor].y = acceptor; vector<float4> tabulatedFunctionParamsVec(force.getNumFunctions());
// else if (donorExclusionVector[donor].z == -1) stringstream tableArgs;
// donorExclusionVector[donor].z = acceptor; for (int i = 0; i < force.getNumFunctions(); i++) {
// else if (donorExclusionVector[donor].w == -1) string name;
// donorExclusionVector[donor].w = acceptor; vector<double> values;
// else double min, max;
// throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per donor"); force.getFunctionParameters(i, name, values, min, max);
// if (acceptorExclusionVector[acceptor].x == -1) string arrayName = "table"+cu.intToString(i);
// acceptorExclusionVector[acceptor].x = donor; functionDefinitions.push_back(make_pair(name, arrayName));
// else if (acceptorExclusionVector[acceptor].y == -1) functions[name] = &fp;
// acceptorExclusionVector[acceptor].y = donor; tabulatedFunctionParamsVec[i] = make_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
// else if (acceptorExclusionVector[acceptor].z == -1) vector<float4> f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
// acceptorExclusionVector[acceptor].z = donor; tabulatedFunctions.push_back(CudaArray::create<float4>(cu, values.size()-1, "TabulatedFunction"));
// else if (acceptorExclusionVector[acceptor].w == -1) tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
// acceptorExclusionVector[acceptor].w = donor; tableArgs << ", const float4* __restrict__ " << arrayName;
// else }
// throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor"); if (force.getNumFunctions() > 0) {
// } tabulatedFunctionParams = CudaArray::create<float4>(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters");
// donorExclusions = new CudaArray<mm_int4>(cu, numDonors, "customHbondDonorExclusions"); tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
// acceptorExclusions = new CudaArray<mm_int4>(cu, numDonors, "customHbondAcceptorExclusions"); tableArgs << ", const float4* __restrict__ functionParams";
// donorExclusions->upload(donorExclusionVector); }
// acceptorExclusions->upload(acceptorExclusionVector);
// // Record information about parameters.
// // Record the tabulated functions.
// globalParamNames.resize(force.getNumGlobalParameters());
// CudaExpressionUtilities::FunctionPlaceholder fp; globalParamValues.resize(force.getNumGlobalParameters());
// map<string, Lepton::CustomFunction*> functions; for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// vector<pair<string, string> > functionDefinitions; globalParamNames[i] = force.getGlobalParameterName(i);
// vector<mm_float4> tabulatedFunctionParamsVec(force.getNumFunctions()); globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
// stringstream tableArgs; }
// for (int i = 0; i < force.getNumFunctions(); i++) { if (globals != NULL)
// string name; globals->upload(globalParamValues);
// vector<double> values; map<string, string> variables;
// double min, max; for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
// force.getFunctionParameters(i, name, values, min, max); const string& name = force.getPerDonorParameterName(i);
// string arrayName = "table"+cu.intToString(i); variables[name] = "donorParams"+donorParams->getParameterSuffix(i);
// functionDefinitions.push_back(make_pair(name, arrayName)); }
// functions[name] = &fp; for (int i = 0; i < force.getNumPerAcceptorParameters(); i++) {
// tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2); const string& name = force.getPerAcceptorParameterName(i);
// vector<mm_float4> f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max); variables[name] = "acceptorParams"+acceptorParams->getParameterSuffix(i);
// tabulatedFunctions.push_back(new CudaArray<mm_float4>(cu, values.size()-1, "TabulatedFunction")); }
// tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// tableArgs << ", __global const float4* restrict " << arrayName; const string& name = force.getGlobalParameterName(i);
// } variables[name] = "globals["+cu.intToString(i)+"]";
// if (force.getNumFunctions() > 0) { }
// tabulatedFunctionParams = new CudaArray<mm_float4>(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
// tabulatedFunctionParams->upload(tabulatedFunctionParamsVec); // Now to generate the kernel. First, it needs to calculate all distances, angles,
// tableArgs << ", __global const float4* restrict functionParams"; // and dihedrals the expression depends on.
// }
// map<string, vector<int> > distances;
// // Record information about parameters. map<string, vector<int> > angles;
// map<string, vector<int> > dihedrals;
// globalParamNames.resize(force.getNumGlobalParameters()); Lepton::ParsedExpression energyExpression = CustomHbondForceImpl::prepareExpression(force, functions, distances, angles, dihedrals);
// globalParamValues.resize(force.getNumGlobalParameters()); map<string, Lepton::ParsedExpression> forceExpressions;
// for (int i = 0; i < force.getNumGlobalParameters(); i++) { set<string> computedDeltas;
// globalParamNames[i] = force.getGlobalParameterName(i); computedDeltas.insert("D1A1");
// globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i); string atomNames[] = {"A1", "A2", "A3", "D1", "D2", "D3"};
// } string atomNamesLower[] = {"a1", "a2", "a3", "d1", "d2", "d3"};
// if (globals != NULL) stringstream computeDonor, computeAcceptor, extraArgs;
// globals->upload(globalParamValues); int index = 0;
// map<string, string> variables; for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
// for (int i = 0; i < force.getNumPerDonorParameters(); i++) { const vector<int>& atoms = iter->second;
// const string& name = force.getPerDonorParameterName(i); string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
// variables[name] = "donorParams"+donorParams->getParameterSuffix(i); if (computedDeltas.count(deltaName) == 0) {
// } addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n");
// for (int i = 0; i < force.getNumPerAcceptorParameters(); i++) { computedDeltas.insert(deltaName);
// const string& name = force.getPerAcceptorParameterName(i); }
// variables[name] = "acceptorParams"+acceptorParams->getParameterSuffix(i); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real r_"+deltaName+" = SQRT(delta"+deltaName+".w);\n");
// } variables[iter->first] = "r_"+deltaName;
// for (int i = 0; i < force.getNumGlobalParameters(); i++) { forceExpressions["real dEdDistance"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
// const string& name = force.getGlobalParameterName(i); }
// variables[name] = "globals["+cu.intToString(i)+"]"; index = 0;
// } for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
// const vector<int>& atoms = iter->second;
// // Now to generate the kernel. First, it needs to calculate all distances, angles, string deltaName1 = atomNames[atoms[1]]+atomNames[atoms[0]];
// // and dihedrals the expression depends on. string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
// string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]];
// map<string, vector<int> > distances; if (computedDeltas.count(deltaName1) == 0) {
// map<string, vector<int> > angles; addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[0]]+");\n");
// map<string, vector<int> > dihedrals; computedDeltas.insert(deltaName1);
// Lepton::ParsedExpression energyExpression = CustomHbondForceImpl::prepareExpression(force, functions, distances, angles, dihedrals); }
// map<string, Lepton::ParsedExpression> forceExpressions; if (computedDeltas.count(deltaName2) == 0) {
// set<string> computedDeltas; addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[2]]+");\n");
// computedDeltas.insert("D1A1"); computedDeltas.insert(deltaName2);
// string atomNames[] = {"A1", "A2", "A3", "D1", "D2", "D3"}; }
// string atomNamesLower[] = {"a1", "a2", "a3", "d1", "d2", "d3"}; addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real "+angleName+" = computeAngle(delta"+deltaName1+", delta"+deltaName2+");\n");
// stringstream computeDonor, computeAcceptor, extraArgs; variables[iter->first] = angleName;
// int index = 0; forceExpressions["real dEdAngle"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
// for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) { }
// const vector<int>& atoms = iter->second; index = 0;
// string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]]; for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
// if (computedDeltas.count(deltaName) == 0) { const vector<int>& atoms = iter->second;
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n"); string deltaName1 = atomNames[atoms[0]]+atomNames[atoms[1]];
// computedDeltas.insert(deltaName); string deltaName2 = atomNames[atoms[2]]+atomNames[atoms[1]];
// } string deltaName3 = atomNames[atoms[2]]+atomNames[atoms[3]];
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float r_"+deltaName+" = sqrt(delta"+deltaName+".w);\n"); string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
// variables[iter->first] = "r_"+deltaName; string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
// forceExpressions["float dEdDistance"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize(); string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]];
// } if (computedDeltas.count(deltaName1) == 0) {
// index = 0; addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n");
// for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) { computedDeltas.insert(deltaName1);
// const vector<int>& atoms = iter->second; }
// string deltaName1 = atomNames[atoms[1]]+atomNames[atoms[0]]; if (computedDeltas.count(deltaName2) == 0) {
// string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]]; addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[1]]+");\n");
// string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]; computedDeltas.insert(deltaName2);
// if (computedDeltas.count(deltaName1) == 0) { }
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[0]]+");\n"); if (computedDeltas.count(deltaName3) == 0) {
// computedDeltas.insert(deltaName1); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName3+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[3]]+");\n");
// } computedDeltas.insert(deltaName3);
// if (computedDeltas.count(deltaName2) == 0) { }
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[2]]+");\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 "+crossName1+" = computeCross(delta"+deltaName1+", delta"+deltaName2+");\n");
// computedDeltas.insert(deltaName2); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 "+crossName2+" = computeCross(delta"+deltaName2+", delta"+deltaName3+");\n");
// } addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real "+dihedralName+" = computeAngle("+crossName1+", "+crossName2+");\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float "+angleName+" = computeAngle(delta"+deltaName1+", delta"+deltaName2+");\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, dihedralName+" *= (delta"+deltaName1+".x*"+crossName2+".x + delta"+deltaName1+".y*"+crossName2+".y + delta"+deltaName1+".z*"+crossName2+".z < 0 ? -1 : 1);\n");
// variables[iter->first] = angleName; variables[iter->first] = dihedralName;
// forceExpressions["float dEdAngle"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize(); forceExpressions["real dEdDihedral"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
// } }
// index = 0;
// for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) { // Next it needs to load parameters from global memory.
// const vector<int>& atoms = iter->second;
// string deltaName1 = atomNames[atoms[0]]+atomNames[atoms[1]]; if (force.getNumGlobalParameters() > 0)
// string deltaName2 = atomNames[atoms[2]]+atomNames[atoms[1]]; extraArgs << ", const float* __restrict__ globals";
// string deltaName3 = atomNames[atoms[2]]+atomNames[atoms[3]]; for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
// string crossName1 = "cross_"+deltaName1+"_"+deltaName2; const CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
// string crossName2 = "cross_"+deltaName2+"_"+deltaName3; extraArgs << ", const "+buffer.getType()+"* __restrict__ donor"+buffer.getName();
// string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]]; addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+cu.intToString(i+1)+" = donor"+buffer.getName()+"[index];\n");
// if (computedDeltas.count(deltaName1) == 0) { }
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n"); for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
// computedDeltas.insert(deltaName1); const CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
// } extraArgs << ", const "+buffer.getType()+"* __restrict__ acceptor"+buffer.getName();
// if (computedDeltas.count(deltaName2) == 0) { addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+cu.intToString(i+1)+" = acceptor"+buffer.getName()+"[index];\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[1]]+");\n"); }
// computedDeltas.insert(deltaName2);
// } // Now evaluate the expressions.
// if (computedDeltas.count(deltaName3) == 0) {
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName3+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[3]]+");\n"); computeAcceptor << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, "temp", "functionParams");
// computedDeltas.insert(deltaName3); forceExpressions["energy += "] = energyExpression;
// } computeDonor << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, "temp", "functionParams");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 "+crossName1+" = computeCross(delta"+deltaName1+", delta"+deltaName2+");\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 "+crossName2+" = computeCross(delta"+deltaName2+", delta"+deltaName3+");\n"); // Finally, apply forces to atoms.
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float "+dihedralName+" = computeAngle("+crossName1+", "+crossName2+");\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, dihedralName+" *= (delta"+deltaName1+".x*"+crossName2+".x + delta"+deltaName1+".y*"+crossName2+".y + delta"+deltaName1+".z*"+crossName2+".z < 0 ? -1 : 1);\n"); index = 0;
// variables[iter->first] = dihedralName; for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
// forceExpressions["float dEdDihedral"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize(); const vector<int>& atoms = iter->second;
// } string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
// string value = "(dEdDistance"+cu.intToString(index)+"/r_"+deltaName+")*delta"+deltaName;
// // Next it needs to load parameters from global memory. applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "-"+value);
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], value);
// if (force.getNumGlobalParameters() > 0) }
// extraArgs << ", __global const float* restrict globals"; index = 0;
// for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) { for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i]; const vector<int>& atoms = iter->second;
// extraArgs << ", __global const "+buffer.getType()+"* restrict donor"+buffer.getName(); string deltaName1 = atomNames[atoms[1]]+atomNames[atoms[0]];
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+cu.intToString(i+1)+" = donor"+buffer.getName()+"[index];\n"); string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
// } addDonorAndAcceptorCode(computeDonor, computeAcceptor, "{\n");
// for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) { addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real3 crossProd = cross(delta"+deltaName2+", delta"+deltaName1+");\n");
// const CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i]; addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real lengthCross = max(SQRT(dot(crossProd,crossProd)), 1e-6f);\n");
// extraArgs << ", __global const "+buffer.getType()+"* restrict acceptor"+buffer.getName(); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real3 deltaCross0 = -cross(trim(delta"+deltaName1+"), crossProd)*dEdAngle"+cu.intToString(index)+"/(delta"+deltaName1+".w*lengthCross);\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+cu.intToString(i+1)+" = acceptor"+buffer.getName()+"[index];\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real3 deltaCross2 = cross(trim(delta"+deltaName2+"), crossProd)*dEdAngle"+cu.intToString(index)+"/(delta"+deltaName2+".w*lengthCross);\n");
// } addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real3 deltaCross1 = -(deltaCross0+deltaCross2);\n");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "deltaCross0");
// // Now evaluate the expressions. applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], "deltaCross1");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[2], "deltaCross2");
// computeAcceptor << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, "temp", "functionParams"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "}\n");
// forceExpressions["energy += "] = energyExpression; }
// computeDonor << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, "temp", "functionParams"); index = 0;
// for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
// // Finally, apply forces to atoms. const vector<int>& atoms = iter->second;
// string deltaName1 = atomNames[atoms[0]]+atomNames[atoms[1]];
// index = 0; string deltaName2 = atomNames[atoms[2]]+atomNames[atoms[1]];
// for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) { string deltaName3 = atomNames[atoms[2]]+atomNames[atoms[3]];
// const vector<int>& atoms = iter->second; string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
// string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]]; string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
// string value = "(dEdDistance"+cu.intToString(index)+"/r_"+deltaName+")*delta"+deltaName+".xyz"; addDonorAndAcceptorCode(computeDonor, computeAcceptor, "{\n");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "-"+value); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real r = SQRT(delta"+deltaName2+".w);\n");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], value); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 ff;\n");
// } addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.x = (-dEdDihedral"+cu.intToString(index)+"*r)/"+crossName1+".w;\n");
// index = 0; addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.y = (delta"+deltaName1+".x*delta"+deltaName2+".x + delta"+deltaName1+".y*delta"+deltaName2+".y + delta"+deltaName1+".z*delta"+deltaName2+".z)/delta"+deltaName2+".w;\n");
// for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) { addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.z = (delta"+deltaName3+".x*delta"+deltaName2+".x + delta"+deltaName3+".y*delta"+deltaName2+".y + delta"+deltaName3+".z*delta"+deltaName2+".z)/delta"+deltaName2+".w;\n");
// const vector<int>& atoms = iter->second; addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.w = (dEdDihedral"+cu.intToString(index)+"*r)/"+crossName2+".w;\n");
// string deltaName1 = atomNames[atoms[1]]+atomNames[atoms[0]]; addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 internalF0 = ff.x*"+crossName1+";\n");
// string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]]; addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 internalF3 = ff.w*"+crossName2+";\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "{\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 s = ff.y*internalF0 - ff.z*internalF3;\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 crossProd = cross(delta"+deltaName2+", delta"+deltaName1+");\n"); applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "internalF0");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float lengthCross = max(length(crossProd), 1e-6f);\n"); applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], "s-internalF0");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 deltaCross0 = -cross(delta"+deltaName1+", crossProd)*dEdAngle"+cu.intToString(index)+"/(delta"+deltaName1+".w*lengthCross);\n"); applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[2], "-s-internalF3");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 deltaCross2 = cross(delta"+deltaName2+", crossProd)*dEdAngle"+cu.intToString(index)+"/(delta"+deltaName2+".w*lengthCross);\n"); applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[3], "internalF3");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 deltaCross1 = -(deltaCross0+deltaCross2);\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "}\n");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "deltaCross0.xyz"); }
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], "deltaCross1.xyz");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[2], "deltaCross2.xyz"); // Generate the kernels.
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "}\n");
// } map<string, string> replacements;
// index = 0; replacements["COMPUTE_DONOR_FORCE"] = computeDonor.str();
// for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) { replacements["COMPUTE_ACCEPTOR_FORCE"] = computeAcceptor.str();
// const vector<int>& atoms = iter->second; replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
// string deltaName1 = atomNames[atoms[0]]+atomNames[atoms[1]]; map<string, string> defines;
// string deltaName2 = atomNames[atoms[2]]+atomNames[atoms[1]]; defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// string deltaName3 = atomNames[atoms[2]]+atomNames[atoms[3]]; defines["NUM_DONORS"] = cu.intToString(numDonors);
// string crossName1 = "cross_"+deltaName1+"_"+deltaName2; defines["NUM_ACCEPTORS"] = cu.intToString(numAcceptors);
// string crossName2 = "cross_"+deltaName2+"_"+deltaName3; defines["M_PI"] = cu.doubleToString(M_PI);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "{\n"); if (force.getNonbondedMethod() != CustomHbondForce::NoCutoff) {
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float r = sqrt(delta"+deltaName2+".w);\n"); defines["USE_CUTOFF"] = "1";
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 ff;\n"); defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.x = (-dEdDihedral"+cu.intToString(index)+"*r)/"+crossName1+".w;\n"); }
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.y = (delta"+deltaName1+".x*delta"+deltaName2+".x + delta"+deltaName1+".y*delta"+deltaName2+".y + delta"+deltaName1+".z*delta"+deltaName2+".z)/delta"+deltaName2+".w;\n"); if (force.getNonbondedMethod() != CustomHbondForce::NoCutoff && force.getNonbondedMethod() != CustomHbondForce::CutoffNonPeriodic)
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.z = (delta"+deltaName3+".x*delta"+deltaName2+".x + delta"+deltaName3+".y*delta"+deltaName2+".y + delta"+deltaName3+".z*delta"+deltaName2+".z)/delta"+deltaName2+".w;\n"); defines["USE_PERIODIC"] = "1";
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.w = (dEdDihedral"+cu.intToString(index)+"*r)/"+crossName2+".w;\n"); if (force.getNumExclusions() > 0)
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 internalF0 = ff.x*"+crossName1+";\n"); defines["USE_EXCLUSIONS"] = "1";
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 internalF3 = ff.w*"+crossName2+";\n"); CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::vectorOps+CudaKernelSources::customHbondForce, replacements), defines);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 s = ff.y*internalF0 - ff.z*internalF3;\n"); donorKernel = cu.getKernel(module, "computeDonorForces");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "internalF0.xyz"); acceptorKernel = cu.getKernel(module, "computeAcceptorForces");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], "s.xyz-internalF0.xyz"); }
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[2], "-s.xyz-internalF3.xyz");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[3], "internalF3.xyz"); double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "}\n"); if (numDonors == 0 || numAcceptors == 0)
// } return 0.0;
// if (globals != NULL) {
// // Generate the kernels. bool changed = false;
// for (int i = 0; i < (int) globalParamNames.size(); i++) {
// map<string, string> replacements; float value = (float) context.getParameter(globalParamNames[i]);
// replacements["COMPUTE_DONOR_FORCE"] = computeDonor.str(); if (value != globalParamValues[i])
// replacements["COMPUTE_ACCEPTOR_FORCE"] = computeAcceptor.str(); changed = true;
// replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); globalParamValues[i] = value;
// map<string, string> defines; }
// defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); if (changed)
// defines["NUM_DONORS"] = cu.intToString(numDonors); globals->upload(globalParamValues);
// defines["NUM_ACCEPTORS"] = cu.intToString(numAcceptors); }
// defines["M_PI"] = cu.doubleToString(M_PI); if (!hasInitializedKernel) {
// if (force.getNonbondedMethod() != CustomHbondForce::NoCutoff) { hasInitializedKernel = true;
// defines["USE_CUTOFF"] = "1"; int index = 0;
// defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance()); donorArgs.push_back(&cu.getForce().getDevicePointer());
// } donorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
// if (force.getNonbondedMethod() != CustomHbondForce::NoCutoff && force.getNonbondedMethod() != CustomHbondForce::CutoffNonPeriodic) donorArgs.push_back(&cu.getPosq().getDevicePointer());
// defines["USE_PERIODIC"] = "1"; donorArgs.push_back(&donorExclusions->getDevicePointer());
// if (force.getNumExclusions() > 0) donorArgs.push_back(&donors->getDevicePointer());
// defines["USE_EXCLUSIONS"] = "1"; donorArgs.push_back(&acceptors->getDevicePointer());
// CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customHbondForce, replacements), defines); donorArgs.push_back(cu.getPeriodicBoxSizePointer());
// donorKernel = cu.getKernel(module, "computeDonorForces"); donorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
// acceptorKernel = cu.getKernel(module, "computeAcceptorForces"); if (globals != NULL)
//} donorArgs.push_back(&globals->getDevicePointer());
// for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
//double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
// if (numDonors == 0 || numAcceptors == 0) donorArgs.push_back(&buffer.getMemory());
// return 0.0; }
// if (globals != NULL) { for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
// bool changed = false; CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
// for (int i = 0; i < (int) globalParamNames.size(); i++) { donorArgs.push_back(&buffer.getMemory());
// cl_float value = (cl_float) context.getParameter(globalParamNames[i]); }
// if (value != globalParamValues[i]) if (tabulatedFunctionParams != NULL) {
// changed = true; for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// globalParamValues[i] = value; donorArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
// } donorArgs.push_back(&tabulatedFunctionParams->getDevicePointer());
// if (changed) }
// globals->upload(globalParamValues); index = 0;
// } acceptorArgs.push_back(&cu.getForce().getDevicePointer());
// if (!hasInitializedKernel) { acceptorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
// hasInitializedKernel = true; acceptorArgs.push_back(&cu.getPosq().getDevicePointer());
// int index = 0; acceptorArgs.push_back(&acceptorExclusions->getDevicePointer());
// donorKernel.setArg<cu::Buffer>(index++, cu.getForceBuffers().getDevicePointer()); acceptorArgs.push_back(&donors->getDevicePointer());
// donorKernel.setArg<cu::Buffer>(index++, cu.getEnergyBuffer().getDevicePointer()); acceptorArgs.push_back(&acceptors->getDevicePointer());
// donorKernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer()); acceptorArgs.push_back(cu.getPeriodicBoxSizePointer());
// donorKernel.setArg<cu::Buffer>(index++, donorExclusions->getDevicePointer()); acceptorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
// donorKernel.setArg<cu::Buffer>(index++, donors->getDevicePointer()); if (globals != NULL)
// donorKernel.setArg<cu::Buffer>(index++, acceptors->getDevicePointer()); acceptorArgs.push_back(&globals->getDevicePointer());
// donorKernel.setArg<cu::Buffer>(index++, donorBufferIndices->getDevicePointer()); for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
// donorKernel.setArg(index++, 3*CudaContext::ThreadBlockSize*sizeof(mm_float4), NULL); CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
// index += 2; // Periodic box size arguments are set when the kernel is executed. acceptorArgs.push_back(&buffer.getMemory());
// if (globals != NULL) }
// donorKernel.setArg<cu::Buffer>(index++, globals->getDevicePointer()); for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
// for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) { CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
// const CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i]; acceptorArgs.push_back(&buffer.getMemory());
// donorKernel.setArg<cu::Memory>(index++, buffer.getMemory()); }
// } if (tabulatedFunctionParams != NULL) {
// for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// const CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i]; acceptorArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
// donorKernel.setArg<cu::Memory>(index++, buffer.getMemory()); acceptorArgs.push_back(&tabulatedFunctionParams->getDevicePointer());
// } }
// if (tabulatedFunctionParams != NULL) { }
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++) int sharedMemorySize = 3*CudaContext::ThreadBlockSize*sizeof(float4);
// donorKernel.setArg<cu::Buffer>(index++, tabulatedFunctions[i]->getDevicePointer()); cu.executeKernel(donorKernel, &donorArgs[0], max(numDonors, numAcceptors), CudaContext::ThreadBlockSize, sharedMemorySize);
// donorKernel.setArg<cu::Buffer>(index++, tabulatedFunctionParams->getDevicePointer()); cu.executeKernel(acceptorKernel, &acceptorArgs[0], max(numDonors, numAcceptors), CudaContext::ThreadBlockSize, sharedMemorySize);
// } return 0.0;
// index = 0; }
// acceptorKernel.setArg<cu::Buffer>(index++, cu.getForceBuffers().getDevicePointer());
// acceptorKernel.setArg<cu::Buffer>(index++, cu.getEnergyBuffer().getDevicePointer()); void CudaCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& context, const CustomHbondForce& force) {
// acceptorKernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer()); cu.setAsCurrent();
// acceptorKernel.setArg<cu::Buffer>(index++, acceptorExclusions->getDevicePointer()); int numContexts = cu.getPlatformData().contexts.size();
// acceptorKernel.setArg<cu::Buffer>(index++, donors->getDevicePointer()); int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts;
// acceptorKernel.setArg<cu::Buffer>(index++, acceptors->getDevicePointer()); int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts;
// acceptorKernel.setArg<cu::Buffer>(index++, acceptorBufferIndices->getDevicePointer()); if (numDonors != endIndex-startIndex)
// acceptorKernel.setArg(index++, 3*CudaContext::ThreadBlockSize*sizeof(mm_float4), NULL); throw OpenMMException("updateParametersInContext: The number of donors has changed");
// index += 2; // Periodic box size arguments are set when the kernel is executed. if (numAcceptors != force.getNumAcceptors())
// if (globals != NULL) throw OpenMMException("updateParametersInContext: The number of acceptors has changed");
// acceptorKernel.setArg<cu::Buffer>(index++, globals->getDevicePointer());
// for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) { // Record the per-donor parameters.
// const CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
// acceptorKernel.setArg<cu::Memory>(index++, buffer.getMemory()); vector<vector<float> > donorParamVector(numDonors);
// } vector<double> parameters;
// for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) { for (int i = 0; i < numDonors; i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i]; int d1, d2, d3;
// acceptorKernel.setArg<cu::Memory>(index++, buffer.getMemory()); force.getDonorParameters(startIndex+i, d1, d2, d3, parameters);
// } donorParamVector[i].resize(parameters.size());
// if (tabulatedFunctionParams != NULL) { for (int j = 0; j < (int) parameters.size(); j++)
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++) donorParamVector[i][j] = (float) parameters[j];
// acceptorKernel.setArg<cu::Buffer>(index++, tabulatedFunctions[i]->getDevicePointer()); }
// acceptorKernel.setArg<cu::Buffer>(index++, tabulatedFunctionParams->getDevicePointer()); donorParams->setParameterValues(donorParamVector);
// }
// } // Record the per-acceptor parameters.
// donorKernel.setArg<mm_float4>(8, cu.getPeriodicBoxSize());
// donorKernel.setArg<mm_float4>(9, cu.getInvPeriodicBoxSize()); vector<vector<float> > acceptorParamVector(numAcceptors);
// cu.executeKernel(donorKernel, max(numDonors, numAcceptors)); for (int i = 0; i < numAcceptors; i++) {
// acceptorKernel.setArg<mm_float4>(8, cu.getPeriodicBoxSize()); int a1, a2, a3;
// acceptorKernel.setArg<mm_float4>(9, cu.getInvPeriodicBoxSize()); force.getAcceptorParameters(i, a1, a2, a3, parameters);
// cu.executeKernel(acceptorKernel, max(numDonors, numAcceptors)); acceptorParamVector[i].resize(parameters.size());
// return 0.0; for (int j = 0; j < (int) parameters.size(); j++)
//} acceptorParamVector[i][j] = (float) parameters[j];
// }
//void CudaCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& context, const CustomHbondForce& force) { acceptorParams->setParameterValues(acceptorParamVector);
// cu.setAsCurrent();
// int numContexts = cu.getPlatformData().contexts.size(); // Mark that the current reordering may be invalid.
// int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts;
// int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts; cu.invalidateMolecules();
// if (numDonors != endIndex-startIndex) }
// throw OpenMMException("updateParametersInContext: The number of donors has changed");
// if (numAcceptors != force.getNumAcceptors())
// throw OpenMMException("updateParametersInContext: The number of acceptors has changed");
//
// // Record the per-donor parameters.
//
// vector<vector<cl_float> > donorParamVector(numDonors);
// vector<double> parameters;
// for (int i = 0; i < numDonors; i++) {
// int d1, d2, d3;
// force.getDonorParameters(startIndex+i, d1, d2, d3, parameters);
// donorParamVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// donorParamVector[i][j] = (cl_float) parameters[j];
// }
// donorParams->setParameterValues(donorParamVector);
//
// // Record the per-acceptor parameters.
//
// vector<vector<cl_float> > acceptorParamVector(numAcceptors);
// for (int i = 0; i < numAcceptors; i++) {
// int a1, a2, a3;
// force.getAcceptorParameters(i, a1, a2, a3, parameters);
// acceptorParamVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// acceptorParamVector[i][j] = (cl_float) parameters[j];
// }
// acceptorParams->setParameterValues(acceptorParamVector);
//
// // Mark that the current reordering may be invalid.
//
// cu.invalidateMolecules();
//}
class CudaCustomCompoundBondForceInfo : public CudaForceInfo { class CudaCustomCompoundBondForceInfo : public CudaForceInfo {
public: public:
...@@ -4332,635 +4299,740 @@ void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, c ...@@ -4332,635 +4299,740 @@ void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, c
blockSize = max(blockSize, params->getSize()); blockSize = max(blockSize, params->getSize());
} }
double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) { double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms();
// Select the step size to use.
double maxStepSize = maxTime-cu.getTime();
float maxStepSizeFloat = (float) maxStepSize;
double tol = integrator.getErrorTolerance();
float tolFloat = (float) tol;
double tau = integrator.getFriction() == 0.0 ? 0.0 : 1.0/integrator.getFriction();
float tauFloat = (float) tau;
double kT = BOLTZ*integrator.getTemperature();
float kTFloat = (float) kT;
void* argsSelect[] = {cu.getUseDoublePrecision() ? (void*) &maxStepSize : (void*) &maxStepSizeFloat,
cu.getUseDoublePrecision() ? (void*) &tol : (void*) &tolFloat,
cu.getUseDoublePrecision() ? (void*) &tau : (void*) &tauFloat,
cu.getUseDoublePrecision() ? (void*) &kT : (void*) &kTFloat,
&cu.getIntegrationUtilities().getStepSize().getDevicePointer(),
&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &params->getDevicePointer()};
int sharedSize = blockSize*(cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
cu.executeKernel(selectSizeKernel, argsSelect, blockSize, blockSize, sharedSize);
// Call the first integration kernel.
int randomIndex = integration.prepareRandomNumbers(cu.getPaddedNumAtoms());
void* args1[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
&params->getDevicePointer(), &integration.getStepSize().getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex};
cu.executeKernel(kernel1, args1, numAtoms);
// Apply constraints.
integration.applyConstraints(integrator.getConstraintTolerance());
// Call the second integration kernel.
void* args2[] = {&cu.getPosq().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
&cu.getVelm().getDevicePointer(), &integration.getStepSize().getDevicePointer()};
cu.executeKernel(kernel2, args2, numAtoms);
integration.computeVirtualSites();
// Update the time and step count.
double dt, time;
if (cu.getUseDoublePrecision()) {
double2 stepSize;
cu.getIntegrationUtilities().getStepSize().download(&stepSize);
dt = stepSize.y;
time = cu.getTime()+dt;
if (dt == maxStepSize)
time = maxTime; // Avoid round-off error
}
else {
float2 stepSize;
cu.getIntegrationUtilities().getStepSize().download(&stepSize);
dt = stepSize.y;
time = cu.getTime()+dt;
if (dt == maxStepSizeFloat)
time = maxTime; // Avoid round-off error
}
cu.setTime(time);
cu.setStepCount(cu.getStepCount()+1);
return dt;
}
class CudaIntegrateCustomStepKernel::ReorderListener : public CudaContext::ReorderListener {
public:
ReorderListener(CudaContext& cu, CudaParameterSet& perDofValues, vector<vector<float> >& localPerDofValuesFloat, vector<vector<double> >& localPerDofValuesDouble, bool& deviceValuesAreCurrent) :
cu(cu), perDofValues(perDofValues), localPerDofValuesFloat(localPerDofValuesFloat), localPerDofValuesDouble(localPerDofValuesDouble), deviceValuesAreCurrent(deviceValuesAreCurrent) {
int numAtoms = cu.getNumAtoms();
lastAtomOrder.resize(numAtoms);
for (int i = 0; i < numAtoms; i++)
lastAtomOrder[i] = cu.getAtomIndex()[i];
}
void execute() {
// Reorder the per-DOF variables to reflect the new atom order.
if (perDofValues.getNumParameters() == 0)
return;
int numAtoms = cu.getNumAtoms();
const vector<int>& order = cu.getAtomIndex();
if (cu.getUseDoublePrecision()) {
if (deviceValuesAreCurrent)
perDofValues.getParameterValues(localPerDofValuesDouble);
vector<vector<double> > swap(3*numAtoms);
for (int i = 0; i < numAtoms; i++) {
swap[3*lastAtomOrder[i]] = localPerDofValuesDouble[3*i];
swap[3*lastAtomOrder[i]+1] = localPerDofValuesDouble[3*i+1];
swap[3*lastAtomOrder[i]+2] = localPerDofValuesDouble[3*i+2];
}
for (int i = 0; i < numAtoms; i++) {
localPerDofValuesDouble[3*i] = swap[3*order[i]];
localPerDofValuesDouble[3*i+1] = swap[3*order[i]+1];
localPerDofValuesDouble[3*i+2] = swap[3*order[i]+2];
}
perDofValues.setParameterValues(localPerDofValuesDouble);
}
else {
if (deviceValuesAreCurrent)
perDofValues.getParameterValues(localPerDofValuesFloat);
vector<vector<float> > swap(3*numAtoms);
for (int i = 0; i < numAtoms; i++) {
swap[3*lastAtomOrder[i]] = localPerDofValuesFloat[3*i];
swap[3*lastAtomOrder[i]+1] = localPerDofValuesFloat[3*i+1];
swap[3*lastAtomOrder[i]+2] = localPerDofValuesFloat[3*i+2];
}
for (int i = 0; i < numAtoms; i++) {
localPerDofValuesFloat[3*i] = swap[3*order[i]];
localPerDofValuesFloat[3*i+1] = swap[3*order[i]+1];
localPerDofValuesFloat[3*i+2] = swap[3*order[i]+2];
}
perDofValues.setParameterValues(localPerDofValuesFloat);
}
for (int i = 0; i < numAtoms; i++)
lastAtomOrder[i] = order[i];
deviceValuesAreCurrent = true;
}
private:
CudaContext& cu;
CudaParameterSet& perDofValues;
vector<vector<float> >& localPerDofValuesFloat;
vector<vector<double> >& localPerDofValuesDouble;
bool& deviceValuesAreCurrent;
vector<int> lastAtomOrder;
};
CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() {
cu.setAsCurrent();
if (globalValues != NULL)
delete globalValues;
if (contextParameterValues != NULL)
delete contextParameterValues;
if (sumBuffer != NULL)
delete sumBuffer;
if (energy != NULL)
delete energy;
if (uniformRandoms != NULL)
delete uniformRandoms;
if (randomSeed != NULL)
delete randomSeed;
if (perDofValues != NULL)
delete perDofValues;
}
void CudaIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system);
cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
numGlobalVariables = integrator.getNumGlobalVariables();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
globalValues = new CudaArray(cu, max(1, numGlobalVariables), elementSize, "globalVariables");
sumBuffer = new CudaArray(cu, 3*system.getNumParticles(), elementSize, "sumBuffer");
energy = new CudaArray(cu, 1, elementSize, "energy");
perDofValues = new CudaParameterSet(cu, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cu.getUseDoublePrecision());
cu.addReorderListener(new ReorderListener(cu, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent));
prevStepSize = -1.0;
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
}
string CudaIntegrateCustomStepKernel::createGlobalComputation(const string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const string& energyName) {
map<string, Lepton::ParsedExpression> expressions;
if (variable == "dt")
expressions["dt[0].y = "] = expr;
else {
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
if (variable == integrator.getGlobalVariableName(i))
expressions["globals["+cu.intToString(i)+"] = "] = expr;
for (int i = 0; i < (int) parameterNames.size(); i++)
if (variable == parameterNames[i]) {
expressions["params["+cu.intToString(i)+"] = "] = expr;
modifiesParameters = true;
}
}
if (expressions.size() == 0)
throw OpenMMException("Unknown global variable: "+variable);
map<string, string> variables;
variables["dt"] = "dt[0].y";
variables["uniform"] = "uniform";
variables["gaussian"] = "gaussian";
variables[energyName] = "energy[0]";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "params["+cu.intToString(i)+"]";
vector<pair<string, string> > functions;
return cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
}
string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) {
const string suffixes[] = {".x", ".y", ".z"};
string suffix = suffixes[component];
map<string, Lepton::ParsedExpression> expressions;
if (variable == "x")
expressions["position"+suffix+" = "] = expr;
else if (variable == "v")
expressions["velocity"+suffix+" = "] = expr;
else if (variable == "")
expressions["sum[3*index+"+cu.intToString(component)+"] = "] = expr;
else {
for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
if (variable == integrator.getPerDofVariableName(i))
expressions["perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i)+" = "] = expr;
}
if (expressions.size() == 0)
throw OpenMMException("Unknown per-DOF variable: "+variable);
map<string, string> variables;
variables["x"] = "position"+suffix;
variables["v"] = "velocity"+suffix;
variables[forceName] = "f"+suffix;
variables["gaussian"] = "gaussian"+suffix;
variables["uniform"] = "uniform"+suffix;
variables["m"] = "mass";
variables["dt"] = "stepSize";
variables[energyName] = "energy[0]";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "params["+cu.intToString(i)+"]";
vector<pair<string, string> > functions;
return cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp"+cu.intToString(component)+"_", "", "double");
}
void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms(); int numAtoms = cu.getNumAtoms();
int numSteps = integrator.getNumComputations();
if (!hasInitializedKernels) {
hasInitializedKernels = true;
// Select the step size to use. // Initialize various data structures.
double maxStepSize = maxTime-cu.getTime(); const map<string, double>& params = context.getParameters();
float maxStepSizeFloat = (float) maxStepSize; if (cu.getUseDoublePrecision()) {
double tol = integrator.getErrorTolerance(); contextParameterValues = CudaArray::create<double>(cu, max(1, (int) params.size()), "contextParameters");
float tolFloat = (float) tol; contextValuesDouble.resize(contextParameterValues->getSize());
double tau = integrator.getFriction() == 0.0 ? 0.0 : 1.0/integrator.getFriction(); for (map<string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter) {
float tauFloat = (float) tau; contextValuesDouble[parameterNames.size()] = iter->second;
double kT = BOLTZ*integrator.getTemperature(); parameterNames.push_back(iter->first);
float kTFloat = (float) kT; }
void* argsSelect[] = {cu.getUseDoublePrecision() ? (void*) &maxStepSize : (void*) &maxStepSizeFloat, contextParameterValues->upload(contextValuesDouble);
cu.getUseDoublePrecision() ? (void*) &tol : (void*) &tolFloat, }
cu.getUseDoublePrecision() ? (void*) &tau : (void*) &tauFloat, else {
cu.getUseDoublePrecision() ? (void*) &kT : (void*) &kTFloat, contextParameterValues = CudaArray::create<float>(cu, max(1, (int) params.size()), "contextParameters");
&cu.getIntegrationUtilities().getStepSize().getDevicePointer(), contextValuesFloat.resize(contextParameterValues->getSize());
&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &params->getDevicePointer()}; for (map<string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter) {
int sharedSize = blockSize*(cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); contextValuesFloat[parameterNames.size()] = (float) iter->second;
cu.executeKernel(selectSizeKernel, argsSelect, blockSize, blockSize, sharedSize); parameterNames.push_back(iter->first);
}
contextParameterValues->upload(contextValuesFloat);
}
kernels.resize(integrator.getNumComputations());
kernelArgs.resize(integrator.getNumComputations());
requiredGaussian.resize(integrator.getNumComputations(), 0);
requiredUniform.resize(integrator.getNumComputations(), 0);
needsForces.resize(numSteps, false);
needsEnergy.resize(numSteps, false);
forceGroup.resize(numSteps, -2);
invalidatesForces.resize(numSteps, false);
merged.resize(numSteps, false);
modifiesParameters = false;
map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["WORK_GROUP_SIZE"] = cu.intToString(CudaContext::ThreadBlockSize);
defines["SUM_BUFFER_SIZE"] = "0";
defines["SUM_OUTPUT_INDEX"] = "0";
// Initialize the random number generator.
uniformRandoms = CudaArray::create<float4>(cu, cu.getNumAtoms(), "uniformRandoms");
randomSeed = CudaArray::create<int4>(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, "randomSeed");
vector<int4> seed(randomSeed->getSize());
unsigned int r = integrator.getRandomNumberSeed()+1;
for (int i = 0; i < randomSeed->getSize(); i++) {
seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
}
randomSeed->upload(seed);
CUmodule randomProgram = cu.createModule(CudaKernelSources::customIntegrator, defines);
randomKernel = cu.getKernel(randomProgram, "generateRandomNumbers");
// Build a list of all variables that affect the forces, so we can tell which
// steps invalidate them.
set<string> affectsForce;
affectsForce.insert("x");
for (vector<ForceImpl*>::const_iterator iter = context.getForceImpls().begin(); iter != context.getForceImpls().end(); ++iter) {
const map<string, double> params = (*iter)->getDefaultParameters();
for (map<string, double>::const_iterator param = params.begin(); param != params.end(); ++param)
affectsForce.insert(param->first);
}
// Record information about all the computation steps.
stepType.resize(numSteps);
vector<string> variable(numSteps);
vector<Lepton::ParsedExpression> expression(numSteps);
vector<string> forceGroupName;
vector<string> energyGroupName;
for (int i = 0; i < 32; i++) {
stringstream fname;
fname << "f" << i;
forceGroupName.push_back(fname.str());
stringstream ename;
ename << "energy" << i;
energyGroupName.push_back(ename.str());
}
vector<string> forceName(numSteps, "f");
vector<string> energyName(numSteps, "energy");
for (int step = 0; step < numSteps; step++) {
string expr;
integrator.getComputationStep(step, stepType[step], variable[step], expr);
if (expr.size() > 0) {
expression[step] = Lepton::Parser::parse(expr).optimize();
if (usesVariable(expression[step], "f")) {
needsForces[step] = true;
forceGroup[step] = -1;
}
if (usesVariable(expression[step], "energy")) {
needsEnergy[step] = true;
forceGroup[step] = -1;
}
for (int i = 0; i < 32; i++) {
if (usesVariable(expression[step], forceGroupName[i])) {
if (forceGroup[step] != -2)
throw OpenMMException("A single computation step cannot depend on multiple force groups");
needsForces[step] = true;
forceGroup[step] = 1<<i;
forceName[step] = forceGroupName[i];
}
if (usesVariable(expression[step], energyGroupName[i])) {
if (forceGroup[step] != -2)
throw OpenMMException("A single computation step cannot depend on multiple force groups");
needsEnergy[step] = true;
forceGroup[step] = 1<<i;
energyName[step] = energyGroupName[i];
}
}
}
invalidatesForces[step] = (stepType[step] == CustomIntegrator::ConstrainPositions || affectsForce.find(variable[step]) != affectsForce.end());
if (forceGroup[step] == -2 && step > 0)
forceGroup[step] = forceGroup[step-1];
}
// Determine how each step will represent the position (as just a value, or a value plus a delta).
vector<bool> storePosAsDelta(numSteps, false);
vector<bool> loadPosAsDelta(numSteps, false);
bool beforeConstrain = false;
for (int step = numSteps-1; step >= 0; step--) {
if (stepType[step] == CustomIntegrator::ConstrainPositions)
beforeConstrain = true;
else if (stepType[step] == CustomIntegrator::ComputePerDof && variable[step] == "x" && beforeConstrain)
storePosAsDelta[step] = true;
}
bool storedAsDelta = false;
for (int step = 0; step < numSteps; step++) {
loadPosAsDelta[step] = storedAsDelta;
if (storePosAsDelta[step] == true)
storedAsDelta = true;
if (stepType[step] == CustomIntegrator::ConstrainPositions)
storedAsDelta = false;
}
// Identify steps that can be merged into a single kernel.
for (int step = 1; step < numSteps; step++) {
if (needsForces[step] || needsEnergy[step])
continue;
if (stepType[step-1] == CustomIntegrator::ComputeGlobal && stepType[step] == CustomIntegrator::ComputeGlobal)
merged[step] = true;
if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof &&
!usesVariable(expression[step], "uniform"))
merged[step] = true;
}
// Loop over all steps and create the kernels for them.
for (int step = 0; step < numSteps; step++) {
if ((stepType[step] == CustomIntegrator::ComputePerDof || stepType[step] == CustomIntegrator::ComputeSum) && !merged[step]) {
// Compute a per-DOF value.
// Call the first integration kernel. stringstream compute;
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
compute << buffer.getType()<<" perDofx"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index];\n";
compute << buffer.getType()<<" perDofy"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index+1];\n";
compute << buffer.getType()<<" perDofz"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index+2];\n";
}
int numGaussian = 0, numUniform = 0;
for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
compute << "{\n";
for (int i = 0; i < 3; i++)
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j], i, integrator, forceName[j], energyName[j]);
if (variable[j] == "x") {
if (storePosAsDelta[j])
compute << "posDelta[index] = convertFromDouble4(position-convertToDouble4(posq[index]));\n";
else
compute << "posq[index] = convertFromDouble4(position);\n";
}
else if (variable[j] == "v")
compute << "velm[index] = convertFromDouble4(velocity);\n";
else {
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index] = perDofx"<<cu.intToString(i+1)<<";\n";
compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index+1] = perDofy"<<cu.intToString(i+1)<<";\n";
compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index+2] = perDofz"<<cu.intToString(i+1)<<";\n";
}
}
compute << "}\n";
numGaussian += numAtoms*usesVariable(expression[j], "gaussian");
numUniform += numAtoms*usesVariable(expression[j], "uniform");
}
map<string, string> replacements;
replacements["COMPUTE_STEP"] = compute.str();
stringstream args;
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
string valueName = "perDofValues"+cu.intToString(i+1);
args << ", " << buffer.getType() << "* __restrict__ " << valueName;
}
replacements["PARAMETER_ARGUMENTS"] = args.str();
if (loadPosAsDelta[step])
defines["LOAD_POS_AS_DELTA"] = "1";
else if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
defines.erase("LOAD_POS_AS_DELTA");
CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::vectorOps+CudaKernelSources::customIntegratorPerDof, replacements), defines);
CUfunction kernel = cu.getKernel(module, "computePerDof");
kernels[step].push_back(kernel);
requiredGaussian[step] = numGaussian;
requiredUniform[step] = numUniform;
vector<void*> args1;
args1.push_back(&cu.getPosq().getDevicePointer());
args1.push_back(&integration.getPosDelta().getDevicePointer());
args1.push_back(&cu.getVelm().getDevicePointer());
args1.push_back(&cu.getForce().getDevicePointer());
args1.push_back(&integration.getStepSize().getDevicePointer());
args1.push_back(&globalValues->getDevicePointer());
args1.push_back(&contextParameterValues->getDevicePointer());
args1.push_back(&sumBuffer->getDevicePointer());
args1.push_back(&integration.getRandom().getDevicePointer());
args1.push_back(NULL);
args1.push_back(&uniformRandoms->getDevicePointer());
args1.push_back(&energy->getDevicePointer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
args1.push_back(&perDofValues->getBuffers()[i].getMemory());
kernelArgs[step].push_back(args1);
if (stepType[step] == CustomIntegrator::ComputeSum) {
// Create a second kernel for this step that sums the values.
vector<void*> args2;
args2.push_back(&sumBuffer->getDevicePointer());
bool found = false;
for (int j = 0; j < integrator.getNumGlobalVariables() && !found; j++)
if (variable[step] == integrator.getGlobalVariableName(j)) {
args2.push_back(&globalValues->getDevicePointer());
defines["SUM_OUTPUT_INDEX"] = cu.intToString(j);
found = true;
}
for (int j = 0; j < (int) parameterNames.size() && !found; j++)
if (variable[step] == parameterNames[j]) {
args2.push_back(&contextParameterValues->getDevicePointer());
defines["SUM_OUTPUT_INDEX"] = cu.intToString(j);
found = true;
modifiesParameters = true;
}
if (!found)
throw OpenMMException("Unknown global variable: "+variable[step]);
defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
module = cu.createModule(CudaKernelSources::customIntegrator, defines);
kernel = cu.getKernel(module, "computeSum");
kernels[step].push_back(kernel);
kernelArgs[step].push_back(args2);
}
}
else if (stepType[step] == CustomIntegrator::ComputeGlobal && !merged[step]) {
// Compute a global value.
int randomIndex = integration.prepareRandomNumbers(cu.getPaddedNumAtoms()); stringstream compute;
void* args1[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(), for (int i = step; i < numSteps && (i == step || merged[i]); i++)
&params->getDevicePointer(), &integration.getStepSize().getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex}; compute << "{\n" << createGlobalComputation(variable[i], expression[i], integrator, energyName[i]) << "}\n";
cu.executeKernel(kernel1, args1, numAtoms); map<string, string> replacements;
replacements["COMPUTE_STEP"] = compute.str();
CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorGlobal, replacements), defines);
CUfunction kernel = cu.getKernel(module, "computeGlobal");
kernels[step].push_back(kernel);
vector<void*> args;
args.push_back(&integration.getStepSize().getDevicePointer());
args.push_back(&globalValues->getDevicePointer());
args.push_back(&contextParameterValues->getDevicePointer());
args.push_back(NULL);
args.push_back(NULL);
args.push_back(&energy->getDevicePointer());
kernelArgs[step].push_back(args);
}
else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
// Apply position constraints.
// Apply constraints. CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
CUfunction kernel = cu.getKernel(module, "applyPositionDeltas");
kernels[step].push_back(kernel);
vector<void*> args;
args.push_back(&cu.getPosq().getDevicePointer());
args.push_back(&integration.getPosDelta().getDevicePointer());
kernelArgs[step].push_back(args);
}
}
integration.applyConstraints(integrator.getConstraintTolerance()); // Create the kernel for summing energy.
// Call the second integration kernel. defines["SUM_OUTPUT_INDEX"] = "0";
defines["SUM_BUFFER_SIZE"] = cu.intToString(cu.getEnergyBuffer().getSize());
CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
sumEnergyKernel = cu.getKernel(module, "computeSum");
}
void* args2[] = {&cu.getPosq().getDevicePointer(), &integration.getPosDelta().getDevicePointer(), // Make sure all values (variables, parameters, etc.) stored on the device are up to date.
&cu.getVelm().getDevicePointer(), &integration.getStepSize().getDevicePointer()};
cu.executeKernel(kernel2, args2, numAtoms); if (!deviceValuesAreCurrent) {
integration.computeVirtualSites(); if (cu.getUseDoublePrecision())
perDofValues->setParameterValues(localPerDofValuesDouble);
else
perDofValues->setParameterValues(localPerDofValuesFloat);
deviceValuesAreCurrent = true;
}
localValuesAreCurrent = false;
double stepSize = integrator.getStepSize();
if (stepSize != prevStepSize) {
if (cu.getUseDoublePrecision()) {
double size[] = {0, stepSize};
integration.getStepSize().upload(size);
}
else {
float size[] = {0, (float) stepSize};
integration.getStepSize().upload(size);
}
prevStepSize = stepSize;
}
bool paramsChanged = false;
if (cu.getUseDoublePrecision()) {
for (int i = 0; i < (int) parameterNames.size(); i++) {
double value = context.getParameter(parameterNames[i]);
if (value != contextValuesDouble[i]) {
contextValuesDouble[i] = value;
paramsChanged = true;
}
}
if (paramsChanged)
contextParameterValues->upload(contextValuesDouble);
}
else {
for (int i = 0; i < (int) parameterNames.size(); i++) {
float value = (float) context.getParameter(parameterNames[i]);
if (value != contextValuesFloat[i]) {
contextValuesFloat[i] = value;
paramsChanged = true;
}
}
if (paramsChanged)
contextParameterValues->upload(contextValuesFloat);
}
// Loop over computation steps in the integrator and execute them.
void* randomArgs[] = {&uniformRandoms->getDevicePointer(), &randomSeed->getDevicePointer()};
for (int i = 0; i < numSteps; i++) {
if ((needsForces[i] || needsEnergy[i]) && (!forcesAreValid || context.getLastForceGroups() != forceGroup[i])) {
// Recompute forces and/or energy. Figure out what is actually needed
// between now and the next time they get invalidated again.
bool computeForce = false, computeEnergy = false;
for (int j = i; ; j++) {
if (needsForces[j])
computeForce = true;
if (needsEnergy[j])
computeEnergy = true;
if (invalidatesForces[j])
break;
if (j == numSteps-1)
j = -1;
if (j == i-1)
break;
}
recordChangedParameters(context);
context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
if (computeEnergy) {
void* args[] = {&cu.getEnergyBuffer().getDevicePointer(), &energy->getDevicePointer()};
cu.executeKernel(sumEnergyKernel, &args[0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
}
forcesAreValid = true;
}
if (stepType[i] == CustomIntegrator::ComputePerDof && !merged[i]) {
int randomIndex = integration.prepareRandomNumbers(requiredGaussian[i]);
kernelArgs[i][0][9] = &randomIndex;
if (requiredUniform[i] > 0)
cu.executeKernel(randomKernel, &randomArgs[0], numAtoms);
cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], numAtoms);
}
else if (stepType[i] == CustomIntegrator::ComputeGlobal && !merged[i]) {
float uniform = SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber();
float gauss = SimTKOpenMMUtilities::getNormallyDistributedRandomNumber();
kernelArgs[i][0][3] = &uniform;
kernelArgs[i][0][4] = &gauss;
cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], 1, 1);
}
else if (stepType[i] == CustomIntegrator::ComputeSum) {
int randomIndex = integration.prepareRandomNumbers(requiredGaussian[i]);
kernelArgs[i][0][9] = &randomIndex;
if (requiredUniform[i] > 0)
cu.executeKernel(randomKernel, &randomArgs[0], numAtoms);
cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], numAtoms);
cu.executeKernel(kernels[i][1], &kernelArgs[i][1][0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
}
else if (stepType[i] == CustomIntegrator::UpdateContextState) {
recordChangedParameters(context);
context.updateContextState();
}
else if (stepType[i] == CustomIntegrator::ConstrainPositions) {
cu.getIntegrationUtilities().applyConstraints(integrator.getConstraintTolerance());
cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], numAtoms);
cu.getIntegrationUtilities().computeVirtualSites();
}
else if (stepType[i] == CustomIntegrator::ConstrainVelocities) {
cu.getIntegrationUtilities().applyVelocityConstraints(integrator.getConstraintTolerance());
}
if (invalidatesForces[i])
forcesAreValid = false;
}
recordChangedParameters(context);
// Update the time and step count. // Update the time and step count.
double dt, time; cu.setTime(cu.getTime()+stepSize);
cu.setStepCount(cu.getStepCount()+1);
}
void CudaIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& context) {
if (!modifiesParameters)
return;
if (cu.getUseDoublePrecision()) { if (cu.getUseDoublePrecision()) {
double2 stepSize; contextParameterValues->download(contextValuesDouble);
cu.getIntegrationUtilities().getStepSize().download(&stepSize); for (int i = 0; i < (int) parameterNames.size(); i++) {
dt = stepSize.y; double value = context.getParameter(parameterNames[i]);
time = cu.getTime()+dt; if (value != contextValuesDouble[i])
if (dt == maxStepSize) context.setParameter(parameterNames[i], contextValuesDouble[i]);
time = maxTime; // Avoid round-off error }
} }
else { else {
float2 stepSize; contextParameterValues->download(contextValuesFloat);
cu.getIntegrationUtilities().getStepSize().download(&stepSize); for (int i = 0; i < (int) parameterNames.size(); i++) {
dt = stepSize.y; float value = (float) context.getParameter(parameterNames[i]);
time = cu.getTime()+dt; if (value != contextValuesFloat[i])
if (dt == maxStepSizeFloat) context.setParameter(parameterNames[i], contextValuesFloat[i]);
time = maxTime; // Avoid round-off error }
} }
cu.setTime(time);
cu.setStepCount(cu.getStepCount()+1);
return dt;
} }
//class CudaIntegrateCustomStepKernel::ReorderListener : public CudaContext::ReorderListener { void CudaIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const {
//public: values.resize(numGlobalVariables);
// ReorderListener(CudaContext& cu, CudaParameterSet& perDofValues, vector<vector<cl_float> >& localPerDofValues, bool& deviceValuesAreCurrent) : if (numGlobalVariables == 0)
// cu(cu), perDofValues(perDofValues), localPerDofValues(localPerDofValues), deviceValuesAreCurrent(deviceValuesAreCurrent) { return;
// int numAtoms = cu.getNumAtoms(); if (cu.getUseDoublePrecision())
// lastAtomOrder.resize(numAtoms); globalValues->download(values);
// for (int i = 0; i < numAtoms; i++) else {
// lastAtomOrder[i] = cu.getAtomIndex()[i]; vector<float> buffer;
// } globalValues->download(buffer);
// void execute() { for (int i = 0; i < numGlobalVariables; i++)
// // Reorder the per-DOF variables to reflect the new atom order. values[i] = buffer[i];
// }
// if (perDofValues.getNumParameters() == 0) }
// return;
// int numAtoms = cu.getNumAtoms(); void CudaIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) {
// if (deviceValuesAreCurrent) if (numGlobalVariables == 0)
// perDofValues.getParameterValues(localPerDofValues); return;
// vector<vector<cl_float> > swap(3*numAtoms); if (cu.getUseDoublePrecision())
// for (int i = 0; i < numAtoms; i++) { globalValues->upload(values);
// swap[3*lastAtomOrder[i]] = localPerDofValues[3*i]; else {
// swap[3*lastAtomOrder[i]+1] = localPerDofValues[3*i+1]; vector<float> buffer(numGlobalVariables);
// swap[3*lastAtomOrder[i]+2] = localPerDofValues[3*i+2]; for (int i = 0; i < numGlobalVariables; i++)
// } buffer[i] = (float) values[i];
// CudaArray<cl_int>& order = cu.getAtomIndex(); globalValues->upload(buffer);
// for (int i = 0; i < numAtoms; i++) { }
// localPerDofValues[3*i] = swap[3*order[i]]; }
// localPerDofValues[3*i+1] = swap[3*order[i]+1];
// localPerDofValues[3*i+2] = swap[3*order[i]+2]; void CudaIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int variable, vector<Vec3>& values) const {
// } values.resize(perDofValues->getNumObjects()/3);
// perDofValues.setParameterValues(localPerDofValues); const vector<int>& order = cu.getAtomIndex();
// for (int i = 0; i < numAtoms; i++) if (cu.getUseDoublePrecision()) {
// lastAtomOrder[i] = order[i]; if (!localValuesAreCurrent) {
// deviceValuesAreCurrent = true; perDofValues->getParameterValues(localPerDofValuesDouble);
// } localValuesAreCurrent = true;
//private: }
// CudaContext& cu; for (int i = 0; i < (int) values.size(); i++)
// CudaParameterSet& perDofValues; for (int j = 0; j < 3; j++)
// vector<vector<cl_float> >& localPerDofValues; values[order[i]][j] = localPerDofValuesDouble[3*i+j][variable];
// bool& deviceValuesAreCurrent; }
// vector<int> lastAtomOrder; else {
//}; if (!localValuesAreCurrent) {
// perDofValues->getParameterValues(localPerDofValuesFloat);
//CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() { localValuesAreCurrent = true;
// cu.setAsCurrent(); }
// if (globalValues != NULL) for (int i = 0; i < (int) values.size(); i++)
// delete globalValues; for (int j = 0; j < 3; j++)
// if (contextParameterValues != NULL) values[order[i]][j] = localPerDofValuesFloat[3*i+j][variable];
// delete contextParameterValues; }
// if (sumBuffer != NULL) }
// delete sumBuffer;
// if (energy != NULL) void CudaIntegrateCustomStepKernel::setPerDofVariable(ContextImpl& context, int variable, const vector<Vec3>& values) {
// delete energy; const vector<int>& order = cu.getAtomIndex();
// if (uniformRandoms != NULL) if (cu.getUseDoublePrecision()) {
// delete uniformRandoms; if (!localValuesAreCurrent) {
// if (randomSeed != NULL) perDofValues->getParameterValues(localPerDofValuesDouble);
// delete randomSeed; localValuesAreCurrent = true;
// if (perDofValues != NULL) }
// delete perDofValues; for (int i = 0; i < (int) values.size(); i++)
//} for (int j = 0; j < 3; j++)
// localPerDofValuesDouble[3*i+j][variable] = values[order[i]][j];
//void CudaIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) { }
// cu.setAsCurrent(); else {
// cu.getPlatformData().initializeContexts(system); if (!localValuesAreCurrent) {
// cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); perDofValues->getParameterValues(localPerDofValuesFloat);
// numGlobalVariables = integrator.getNumGlobalVariables(); localValuesAreCurrent = true;
// globalValues = new CudaArray<cl_float>(cu, max(1, numGlobalVariables), "globalVariables", true); }
// sumBuffer = new CudaArray<cl_float>(cu, 3*system.getNumParticles(), "sumBuffer"); for (int i = 0; i < (int) values.size(); i++)
// energy = new CudaArray<cl_float>(cu, 1, "energy"); for (int j = 0; j < 3; j++)
// perDofValues = new CudaParameterSet(cu, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables"); localPerDofValuesFloat[3*i+j][variable] = (float) values[order[i]][j];
// cu.addReorderListener(new ReorderListener(cu, *perDofValues, localPerDofValues, deviceValuesAreCurrent)); }
// prevStepSize = -1.0; deviceValuesAreCurrent = false;
// SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed()); }
//}
//
//string CudaIntegrateCustomStepKernel::createGlobalComputation(const string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const string& energyName) {
// map<string, Lepton::ParsedExpression> expressions;
// if (variable == "dt")
// expressions["dt[0].y = "] = expr;
// else {
// for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
// if (variable == integrator.getGlobalVariableName(i))
// expressions["globals["+cu.intToString(i)+"] = "] = expr;
// for (int i = 0; i < (int) parameterNames.size(); i++)
// if (variable == parameterNames[i]) {
// expressions["params["+cu.intToString(i)+"] = "] = expr;
// modifiesParameters = true;
// }
// }
// if (expressions.size() == 0)
// throw OpenMMException("Unknown global variable: "+variable);
// map<string, string> variables;
// variables["dt"] = "dt[0].y";
// variables["uniform"] = "uniform";
// variables["gaussian"] = "gaussian";
// variables[energyName] = "energy[0]";
// for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
// variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
// for (int i = 0; i < (int) parameterNames.size(); i++)
// variables[parameterNames[i]] = "params["+cu.intToString(i)+"]";
// vector<pair<string, string> > functions;
// return cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
//}
//
//string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) {
// const string suffixes[] = {".x", ".y", ".z"};
// string suffix = suffixes[component];
// map<string, Lepton::ParsedExpression> expressions;
// if (variable == "x")
// expressions["position"+suffix+" = "] = expr;
// else if (variable == "v")
// expressions["velocity"+suffix+" = "] = expr;
// else if (variable == "")
// expressions["sum[3*index+"+cu.intToString(component)+"] = "] = expr;
// else {
// for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
// if (variable == integrator.getPerDofVariableName(i))
// expressions["perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i)+" = "] = expr;
// }
// if (expressions.size() == 0)
// throw OpenMMException("Unknown per-DOF variable: "+variable);
// map<string, string> variables;
// variables["x"] = "position"+suffix;
// variables["v"] = "velocity"+suffix;
// variables[forceName] = "f"+suffix;
// variables["gaussian"] = "gaussian"+suffix;
// variables["uniform"] = "uniform"+suffix;
// variables["m"] = "mass";
// variables["dt"] = "stepSize";
// variables[energyName] = "energy[0]";
// for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
// variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
// for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
// variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
// for (int i = 0; i < (int) parameterNames.size(); i++)
// variables[parameterNames[i]] = "params["+cu.intToString(i)+"]";
// vector<pair<string, string> > functions;
// string tempType = (cu.getSupportsDoublePrecision() ? "double" : "float");
// return cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp"+cu.intToString(component)+"_", "", tempType);
//}
//
//void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
// CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
// int numAtoms = cu.getNumAtoms();
// int numSteps = integrator.getNumComputations();
// if (!hasInitializedKernels) {
// hasInitializedKernels = true;
//
// // Initialize various data structures.
//
// const map<string, double>& params = context.getParameters();
// contextParameterValues = new CudaArray<cl_float>(cu, max(1, (int) params.size()), "contextParameters", true);
// for (map<string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter) {
// contextParameterValues->set(parameterNames.size(), (float) iter->second);
// parameterNames.push_back(iter->first);
// }
// contextParameterValues->upload();
// kernels.resize(integrator.getNumComputations());
// requiredGaussian.resize(integrator.getNumComputations(), 0);
// requiredUniform.resize(integrator.getNumComputations(), 0);
// needsForces.resize(numSteps, false);
// needsEnergy.resize(numSteps, false);
// forceGroup.resize(numSteps, -2);
// invalidatesForces.resize(numSteps, false);
// merged.resize(numSteps, false);
// modifiesParameters = false;
// map<string, string> defines;
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// defines["WORK_GROUP_SIZE"] = cu.intToString(CudaContext::ThreadBlockSize);
//
// // Initialize the random number generator.
//
// uniformRandoms = new CudaArray<mm_float4>(cu, cu.getNumAtoms(), "uniformRandoms");
// randomSeed = new CudaArray<mm_int4>(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, "randomSeed");
// vector<mm_int4> seed(randomSeed->getSize());
// unsigned int r = integrator.getRandomNumberSeed()+1;
// for (int i = 0; i < randomSeed->getSize(); i++) {
// seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
// seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
// seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
// seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
// }
// randomSeed->upload(seed);
// CUmodule randomProgram = cu.createModule(CudaKernelSources::customIntegrator, defines);
// randomKernel = cu.getKernel(randomProgram, "generateRandomNumbers");
// randomKernel.setArg<cu::Buffer>(0, uniformRandoms->getDevicePointer());
// randomKernel.setArg<cu::Buffer>(1, randomSeed->getDevicePointer());
//
// // Build a list of all variables that affect the forces, so we can tell which
// // steps invalidate them.
//
// set<string> affectsForce;
// affectsForce.insert("x");
// for (vector<ForceImpl*>::const_iterator iter = context.getForceImpls().begin(); iter != context.getForceImpls().end(); ++iter) {
// const map<string, double> params = (*iter)->getDefaultParameters();
// for (map<string, double>::const_iterator param = params.begin(); param != params.end(); ++param)
// affectsForce.insert(param->first);
// }
//
// // Record information about all the computation steps.
//
// stepType.resize(numSteps);
// vector<string> variable(numSteps);
// vector<Lepton::ParsedExpression> expression(numSteps);
// vector<string> forceGroupName;
// vector<string> energyGroupName;
// for (int i = 0; i < 32; i++) {
// stringstream fname;
// fname << "f" << i;
// forceGroupName.push_back(fname.str());
// stringstream ename;
// ename << "energy" << i;
// energyGroupName.push_back(ename.str());
// }
// vector<string> forceName(numSteps, "f");
// vector<string> energyName(numSteps, "energy");
// for (int step = 0; step < numSteps; step++) {
// string expr;
// integrator.getComputationStep(step, stepType[step], variable[step], expr);
// if (expr.size() > 0) {
// expression[step] = Lepton::Parser::parse(expr).optimize();
// if (usesVariable(expression[step], "f")) {
// needsForces[step] = true;
// forceGroup[step] = -1;
// }
// if (usesVariable(expression[step], "energy")) {
// needsEnergy[step] = true;
// forceGroup[step] = -1;
// }
// for (int i = 0; i < 32; i++) {
// if (usesVariable(expression[step], forceGroupName[i])) {
// if (forceGroup[step] != -2)
// throw OpenMMException("A single computation step cannot depend on multiple force groups");
// needsForces[step] = true;
// forceGroup[step] = 1<<i;
// forceName[step] = forceGroupName[i];
// }
// if (usesVariable(expression[step], energyGroupName[i])) {
// if (forceGroup[step] != -2)
// throw OpenMMException("A single computation step cannot depend on multiple force groups");
// needsEnergy[step] = true;
// forceGroup[step] = 1<<i;
// energyName[step] = energyGroupName[i];
// }
// }
// }
// invalidatesForces[step] = (stepType[step] == CustomIntegrator::ConstrainPositions || affectsForce.find(variable[step]) != affectsForce.end());
// if (forceGroup[step] == -2 && step > 0)
// forceGroup[step] = forceGroup[step-1];
// }
//
// // Determine how each step will represent the position (as just a value, or a value plus a delta).
//
// vector<bool> storePosAsDelta(numSteps, false);
// vector<bool> loadPosAsDelta(numSteps, false);
// bool beforeConstrain = false;
// for (int step = numSteps-1; step >= 0; step--) {
// if (stepType[step] == CustomIntegrator::ConstrainPositions)
// beforeConstrain = true;
// else if (stepType[step] == CustomIntegrator::ComputePerDof && variable[step] == "x" && beforeConstrain)
// storePosAsDelta[step] = true;
// }
// bool storedAsDelta = false;
// for (int step = 0; step < numSteps; step++) {
// loadPosAsDelta[step] = storedAsDelta;
// if (storePosAsDelta[step] == true)
// storedAsDelta = true;
// if (stepType[step] == CustomIntegrator::ConstrainPositions)
// storedAsDelta = false;
// }
//
// // Identify steps that can be merged into a single kernel.
//
// for (int step = 1; step < numSteps; step++) {
// if (needsForces[step] || needsEnergy[step])
// continue;
// if (stepType[step-1] == CustomIntegrator::ComputeGlobal && stepType[step] == CustomIntegrator::ComputeGlobal)
// merged[step] = true;
// if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof &&
// !usesVariable(expression[step], "uniform"))
// merged[step] = true;
// }
//
// // Loop over all steps and create the kernels for them.
//
// for (int step = 0; step < numSteps; step++) {
// if ((stepType[step] == CustomIntegrator::ComputePerDof || stepType[step] == CustomIntegrator::ComputeSum) && !merged[step]) {
// // Compute a per-DOF value.
//
// stringstream compute;
// for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
// compute << buffer.getType()<<" perDofx"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index];\n";
// compute << buffer.getType()<<" perDofy"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index+1];\n";
// compute << buffer.getType()<<" perDofz"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index+2];\n";
// }
// string convert = (cu.getSupportsDoublePrecision() ? "convert_float4(" : "(");
// int numGaussian = 0, numUniform = 0;
// for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
// compute << "{\n";
// for (int i = 0; i < 3; i++)
// compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j], i, integrator, forceName[j], energyName[j]);
// if (variable[j] == "x") {
// if (storePosAsDelta[j]) {
// if (cu.getSupportsDoublePrecision())
// compute << "posDelta[index] = convert_float4(position-convert_double4(posq[index]));\n";
// else
// compute << "posDelta[index] = position-posq[index];\n";
// }
// else
// compute << "posq[index] = " << convert << "position);\n";
// }
// else if (variable[j] == "v")
// compute << "velm[index] = " << convert << "velocity);\n";
// else {
// for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
// compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index] = perDofx"<<cu.intToString(i+1)<<";\n";
// compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index+1] = perDofy"<<cu.intToString(i+1)<<";\n";
// compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index+2] = perDofz"<<cu.intToString(i+1)<<";\n";
// }
// }
// compute << "}\n";
// numGaussian += numAtoms*usesVariable(expression[j], "gaussian");
// numUniform += numAtoms*usesVariable(expression[j], "uniform");
// }
// map<string, string> replacements;
// replacements["COMPUTE_STEP"] = compute.str();
// stringstream args;
// for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
// string valueName = "perDofValues"+cu.intToString(i+1);
// args << ", __global " << buffer.getType() << "* restrict " << valueName;
// }
// replacements["PARAMETER_ARGUMENTS"] = args.str();
// if (loadPosAsDelta[step])
// defines["LOAD_POS_AS_DELTA"] = "1";
// else if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
// defines.erase("LOAD_POS_AS_DELTA");
// CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorPerDof, replacements), defines);
// cu::Kernel kernel = cu.getKernel(module, "computePerDof");
// kernels[step].push_back(kernel);
// requiredGaussian[step] = numGaussian;
// requiredUniform[step] = numUniform;
// int index = 0;
// kernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, integration.getPosDelta().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, cu.getVelm().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, cu.getForce().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, integration.getStepSize().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, globalValues->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, contextParameterValues->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, sumBuffer->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, integration.getRandom().getDevicePointer());
// index++;
// kernel.setArg<cu::Buffer>(index++, uniformRandoms->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, energy->getDevicePointer());
// for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
// kernel.setArg<cu::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
// if (stepType[step] == CustomIntegrator::ComputeSum) {
// // Create a second kernel for this step that sums the values.
//
// module = cu.createModule(CudaKernelSources::customIntegrator, defines);
// kernel = cu.getKernel(module, "computeSum");
// kernels[step].push_back(kernel);
// index = 0;
// kernel.setArg<cu::Buffer>(index++, sumBuffer->getDevicePointer());
// bool found = false;
// for (int j = 0; j < integrator.getNumGlobalVariables() && !found; j++)
// if (variable[step] == integrator.getGlobalVariableName(j)) {
// kernel.setArg<cu::Buffer>(index++, globalValues->getDevicePointer());
// kernel.setArg<cl_uint>(index++, j);
// found = true;
// }
// for (int j = 0; j < (int) parameterNames.size() && !found; j++)
// if (variable[step] == parameterNames[j]) {
// kernel.setArg<cu::Buffer>(index++, contextParameterValues->getDevicePointer());
// kernel.setArg<cl_uint>(index++, j);
// found = true;
// modifiesParameters = true;
// }
// if (!found)
// throw OpenMMException("Unknown global variable: "+variable[step]);
// kernel.setArg<cl_int>(index++, 3*numAtoms);
// }
// }
// else if (stepType[step] == CustomIntegrator::ComputeGlobal && !merged[step]) {
// // Compute a global value.
//
// stringstream compute;
// for (int i = step; i < numSteps && (i == step || merged[i]); i++)
// compute << "{\n" << createGlobalComputation(variable[i], expression[i], integrator, energyName[i]) << "}\n";
// map<string, string> replacements;
// replacements["COMPUTE_STEP"] = compute.str();
// CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorGlobal, replacements), defines);
// cu::Kernel kernel = cu.getKernel(module, "computeGlobal");
// kernels[step].push_back(kernel);
// int index = 0;
// kernel.setArg<cu::Buffer>(index++, integration.getStepSize().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, globalValues->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, contextParameterValues->getDevicePointer());
// index += 2;
// kernel.setArg<cu::Buffer>(index++, energy->getDevicePointer());
// }
// else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
// // Apply position constraints.
//
// CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
// cu::Kernel kernel = cu.getKernel(module, "applyPositionDeltas");
// kernels[step].push_back(kernel);
// int index = 0;
// kernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, integration.getPosDelta().getDevicePointer());
// }
// }
//
// // Create the kernel for summing energy.
//
// CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
// sumEnergyKernel = cu.getKernel(module, "computeSum");
// int index = 0;
// sumEnergyKernel.setArg<cu::Buffer>(index++, cu.getEnergyBuffer().getDevicePointer());
// sumEnergyKernel.setArg<cu::Buffer>(index++, energy->getDevicePointer());
// sumEnergyKernel.setArg<cl_int>(index++, 0);
// sumEnergyKernel.setArg<cl_int>(index++, cu.getEnergyBuffer().getSize());
// }
//
// // Make sure all values (variables, parameters, etc.) stored on the device are up to date.
//
// if (!deviceValuesAreCurrent) {
// perDofValues->setParameterValues(localPerDofValues);
// deviceValuesAreCurrent = true;
// }
// localValuesAreCurrent = false;
// double stepSize = integrator.getStepSize();
// if (stepSize != prevStepSize) {
// integration.getStepSize()[0].y = (cl_float) stepSize;
// integration.getStepSize().upload();
// prevStepSize = stepSize;
// }
// bool paramsChanged = false;
// for (int i = 0; i < (int) parameterNames.size(); i++) {
// float value = (float) context.getParameter(parameterNames[i]);
// if (value != contextParameterValues->get(i)) {
// contextParameterValues->set(i, value);
// paramsChanged = true;
// }
// }
// if (paramsChanged)
// contextParameterValues->upload();
//
// // Loop over computation steps in the integrator and execute them.
//
// for (int i = 0; i < numSteps; i++) {
// if ((needsForces[i] || needsEnergy[i]) && (!forcesAreValid || context.getLastForceGroups() != forceGroup[i])) {
// // Recompute forces and/or energy. Figure out what is actually needed
// // between now and the next time they get invalidated again.
//
// bool computeForce = false, computeEnergy = false;
// for (int j = i; ; j++) {
// if (needsForces[j])
// computeForce = true;
// if (needsEnergy[j])
// computeEnergy = true;
// if (invalidatesForces[j])
// break;
// if (j == numSteps-1)
// j = -1;
// if (j == i-1)
// break;
// }
// recordChangedParameters(context);
// context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
// if (computeEnergy)
// cu.executeKernel(sumEnergyKernel, CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
// forcesAreValid = true;
// }
// if (stepType[i] == CustomIntegrator::ComputePerDof && !merged[i]) {
// kernels[i][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[i]));
// if (requiredUniform[i] > 0)
// cu.executeKernel(randomKernel, numAtoms);
// cu.executeKernel(kernels[i][0], numAtoms);
// }
// else if (stepType[i] == CustomIntegrator::ComputeGlobal && !merged[i]) {
// kernels[i][0].setArg<cl_float>(3, SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber());
// kernels[i][0].setArg<cl_float>(4, SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
// cu.executeKernel(kernels[i][0], 1, 1);
// }
// else if (stepType[i] == CustomIntegrator::ComputeSum) {
// kernels[i][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[i]));
// if (requiredUniform[i] > 0)
// cu.executeKernel(randomKernel, numAtoms);
// cu.executeKernel(kernels[i][0], numAtoms);
// cu.executeKernel(kernels[i][1], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
// }
// else if (stepType[i] == CustomIntegrator::UpdateContextState) {
// recordChangedParameters(context);
// context.updateContextState();
// }
// else if (stepType[i] == CustomIntegrator::ConstrainPositions) {
// cu.getIntegrationUtilities().applyConstraints(integrator.getConstraintTolerance());
// cu.executeKernel(kernels[i][0], numAtoms);
// cu.getIntegrationUtilities().computeVirtualSites();
// }
// else if (stepType[i] == CustomIntegrator::ConstrainVelocities) {
// cu.getIntegrationUtilities().applyVelocityConstraints(integrator.getConstraintTolerance());
// }
// if (invalidatesForces[i])
// forcesAreValid = false;
// }
// recordChangedParameters(context);
//
// // Update the time and step count.
//
// cu.setTime(cu.getTime()+stepSize);
// cu.setStepCount(cu.getStepCount()+1);
//}
//
//void CudaIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& context) {
// if (!modifiesParameters)
// return;
// contextParameterValues->download();
// for (int i = 0; i < (int) parameterNames.size(); i++) {
// float value = (float) context.getParameter(parameterNames[i]);
// if (value != contextParameterValues->get(i))
// context.setParameter(parameterNames[i], contextParameterValues->get(i));
// }
//}
//
//void CudaIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const {
// globalValues->download();
// values.resize(numGlobalVariables);
// for (int i = 0; i < numGlobalVariables; i++)
// values[i] = globalValues->get(i);
//}
//
//void CudaIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) {
// for (int i = 0; i < numGlobalVariables; i++)
// globalValues->set(i, (float) values[i]);
// globalValues->upload();
//}
//
//void CudaIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int variable, vector<Vec3>& values) const {
// if (!localValuesAreCurrent) {
// perDofValues->getParameterValues(localPerDofValues);
// localValuesAreCurrent = true;
// }
// values.resize(perDofValues->getNumObjects()/3);
// CudaArray<cl_int>& order = cu.getAtomIndex();
// for (int i = 0; i < (int) values.size(); i++)
// for (int j = 0; j < 3; j++)
// values[order[i]][j] = localPerDofValues[3*i+j][variable];
//}
//
//void CudaIntegrateCustomStepKernel::setPerDofVariable(ContextImpl& context, int variable, const vector<Vec3>& values) {
// if (!localValuesAreCurrent) {
// perDofValues->getParameterValues(localPerDofValues);
// localValuesAreCurrent = true;
// }
// CudaArray<cl_int>& order = cu.getAtomIndex();
// for (int i = 0; i < (int) values.size(); i++)
// for (int j = 0; j < 3; j++)
// localPerDofValues[3*i+j][variable] = (float) values[order[i]][j];
// deviceValuesAreCurrent = false;
//}
CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() { CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() {
cu.setAsCurrent(); cu.setAsCurrent();
......
...@@ -623,50 +623,49 @@ private: ...@@ -623,50 +623,49 @@ private:
static const int PmeOrder = 5; static const int PmeOrder = 5;
}; };
///** /**
// * This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system. * This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system.
// */ */
//class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel { class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
//public: public:
// CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomNonbondedForceKernel(name, platform), CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomNonbondedForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), system(system) { cu(cu), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), system(system) {
// } }
// ~CudaCalcCustomNonbondedForceKernel(); ~CudaCalcCustomNonbondedForceKernel();
// /** /**
// * Initialize the kernel. * Initialize the kernel.
// * *
// * @param system the System this kernel will be applied to * @param system the System this kernel will be applied to
// * @param force the CustomNonbondedForce this kernel will be used for * @param force the CustomNonbondedForce this kernel will be used for
// */ */
// void initialize(const System& system, const CustomNonbondedForce& force); void initialize(const System& system, const CustomNonbondedForce& force);
// /** /**
// * Execute the kernel to calculate the forces and/or energy. * Execute the kernel to calculate the forces and/or energy.
// * *
// * @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
// * @param includeForces true if forces should be calculated * @param includeForces true if forces should be calculated
// * @param includeEnergy true if the energy should be calculated * @param includeEnergy true if the energy should be calculated
// * @return the potential energy due to the force * @return the potential energy due to the force
// */ */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /** /**
// * Copy changed parameters over to a context. * Copy changed parameters over to a context.
// * *
// * @param context the context to copy parameters to * @param context the context to copy parameters to
// * @param force the CustomNonbondedForce to copy the parameters from * @param force the CustomNonbondedForce to copy the parameters from
// */ */
// void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force); void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
//private: private:
// bool hasInitializedKernel; CudaContext& cu;
// CudaContext& cu; CudaParameterSet* params;
// CudaParameterSet* params; CudaArray* globals;
// CudaArray<cl_float>* globals; CudaArray* tabulatedFunctionParams;
// CudaArray<mm_float4>* tabulatedFunctionParams; std::vector<std::string> globalParamNames;
// std::vector<std::string> globalParamNames; std::vector<float> globalParamValues;
// std::vector<cl_float> globalParamValues; std::vector<CudaArray*> tabulatedFunctions;
// std::vector<CudaArray<mm_float4>*> tabulatedFunctions; System& system;
// System& system; };
//};
//
///** ///**
// * This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system. // * This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
// */ // */
...@@ -814,60 +813,58 @@ private: ...@@ -814,60 +813,58 @@ private:
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
}; };
///** /**
// * This kernel is invoked by CustomHbondForce to calculate the forces acting on the system. * This kernel is invoked by CustomHbondForce to calculate the forces acting on the system.
// */ */
//class CudaCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel { class CudaCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
//public: public:
// CudaCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomHbondForceKernel(name, platform), CudaCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomHbondForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), donorParams(NULL), acceptorParams(NULL), donors(NULL), acceptors(NULL), hasInitializedKernel(false), cu(cu), donorParams(NULL), acceptorParams(NULL), donors(NULL), acceptors(NULL),
// donorBufferIndices(NULL), acceptorBufferIndices(NULL), globals(NULL), donorExclusions(NULL), acceptorExclusions(NULL), globals(NULL), donorExclusions(NULL), acceptorExclusions(NULL), tabulatedFunctionParams(NULL), system(system) {
// tabulatedFunctionParams(NULL), system(system) { }
// } ~CudaCalcCustomHbondForceKernel();
// ~CudaCalcCustomHbondForceKernel(); /**
// /** * Initialize the kernel.
// * Initialize the kernel. *
// * * @param system the System this kernel will be applied to
// * @param system the System this kernel will be applied to * @param force the CustomHbondForce this kernel will be used for
// * @param force the CustomHbondForce this kernel will be used for */
// */ void initialize(const System& system, const CustomHbondForce& force);
// void initialize(const System& system, const CustomHbondForce& force); /**
// /** * Execute the kernel to calculate the forces and/or energy.
// * Execute the kernel to calculate the forces and/or energy. *
// * * @param context the context in which to execute this kernel
// * @param context the context in which to execute this kernel * @param includeForces true if forces should be calculated
// * @param includeForces true if forces should be calculated * @param includeEnergy true if the energy should be calculated
// * @param includeEnergy true if the energy should be calculated * @return the potential energy due to the force
// * @return the potential energy due to the force */
// */ double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy); /**
// /** * Copy changed parameters over to a context.
// * Copy changed parameters over to a context. *
// * * @param context the context to copy parameters to
// * @param context the context to copy parameters to * @param force the CustomHbondForce to copy the parameters from
// * @param force the CustomHbondForce to copy the parameters from */
// */ void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
// void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force); private:
//private: int numDonors, numAcceptors;
// int numDonors, numAcceptors; bool hasInitializedKernel;
// bool hasInitializedKernel; CudaContext& cu;
// CudaContext& cu; CudaParameterSet* donorParams;
// CudaParameterSet* donorParams; CudaParameterSet* acceptorParams;
// CudaParameterSet* acceptorParams; CudaArray* globals;
// CudaArray<cl_float>* globals; CudaArray* donors;
// CudaArray<mm_int4>* donors; CudaArray* acceptors;
// CudaArray<mm_int4>* acceptors; CudaArray* donorExclusions;
// CudaArray<mm_int4>* donorBufferIndices; CudaArray* acceptorExclusions;
// CudaArray<mm_int4>* acceptorBufferIndices; CudaArray* tabulatedFunctionParams;
// CudaArray<mm_int4>* donorExclusions; std::vector<std::string> globalParamNames;
// CudaArray<mm_int4>* acceptorExclusions; std::vector<float> globalParamValues;
// CudaArray<mm_float4>* tabulatedFunctionParams; std::vector<CudaArray*> tabulatedFunctions;
// std::vector<std::string> globalParamNames; std::vector<void*> donorArgs, acceptorArgs;
// std::vector<cl_float> globalParamValues; System& system;
// std::vector<CudaArray<mm_float4>*> tabulatedFunctions; CUfunction donorKernel, acceptorKernel;
// System& system; };
// CUfunction donorKernel, acceptorKernel;
//};
/** /**
* This kernel is invoked by CustomCompoundBondForce to calculate the forces acting on the system. * This kernel is invoked by CustomCompoundBondForce to calculate the forces acting on the system.
...@@ -1062,94 +1059,98 @@ private: ...@@ -1062,94 +1059,98 @@ private:
double prevTemp, prevFriction, prevErrorTol; double prevTemp, prevFriction, prevErrorTol;
}; };
///** /**
// * This kernel is invoked by CustomIntegrator to take one time step. * This kernel is invoked by CustomIntegrator to take one time step.
// */ */
//class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel { class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
//public: public:
// CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu), CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
// hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), contextParameterValues(NULL), sumBuffer(NULL), energy(NULL), hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), contextParameterValues(NULL), sumBuffer(NULL), energy(NULL),
// uniformRandoms(NULL), randomSeed(NULL), perDofValues(NULL) { uniformRandoms(NULL), randomSeed(NULL), perDofValues(NULL) {
// } }
// ~CudaIntegrateCustomStepKernel(); ~CudaIntegrateCustomStepKernel();
// /** /**
// * Initialize the kernel. * Initialize the kernel.
// * *
// * @param system the System this kernel will be applied to * @param system the System this kernel will be applied to
// * @param integrator the CustomIntegrator this kernel will be used for * @param integrator the CustomIntegrator this kernel will be used for
// */ */
// void initialize(const System& system, const CustomIntegrator& integrator); void initialize(const System& system, const CustomIntegrator& integrator);
// /** /**
// * Execute the kernel. * Execute the kernel.
// * *
// * @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
// * @param integrator the CustomIntegrator this kernel is being used for * @param integrator the CustomIntegrator this kernel is being used for
// * @param forcesAreValid if the context has been modified since the last time step, this will be * @param forcesAreValid if the context has been modified since the last time step, this will be
// * false to show that cached forces are invalid and must be recalculated. * false to show that cached forces are invalid and must be recalculated.
// * On exit, this should specify whether the cached forces are valid at the * On exit, this should specify whether the cached forces are valid at the
// * end of the step. * end of the step.
// */ */
// void execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid); void execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
// /** /**
// * Get the values of all global variables. * Get the values of all global variables.
// * *
// * @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
// * @param values on exit, this contains the values * @param values on exit, this contains the values
// */ */
// void getGlobalVariables(ContextImpl& context, std::vector<double>& values) const; void getGlobalVariables(ContextImpl& context, std::vector<double>& values) const;
// /** /**
// * Set the values of all global variables. * Set the values of all global variables.
// * *
// * @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
// * @param values a vector containing the values * @param values a vector containing the values
// */ */
// void setGlobalVariables(ContextImpl& context, const std::vector<double>& values); void setGlobalVariables(ContextImpl& context, const std::vector<double>& values);
// /** /**
// * Get the values of a per-DOF variable. * Get the values of a per-DOF variable.
// * *
// * @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
// * @param variable the index of the variable to get * @param variable the index of the variable to get
// * @param values on exit, this contains the values * @param values on exit, this contains the values
// */ */
// void getPerDofVariable(ContextImpl& context, int variable, std::vector<Vec3>& values) const; void getPerDofVariable(ContextImpl& context, int variable, std::vector<Vec3>& values) const;
// /** /**
// * Set the values of a per-DOF variable. * Set the values of a per-DOF variable.
// * *
// * @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
// * @param variable the index of the variable to get * @param variable the index of the variable to get
// * @param values a vector containing the values * @param values a vector containing the values
// */ */
// void setPerDofVariable(ContextImpl& context, int variable, const std::vector<Vec3>& values); void setPerDofVariable(ContextImpl& context, int variable, const std::vector<Vec3>& values);
//private: private:
// class ReorderListener; class ReorderListener;
// std::string createGlobalComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const std::string& energyName); std::string createGlobalComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const std::string& energyName);
// std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName); std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
// void recordChangedParameters(ContextImpl& context); void recordChangedParameters(ContextImpl& context);
// CudaContext& cu; CudaContext& cu;
// double prevStepSize; double prevStepSize;
// int numGlobalVariables; int numGlobalVariables;
// bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters; bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters;
// mutable bool localValuesAreCurrent; mutable bool localValuesAreCurrent;
// CudaArray<cl_float>* globalValues; CudaArray* globalValues;
// CudaArray<cl_float>* contextParameterValues; CudaArray* contextParameterValues;
// CudaArray<cl_float>* sumBuffer; CudaArray* sumBuffer;
// CudaArray<cl_float>* energy; CudaArray* energy;
// CudaArray<mm_float4>* uniformRandoms; CudaArray* uniformRandoms;
// CudaArray<mm_int4>* randomSeed; CudaArray* randomSeed;
// CudaParameterSet* perDofValues; CudaParameterSet* perDofValues;
// mutable std::vector<std::vector<cl_float> > localPerDofValues; mutable std::vector<std::vector<float> > localPerDofValuesFloat;
// std::vector<std::vector<CUfunction> > kernels; mutable std::vector<std::vector<double> > localPerDofValuesDouble;
// CUfunction sumEnergyKernel, randomKernel; std::vector<float> contextValuesFloat;
// std::vector<CustomIntegrator::ComputationType> stepType; std::vector<double> contextValuesDouble;
// std::vector<bool> needsForces; std::vector<std::vector<CUfunction> > kernels;
// std::vector<bool> needsEnergy; std::vector<std::vector<std::vector<void*> > > kernelArgs;
// std::vector<bool> invalidatesForces; CUfunction sumEnergyKernel, randomKernel;
// std::vector<bool> merged; std::vector<CustomIntegrator::ComputationType> stepType;
// std::vector<int> forceGroup; std::vector<bool> needsForces;
// std::vector<int> requiredGaussian; std::vector<bool> needsEnergy;
// std::vector<int> requiredUniform; std::vector<bool> invalidatesForces;
// std::vector<std::string> parameterNames; std::vector<bool> merged;
//}; std::vector<int> forceGroup;
std::vector<int> requiredGaussian;
std::vector<int> requiredUniform;
std::vector<std::string> parameterNames;
};
/** /**
* This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities. * This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities.
......
...@@ -39,11 +39,12 @@ using namespace std; ...@@ -39,11 +39,12 @@ using namespace std;
throw OpenMMException(m.str());\ throw OpenMMException(m.str());\
} }
CudaParameterSet::CudaParameterSet(CudaContext& context, int numParameters, int numObjects, const string& name, bool bufferPerParameter) : CudaParameterSet::CudaParameterSet(CudaContext& context, int numParameters, int numObjects, const string& name, bool bufferPerParameter, bool useDoublePrecision) :
context(context), numParameters(numParameters), numObjects(numObjects), name(name) { context(context), numParameters(numParameters), numObjects(numObjects), name(name) {
int params = numParameters; int params = numParameters;
int bufferCount = 0; int bufferCount = 0;
int elementSize = 4; elementSize = (useDoublePrecision ? sizeof(double) : sizeof(float));
string elementType = (useDoublePrecision ? "double" : "float");
CUdeviceptr pointer; CUdeviceptr pointer;
string errorMessage = "Error creating parameter set "+name; string errorMessage = "Error creating parameter set "+name;
if (!bufferPerParameter) { if (!bufferPerParameter) {
...@@ -51,14 +52,14 @@ CudaParameterSet::CudaParameterSet(CudaContext& context, int numParameters, int ...@@ -51,14 +52,14 @@ CudaParameterSet::CudaParameterSet(CudaContext& context, int numParameters, int
CHECK_RESULT(cuMemAlloc(&pointer, numObjects*elementSize*4)); CHECK_RESULT(cuMemAlloc(&pointer, numObjects*elementSize*4));
std::stringstream name; std::stringstream name;
name << "param" << (++bufferCount); name << "param" << (++bufferCount);
buffers.push_back(CudaNonbondedUtilities::ParameterInfo(name.str(), "float", 4, elementSize*4, pointer)); buffers.push_back(CudaNonbondedUtilities::ParameterInfo(name.str(), elementType, 4, elementSize*4, pointer));
params -= 4; params -= 4;
} }
if (params > 1) { if (params > 1) {
CHECK_RESULT(cuMemAlloc(&pointer, numObjects*elementSize*2)); CHECK_RESULT(cuMemAlloc(&pointer, numObjects*elementSize*2));
std::stringstream name; std::stringstream name;
name << "param" << (++bufferCount); name << "param" << (++bufferCount);
buffers.push_back(CudaNonbondedUtilities::ParameterInfo(name.str(), "float", 2, elementSize*2, pointer)); buffers.push_back(CudaNonbondedUtilities::ParameterInfo(name.str(), elementType, 2, elementSize*2, pointer));
params -= 2; params -= 2;
} }
} }
...@@ -66,50 +67,55 @@ CudaParameterSet::CudaParameterSet(CudaContext& context, int numParameters, int ...@@ -66,50 +67,55 @@ CudaParameterSet::CudaParameterSet(CudaContext& context, int numParameters, int
CHECK_RESULT(cuMemAlloc(&pointer, numObjects*elementSize)); CHECK_RESULT(cuMemAlloc(&pointer, numObjects*elementSize));
std::stringstream name; std::stringstream name;
name << "param" << (++bufferCount); name << "param" << (++bufferCount);
buffers.push_back(CudaNonbondedUtilities::ParameterInfo(name.str(), "float", 1, elementSize, pointer)); buffers.push_back(CudaNonbondedUtilities::ParameterInfo(name.str(), elementType, 1, elementSize, pointer));
params--; params--;
} }
} }
CudaParameterSet::~CudaParameterSet() { CudaParameterSet::~CudaParameterSet() {
if (context.getContextIsValid()) {
string errorMessage = "Error freeing device memory"; string errorMessage = "Error freeing device memory";
for (int i = 0; i < (int) buffers.size(); i++) for (int i = 0; i < (int) buffers.size(); i++)
CHECK_RESULT(cuMemFree(buffers[i].getMemory())); CHECK_RESULT(cuMemFree(buffers[i].getMemory()));
}
} }
void CudaParameterSet::getParameterValues(vector<vector<float> >& values) { template <class T>
void CudaParameterSet::getParameterValues(vector<vector<T> >& values) {
if (sizeof(T) != elementSize)
throw OpenMMException("Called getParameterValues() with vector of wrong type");
values.resize(numObjects); values.resize(numObjects);
for (int i = 0; i < numObjects; i++) for (int i = 0; i < numObjects; i++)
values[i].resize(numParameters); values[i].resize(numParameters);
int base = 0; int base = 0;
string errorMessage = "Error downloading parameter set "+name; string errorMessage = "Error downloading parameter set "+name;
for (int i = 0; i < (int) buffers.size(); i++) { for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getType() == "float4") { if (buffers[i].getSize() == 4*elementSize) {
vector<float4> data(numObjects); vector<T> data(4*numObjects);
CHECK_RESULT(cuMemcpyDtoH(&data[0], buffers[i].getMemory(), numObjects*buffers[i].getSize())); CHECK_RESULT(cuMemcpyDtoH(&data[0], buffers[i].getMemory(), numObjects*buffers[i].getSize()));
for (int j = 0; j < numObjects; j++) { for (int j = 0; j < numObjects; j++) {
values[j][base] = data[j].x; values[j][base] = data[4*j];
if (base+1 < numParameters) if (base+1 < numParameters)
values[j][base+1] = data[j].y; values[j][base+1] = data[4*j+1];
if (base+2 < numParameters) if (base+2 < numParameters)
values[j][base+2] = data[j].z; values[j][base+2] = data[4*j+2];
if (base+3 < numParameters) if (base+3 < numParameters)
values[j][base+3] = data[j].w; values[j][base+3] = data[4*j+3];
} }
base += 4; base += 4;
} }
else if (buffers[i].getType() == "float2") { else if (buffers[i].getSize() == 2*elementSize) {
vector<float2> data(numObjects); vector<T> data(2*numObjects);
CHECK_RESULT(cuMemcpyDtoH(&data[0], buffers[i].getMemory(), numObjects*buffers[i].getSize())); CHECK_RESULT(cuMemcpyDtoH(&data[0], buffers[i].getMemory(), numObjects*buffers[i].getSize()));
for (int j = 0; j < numObjects; j++) { for (int j = 0; j < numObjects; j++) {
values[j][base] = data[j].x; values[j][base] = data[2*j];
if (base+1 < numParameters) if (base+1 < numParameters)
values[j][base+1] = data[j].y; values[j][base+1] = data[2*j+1];
} }
base += 2; base += 2;
} }
else if (buffers[i].getType() == "float") { else if (buffers[i].getSize() == elementSize) {
vector<float> data(numObjects); vector<T> data(numObjects);
CHECK_RESULT(cuMemcpyDtoH(&data[0], buffers[i].getMemory(), numObjects*buffers[i].getSize())); CHECK_RESULT(cuMemcpyDtoH(&data[0], buffers[i].getMemory(), numObjects*buffers[i].getSize()));
for (int j = 0; j < numObjects; j++) for (int j = 0; j < numObjects; j++)
values[j][base] = data[j]; values[j][base] = data[j];
...@@ -120,36 +126,39 @@ void CudaParameterSet::getParameterValues(vector<vector<float> >& values) { ...@@ -120,36 +126,39 @@ void CudaParameterSet::getParameterValues(vector<vector<float> >& values) {
} }
} }
void CudaParameterSet::setParameterValues(const vector<vector<float> >& values) { template <class T>
void CudaParameterSet::setParameterValues(const vector<vector<T> >& values) {
if (sizeof(T) != elementSize)
throw OpenMMException("Called setParameterValues() with vector of wrong type");
int base = 0; int base = 0;
string errorMessage = "Error uploading parameter set "+name; string errorMessage = "Error uploading parameter set "+name;
for (int i = 0; i < (int) buffers.size(); i++) { for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getType() == "float4") { if (buffers[i].getSize() == 4*elementSize) {
vector<float4> data(numObjects); vector<T> data(4*numObjects);
for (int j = 0; j < numObjects; j++) { for (int j = 0; j < numObjects; j++) {
data[j].x = values[j][base]; data[4*j] = values[j][base];
if (base+1 < numParameters) if (base+1 < numParameters)
data[j].y = values[j][base+1]; data[4*j+1] = values[j][base+1];
if (base+2 < numParameters) if (base+2 < numParameters)
data[j].z = values[j][base+2]; data[4*j+2] = values[j][base+2];
if (base+3 < numParameters) if (base+3 < numParameters)
data[j].w = values[j][base+3]; data[4*j+3] = values[j][base+3];
} }
CHECK_RESULT(cuMemcpyHtoD(buffers[i].getMemory(), &data[0], numObjects*buffers[i].getSize())); CHECK_RESULT(cuMemcpyHtoD(buffers[i].getMemory(), &data[0], numObjects*buffers[i].getSize()));
base += 4; base += 4;
} }
else if (buffers[i].getType() == "float2") { else if (buffers[i].getSize() == 2*elementSize) {
vector<float2> data(numObjects); vector<T> data(2*numObjects);
for (int j = 0; j < numObjects; j++) { for (int j = 0; j < numObjects; j++) {
data[j].x = values[j][base]; data[2*j] = values[j][base];
if (base+1 < numParameters) if (base+1 < numParameters)
data[j].y = values[j][base+1]; data[2*j+1] = values[j][base+1];
} }
CHECK_RESULT(cuMemcpyHtoD(buffers[i].getMemory(), &data[0], numObjects*buffers[i].getSize())); CHECK_RESULT(cuMemcpyHtoD(buffers[i].getMemory(), &data[0], numObjects*buffers[i].getSize()));
base += 2; base += 2;
} }
else if (buffers[i].getType() == "float") { else if (buffers[i].getSize() == elementSize) {
vector<float> data(numObjects); vector<T> data(numObjects);
for (int j = 0; j < numObjects; j++) for (int j = 0; j < numObjects; j++)
data[j] = values[j][base]; data[j] = values[j][base];
CHECK_RESULT(cuMemcpyHtoD(buffers[i].getMemory(), &data[0], numObjects*buffers[i].getSize())); CHECK_RESULT(cuMemcpyHtoD(buffers[i].getMemory(), &data[0], numObjects*buffers[i].getSize()));
...@@ -164,16 +173,26 @@ string CudaParameterSet::getParameterSuffix(int index, const std::string& extraS ...@@ -164,16 +173,26 @@ string CudaParameterSet::getParameterSuffix(int index, const std::string& extraS
const string suffixes[] = {".x", ".y", ".z", ".w"}; const string suffixes[] = {".x", ".y", ".z", ".w"};
int buffer = -1; int buffer = -1;
for (int i = 0; buffer == -1 && i < (int) buffers.size(); i++) { for (int i = 0; buffer == -1 && i < (int) buffers.size(); i++) {
if (index*sizeof(float) < buffers[i].getSize()) if (index*elementSize < buffers[i].getSize())
buffer = i; buffer = i;
else else
index -= buffers[i].getSize()/sizeof(float); index -= buffers[i].getSize()/elementSize;
} }
if (buffer == -1) if (buffer == -1)
throw OpenMMException("Internal error: Illegal argument to CudaParameterSet::getParameterSuffix() ("+name+")"); throw OpenMMException("Internal error: Illegal argument to CudaParameterSet::getParameterSuffix() ("+name+")");
stringstream suffix; stringstream suffix;
suffix << (buffer+1) << extraSuffix; suffix << (buffer+1) << extraSuffix;
if (buffers[buffer].getType() != "float") if (buffers[buffer].getSize() != elementSize)
suffix << suffixes[index]; suffix << suffixes[index];
return suffix.str(); return suffix.str();
} }
/**
* Define template instantiations for float and double versions of getParameterValues() and setParameterValues().
*/
namespace OpenMM {
template void CudaParameterSet::getParameterValues<float>(vector<vector<float> >& values);
template void CudaParameterSet::setParameterValues<float>(const vector<vector<float> >& values);
template void CudaParameterSet::getParameterValues<double>(vector<vector<double> >& values);
template void CudaParameterSet::setParameterValues<double>(const vector<vector<double> >& values);
}
\ No newline at end of file
...@@ -51,8 +51,9 @@ public: ...@@ -51,8 +51,9 @@ public:
* @param name the name of the parameter set * @param name the name of the parameter set
* @param bufferPerParameter if true, a separate buffer is created for each parameter. If false, * @param bufferPerParameter if true, a separate buffer is created for each parameter. If false,
* multiple parameters may be combined into a single buffer. * multiple parameters may be combined into a single buffer.
* @param useDoublePrecision whether values should be stored as single or double precision
*/ */
CudaParameterSet(CudaContext& context, int numParameters, int numObjects, const std::string& name, bool bufferPerParameter=false); CudaParameterSet(CudaContext& context, int numParameters, int numObjects, const std::string& name, bool bufferPerParameter=false, bool useDoublePrecision=false);
~CudaParameterSet(); ~CudaParameterSet();
/** /**
* Get the number of parameters. * Get the number of parameters.
...@@ -71,13 +72,15 @@ public: ...@@ -71,13 +72,15 @@ public:
* *
* @param values on exit, values[i][j] contains the value of parameter j for object i * @param values on exit, values[i][j] contains the value of parameter j for object i
*/ */
void getParameterValues(std::vector<std::vector<float> >& values); template <class T>
void getParameterValues(std::vector<std::vector<T> >& values);
/** /**
* Set the values of all parameters. * Set the values of all parameters.
* *
* @param values values[i][j] contains the value of parameter j for object i * @param values values[i][j] contains the value of parameter j for object i
*/ */
void setParameterValues(const std::vector<std::vector<float> >& values); template <class T>
void setParameterValues(const std::vector<std::vector<T> >& values);
/** /**
* Get a set of CudaNonbondedUtilities::ParameterInfo objects which describe the Buffers * Get a set of CudaNonbondedUtilities::ParameterInfo objects which describe the Buffers
* containing the data. * containing the data.
...@@ -95,8 +98,7 @@ public: ...@@ -95,8 +98,7 @@ public:
std::string getParameterSuffix(int index, const std::string& extraSuffix = "") const; std::string getParameterSuffix(int index, const std::string& extraSuffix = "") const;
private: private:
CudaContext& context; CudaContext& context;
int numParameters; int numParameters, numObjects, elementSize;
int numObjects;
std::string name; std::string name;
std::vector<CudaNonbondedUtilities::ParameterInfo> buffers; std::vector<CudaNonbondedUtilities::ParameterInfo> buffers;
}; };
......
/**
* Convert a real4 to a real3 by removing its last element.
*/
inline __device__ real3 trim(real4 v) {
return make_real3(v.x, v.y, v.z);
}
/**
* This does nothing, and just exists to simply the code generation.
*/
inline __device__ real3 trim(real3 v) {
return v;
}
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
inline __device__ real4 delta(real4 vec1, real4 vec2) {
real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
}
/**
* Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude.
*/
inline __device__ real4 deltaPeriodic(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
#ifdef USE_PERIODIC
result.x -= floor(result.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
result.y -= floor(result.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
result.z -= floor(result.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
}
/**
* Compute the angle between two vectors. The w component of each vector should contain the squared magnitude.
*/
inline __device__ real computeAngle(real4 vec1, real4 vec2) {
real dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
real cosine = dotProduct*RSQRT(vec1.w*vec2.w);
real angle;
if (cosine > 0.99f || cosine < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real3 crossProduct = cross(vec1, vec2);
real scale = vec1.w*vec2.w;
angle = asin(SQRT(dot(crossProduct, crossProduct)/scale));
if (cosine < 0.0f)
angle = M_PI-angle;
}
else
angle = acos(cosine);
return angle;
}
/**
* Compute the cross product of two vectors, setting the fourth component to the squared magnitude.
*/
inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
real3 result = cross(vec1, vec2);
return make_real4(result.x, result.y, result.z, result.x*result.x + result.y*result.y + result.z*result.z);
}
/**
* Compute forces on donors.
*/
extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ force, real* __restrict__ energyBuffer, const real4* __restrict__ posq,
const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize
PARAMETER_ARGUMENTS) {
extern __shared__ real4 posBuffer[];
real energy = 0;
real3 f1 = make_real3(0);
real3 f2 = make_real3(0);
real3 f3 = make_real3(0);
for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += blockDim.x*gridDim.x) {
// Load information about the donor this thread will compute forces on.
int donorIndex = donorStart+blockIdx.x*blockDim.x+threadIdx.x;
int4 atoms, exclusionIndices;
real4 d1, d2, d3;
if (donorIndex < NUM_DONORS) {
atoms = donorAtoms[donorIndex];
d1 = (atoms.x > -1 ? posq[atoms.x] : make_real4(0));
d2 = (atoms.y > -1 ? posq[atoms.y] : make_real4(0));
d3 = (atoms.z > -1 ? posq[atoms.z] : make_real4(0));
#ifdef USE_EXCLUSIONS
exclusionIndices = exclusions[donorIndex];
#endif
}
else
atoms = make_int4(-1, -1, -1, -1);
for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += blockDim.x) {
// Load the next block of acceptors into local memory.
int blockSize = min((int) blockDim.x, NUM_ACCEPTORS-acceptorStart);
if (threadIdx.x < blockSize) {
int4 atoms2 = acceptorAtoms[acceptorStart+threadIdx.x];
posBuffer[3*threadIdx.x] = (atoms2.x > -1 ? posq[atoms2.x] : make_real4(0));
posBuffer[3*threadIdx.x+1] = (atoms2.y > -1 ? posq[atoms2.y] : make_real4(0));
posBuffer[3*threadIdx.x+2] = (atoms2.z > -1 ? posq[atoms2.z] : make_real4(0));
}
__syncthreads();
if (donorIndex < NUM_DONORS) {
for (int index = 0; index < blockSize; index++) {
#ifdef USE_EXCLUSIONS
int acceptorIndex = acceptorStart+index;
if (acceptorIndex == exclusionIndices.x || acceptorIndex == exclusionIndices.y || acceptorIndex == exclusionIndices.z || acceptorIndex == exclusionIndices.w)
continue;
#endif
// Compute the interaction between a donor and an acceptor.
real4 a1 = posBuffer[3*index];
real4 a2 = posBuffer[3*index+1];
real4 a3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize);
#ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
COMPUTE_DONOR_FORCE
#ifdef USE_CUTOFF
}
#endif
}
}
}
// Write results
if (donorIndex < NUM_DONORS) {
if (atoms.x > -1) {
atomicAdd(&force[atoms.x], static_cast<unsigned long long>((long long) (f1.x*0xFFFFFFFF)));
atomicAdd(&force[atoms.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.y*0xFFFFFFFF)));
atomicAdd(&force[atoms.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.z*0xFFFFFFFF)));
__threadfence_block();
}
if (atoms.y > -1) {
atomicAdd(&force[atoms.y], static_cast<unsigned long long>((long long) (f2.x*0xFFFFFFFF)));
atomicAdd(&force[atoms.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.y*0xFFFFFFFF)));
atomicAdd(&force[atoms.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.z*0xFFFFFFFF)));
__threadfence_block();
}
if (atoms.z > -1) {
atomicAdd(&force[atoms.z], static_cast<unsigned long long>((long long) (f3.x*0xFFFFFFFF)));
atomicAdd(&force[atoms.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.y*0xFFFFFFFF)));
atomicAdd(&force[atoms.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.z*0xFFFFFFFF)));
__threadfence_block();
}
}
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
/**
* Compute forces on acceptors.
*/
extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict__ force, real* __restrict__ energyBuffer, const real4* __restrict__ posq,
const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize
PARAMETER_ARGUMENTS) {
extern __shared__ real4 posBuffer[];
real3 f1 = make_real3(0);
real3 f2 = make_real3(0);
real3 f3 = make_real3(0);
for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += blockDim.x*gridDim.x) {
// Load information about the acceptor this thread will compute forces on.
int acceptorIndex = acceptorStart+blockIdx.x*blockDim.x+threadIdx.x;
int4 atoms, exclusionIndices;
real4 a1, a2, a3;
if (acceptorIndex < NUM_ACCEPTORS) {
atoms = acceptorAtoms[acceptorIndex];
a1 = (atoms.x > -1 ? posq[atoms.x] : make_real4(0));
a2 = (atoms.y > -1 ? posq[atoms.y] : make_real4(0));
a3 = (atoms.z > -1 ? posq[atoms.z] : make_real4(0));
#ifdef USE_EXCLUSIONS
exclusionIndices = exclusions[acceptorIndex];
#endif
}
else
atoms = make_int4(-1, -1, -1, -1);
for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += blockDim.x) {
// Load the next block of donors into local memory.
int blockSize = min((int) blockDim.x, NUM_DONORS-donorStart);
if (threadIdx.x < blockSize) {
int4 atoms2 = donorAtoms[donorStart+threadIdx.x];
posBuffer[3*threadIdx.x] = (atoms2.x > -1 ? posq[atoms2.x] : make_real4(0));
posBuffer[3*threadIdx.x+1] = (atoms2.y > -1 ? posq[atoms2.y] : make_real4(0));
posBuffer[3*threadIdx.x+2] = (atoms2.z > -1 ? posq[atoms2.z] : make_real4(0));
}
__syncthreads();
if (acceptorIndex < NUM_ACCEPTORS) {
for (int index = 0; index < blockSize; index++) {
#ifdef USE_EXCLUSIONS
int donorIndex = donorStart+index;
if (donorIndex == exclusionIndices.x || donorIndex == exclusionIndices.y || donorIndex == exclusionIndices.z || donorIndex == exclusionIndices.w)
continue;
#endif
// Compute the interaction between a donor and an acceptor.
real4 d1 = posBuffer[3*index];
real4 d2 = posBuffer[3*index+1];
real4 d3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize);
#ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
COMPUTE_ACCEPTOR_FORCE
#ifdef USE_CUTOFF
}
#endif
}
}
}
// Write results
if (acceptorIndex < NUM_ACCEPTORS) {
if (atoms.x > -1) {
atomicAdd(&force[atoms.x], static_cast<unsigned long long>((long long) (f1.x*0xFFFFFFFF)));
atomicAdd(&force[atoms.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.y*0xFFFFFFFF)));
atomicAdd(&force[atoms.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.z*0xFFFFFFFF)));
__threadfence_block();
}
if (atoms.y > -1) {
atomicAdd(&force[atoms.y], static_cast<unsigned long long>((long long) (f2.x*0xFFFFFFFF)));
atomicAdd(&force[atoms.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.y*0xFFFFFFFF)));
atomicAdd(&force[atoms.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.z*0xFFFFFFFF)));
__threadfence_block();
}
if (atoms.z > -1) {
atomicAdd(&force[atoms.z], static_cast<unsigned long long>((long long) (f3.x*0xFFFFFFFF)));
atomicAdd(&force[atoms.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.y*0xFFFFFFFF)));
atomicAdd(&force[atoms.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.z*0xFFFFFFFF)));
__threadfence_block();
}
}
}
}
extern "C" __global__ void computeSum(const real* __restrict__ sumBuffer, real* result) {
__shared__ real tempBuffer[WORK_GROUP_SIZE];
const unsigned int thread = threadIdx.x;
real sum = 0;
for (unsigned int index = thread; index < SUM_BUFFER_SIZE; index += blockDim.x)
sum += sumBuffer[index];
tempBuffer[thread] = sum;
for (int i = 1; i < WORK_GROUP_SIZE; i *= 2) {
__syncthreads();
if (thread%(i*2) == 0 && thread+i < WORK_GROUP_SIZE)
tempBuffer[thread] += tempBuffer[thread+i];
}
if (thread == 0)
result[SUM_OUTPUT_INDEX] = tempBuffer[0];
}
extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4* __restrict__ posDelta) {
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
real4 position = posq[index];
position.x += posDelta[index].x;
position.y += posDelta[index].y;
position.z += posDelta[index].z;
posq[index] = position;
posDelta[index] = make_real4(0, 0, 0, 0);
}
}
extern "C" __global__ void generateRandomNumbers(float4* __restrict__ random, uint4* __restrict__ seed) {
uint4 state = seed[blockIdx.x*blockDim.x+threadIdx.x];
unsigned int carry = 0;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Generate three uniform random numbers.
state.x = state.x * 69069 + 1;
state.y ^= state.y << 13;
state.y ^= state.y >> 17;
state.y ^= state.y << 5;
unsigned int k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
unsigned int m = state.w + state.w + state.z + carry;
state.z = state.w;
state.w = m;
carry = k >> 30;
float x1 = (float)max(state.x + state.y + state.w, 0x00000001u) / (float)0xffffffff;
state.x = state.x * 69069 + 1;
state.y ^= state.y << 13;
state.y ^= state.y >> 17;
state.y ^= state.y << 5;
k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
m = state.w + state.w + state.z + carry;
state.z = state.w;
state.w = m;
carry = k >> 30;
float x2 = (float)max(state.x + state.y + state.w, 0x00000001u) / (float)0xffffffff;
state.x = state.x * 69069 + 1;
state.y ^= state.y << 13;
state.y ^= state.y >> 17;
state.y ^= state.y << 5;
k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
m = state.w + state.w + state.z + carry;
state.z = state.w;
state.w = m;
carry = k >> 30;
float x3 = (float)max(state.x + state.y + state.w, 0x00000001u) / (float)0xffffffff;
// Record the values.
random[index] = make_float4(x1, x2, x3, 0.0f);
}
seed[blockIdx.x*blockDim.x+threadIdx.x] = state;
}
extern "C" __global__ void computeGlobal(real2* __restrict__ dt, real* __restrict__ globals, real* __restrict__ params,
float uniform, float gaussian, const real* __restrict__ energy) {
COMPUTE_STEP
}
inline __device__ double4 convertToDouble4(real4 a) {
return make_double4(a.x, a.y, a.z, a.w);
}
inline __device__ real4 convertFromDouble4(double4 a) {
return make_real4(a.x, a.y, a.z, a.w);
}
extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posDelta, real4* __restrict__ velm,
const long long* __restrict__ force, const real2* __restrict__ dt, const real* __restrict__ globals,
const real* __restrict__ params, real* __restrict__ sum, const float4* __restrict__ gaussianValues,
unsigned int randomIndex, const float4* __restrict__ uniformValues, const real* __restrict__ energy
PARAMETER_ARGUMENTS) {
real stepSize = dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x;
randomIndex += index;
const double forceScale = 1.0/0xFFFFFFFF;
while (index < NUM_ATOMS) {
#ifdef LOAD_POS_AS_DELTA
double4 position = convertToDouble4(posq[index]+posDelta[index]);
#else
double4 position = convertToDouble4(posq[index]);
#endif
double4 velocity = convertToDouble4(velm[index]);
double4 f = make_double4(forceScale*force[index], forceScale*force[index+PADDED_NUM_ATOMS], forceScale*force[index+PADDED_NUM_ATOMS*2], 0.0);
double mass = 1.0/velocity.w;
if (velocity.w != 0.0) {
float4 gaussian = gaussianValues[randomIndex];
float4 uniform = uniformValues[index];
COMPUTE_STEP
}
randomIndex += blockDim.x*gridDim.x;
index += blockDim.x*gridDim.x;
}
}
#ifdef USE_CUTOFF
if (!isExcluded && r2 < CUTOFF_SQUARED) {
#else
if (!isExcluded) {
#endif
real tempForce = 0;
COMPUTE_FORCE
dEdR += tempForce*invR;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of CustomHbondForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/CustomHbondForce.h"
#include "openmm/HarmonicAngleForce.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/PeriodicTorsionForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
void testHbond() {
CudaPlatform platform;
// Create a system using a CustomHbondForce.
System customSystem;
customSystem.addParticle(1.0);
customSystem.addParticle(1.0);
customSystem.addParticle(1.0);
customSystem.addParticle(1.0);
customSystem.addParticle(1.0);
CustomHbondForce* custom = new CustomHbondForce("0.5*kr*(distance(d1,a1)-r0)^2 + 0.5*ktheta*(angle(a1,d1,d2)-theta0)^2 + 0.5*kpsi*(angle(d1,a1,a2)-psi0)^2 + kchi*(1+cos(n*dihedral(a3,a2,a1,d1)-chi0))");
custom->addPerDonorParameter("r0");
custom->addPerDonorParameter("theta0");
custom->addPerDonorParameter("psi0");
custom->addPerAcceptorParameter("chi0");
custom->addPerAcceptorParameter("n");
custom->addGlobalParameter("kr", 0.4);
custom->addGlobalParameter("ktheta", 0.5);
custom->addGlobalParameter("kpsi", 0.6);
custom->addGlobalParameter("kchi", 0.7);
vector<double> parameters(3);
parameters[0] = 1.5;
parameters[1] = 1.7;
parameters[2] = 1.9;
custom->addDonor(1, 0, -1, parameters);
parameters.resize(2);
parameters[0] = 2.1;
parameters[1] = 2;
custom->addAcceptor(2, 3, 4, parameters);
custom->setCutoffDistance(10.0);
customSystem.addForce(custom);
// Create an identical system using HarmonicBondForce, HarmonicAngleForce, and PeriodicTorsionForce.
System standardSystem;
standardSystem.addParticle(1.0);
standardSystem.addParticle(1.0);
standardSystem.addParticle(1.0);
standardSystem.addParticle(1.0);
standardSystem.addParticle(1.0);
HarmonicBondForce* bond = new HarmonicBondForce();
bond->addBond(1, 2, 1.5, 0.4);
standardSystem.addForce(bond);
HarmonicAngleForce* angle = new HarmonicAngleForce();
angle->addAngle(0, 1, 2, 1.7, 0.5);
angle->addAngle(1, 2, 3, 1.9, 0.6);
standardSystem.addForce(angle);
PeriodicTorsionForce* torsion = new PeriodicTorsionForce();
torsion->addTorsion(1, 2, 3, 4, 2, 2.1, 0.7);
standardSystem.addForce(torsion);
// Set the atoms in various positions, and verify that both systems give identical forces and energy.
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<Vec3> positions(5);
VerletIntegrator integrator1(0.01);
VerletIntegrator integrator2(0.01);
Context c1(customSystem, integrator1, platform);
Context c2(standardSystem, integrator2, platform);
for (int i = 0; i < 10; i++) {
for (int j = 0; j < (int) positions.size(); j++)
positions[j] = Vec3(2.0*genrand_real2(sfmt), 2.0*genrand_real2(sfmt), 2.0*genrand_real2(sfmt));
c1.setPositions(positions);
c2.setPositions(positions);
State s1 = c1.getState(State::Forces | State::Energy);
State s2 = c2.getState(State::Forces | State::Energy);
for (int i = 0; i < customSystem.getNumParticles(); i++)
ASSERT_EQUAL_VEC(s2.getForces()[i], s1.getForces()[i], TOL);
ASSERT_EQUAL_TOL(s2.getPotentialEnergy(), s1.getPotentialEnergy(), TOL);
}
// Try changing the parameters and make sure it's still correct.
parameters.resize(3);
parameters[0] = 1.4;
parameters[1] = 1.7;
parameters[2] = 1.9;
custom->setDonorParameters(0, 1, 0, -1, parameters);
parameters.resize(2);
parameters[0] = 2.2;
parameters[1] = 2;
custom->setAcceptorParameters(0, 2, 3, 4, parameters);
bond->setBondParameters(0, 1, 2, 1.4, 0.4);
torsion->setTorsionParameters(0, 1, 2, 3, 4, 2, 2.2, 0.7);
custom->updateParametersInContext(c1);
bond->updateParametersInContext(c2);
torsion->updateParametersInContext(c2);
State s1 = c1.getState(State::Forces | State::Energy);
State s2 = c2.getState(State::Forces | State::Energy);
for (int i = 0; i < customSystem.getNumParticles(); i++)
ASSERT_EQUAL_VEC(s2.getForces()[i], s1.getForces()[i], TOL);
ASSERT_EQUAL_TOL(s2.getPotentialEnergy(), s1.getPotentialEnergy(), TOL);
}
void testExclusions() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomHbondForce* custom = new CustomHbondForce("(distance(d1,a1)-1)^2");
custom->addDonor(0, 1, -1, vector<double>());
custom->addDonor(1, 0, -1, vector<double>());
custom->addAcceptor(2, 0, -1, vector<double>());
custom->addExclusion(1, 0);
system.addForce(custom);
Context context(system, integrator, platform);
vector<Vec3> positions(3);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(0, 2, 0);
positions[2] = Vec3(2, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_VEC(Vec3(2, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[1], TOL);
ASSERT_EQUAL_VEC(Vec3(-2, 0, 0), forces[2], TOL);
ASSERT_EQUAL_TOL(1.0, state.getPotentialEnergy(), TOL);
}
void testCutoff() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomHbondForce* custom = new CustomHbondForce("(distance(d1,a1)-1)^2");
custom->addDonor(0, 1, -1, vector<double>());
custom->addDonor(1, 0, -1, vector<double>());
custom->addAcceptor(2, 0, -1, vector<double>());
custom->setNonbondedMethod(CustomHbondForce::CutoffNonPeriodic);
custom->setCutoffDistance(2.5);
system.addForce(custom);
Context context(system, integrator, platform);
vector<Vec3> positions(3);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(0, 3, 0);
positions[2] = Vec3(2, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_VEC(Vec3(2, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[1], TOL);
ASSERT_EQUAL_VEC(Vec3(-2, 0, 0), forces[2], TOL);
ASSERT_EQUAL_TOL(1.0, state.getPotentialEnergy(), TOL);
}
void testCustomFunctions() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomHbondForce* custom = new CustomHbondForce("foo(distance(d1,a1))");
custom->addDonor(1, 0, -1, vector<double>());
custom->addDonor(2, 0, -1, vector<double>());
custom->addAcceptor(0, 1, -1, vector<double>());
vector<double> function(2);
function[0] = 0;
function[1] = 1;
custom->addFunction("foo", function, 0, 10);
system.addForce(custom);
Context context(system, integrator, platform);
vector<Vec3> positions(3);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(0, 2, 0);
positions[2] = Vec3(2, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_VEC(Vec3(0.1, 0.1, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, -0.1, 0), forces[1], TOL);
ASSERT_EQUAL_VEC(Vec3(-0.1, 0, 0), forces[2], TOL);
ASSERT_EQUAL_TOL(0.1*2+0.1*2, state.getPotentialEnergy(), TOL);
}
int main() {
try {
testHbond();
testExclusions();
testCutoff();
testCustomFunctions();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of CustomIntegrator.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/AndersenThermostat.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/CustomIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
/**
* Test a simple leapfrog integrator on a single bond.
*/
void testSingleBond() {
CudaPlatform platform;
System system;
system.addParticle(2.0);
system.addParticle(2.0);
CustomIntegrator integrator(0.01);
integrator.addComputePerDof("v", "v+dt*f/m");
integrator.addComputePerDof("x", "x+dt*v");
HarmonicBondForce* forceField = new HarmonicBondForce();
forceField->addBond(0, 1, 1.5, 1);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(-1, 0, 0);
positions[1] = Vec3(1, 0, 0);
context.setPositions(positions);
// This is simply a harmonic oscillator, so compare it to the analytical solution.
const double freq = 1.0;;
State state = context.getState(State::Energy);
const double initialEnergy = state.getKineticEnergy()+state.getPotentialEnergy();
for (int i = 0; i < 1000; ++i) {
state = context.getState(State::Positions | State::Velocities | State::Energy);
double time = state.getTime();
double expectedDist = 1.5+0.5*std::cos(freq*time);
ASSERT_EQUAL_VEC(Vec3(-0.5*expectedDist, 0, 0), state.getPositions()[0], 0.02);
ASSERT_EQUAL_VEC(Vec3(0.5*expectedDist, 0, 0), state.getPositions()[1], 0.02);
double expectedSpeed = -0.5*freq*std::sin(freq*time);
ASSERT_EQUAL_VEC(Vec3(-0.5*expectedSpeed, 0, 0), state.getVelocities()[0], 0.02);
ASSERT_EQUAL_VEC(Vec3(0.5*expectedSpeed, 0, 0), state.getVelocities()[1], 0.02);
double energy = state.getKineticEnergy()+state.getPotentialEnergy();
ASSERT_EQUAL_TOL(initialEnergy, energy, 0.01);
integrator.step(1);
}
}
/**
* Test an integrator that enforces constraints.
*/
void testConstraints() {
const int numParticles = 8;
const double temp = 500.0;
CudaPlatform platform;
System system;
CustomIntegrator integrator(0.002);
integrator.addPerDofVariable("oldx", 0);
integrator.addComputePerDof("v", "v+dt*f/m");
integrator.addComputePerDof("oldx", "x");
integrator.addComputePerDof("x", "x+dt*v");
integrator.addConstrainPositions();
integrator.addComputePerDof("v", "(x-oldx)/dt");
integrator.setConstraintTolerance(1e-5);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(i%2 == 0 ? 5.0 : 10.0);
forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
}
for (int i = 0; i < numParticles-1; ++i)
system.addConstraint(i, i+1, 1.0);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; ++i) {
positions[i] = Vec3(i/2, (i+1)/2, 0);
velocities[i] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
}
context.setPositions(positions);
context.setVelocities(velocities);
// Simulate it and see whether the constraints remain satisfied.
double initialEnergy = 0.0;
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions | State::Energy);
for (int j = 0; j < system.getNumConstraints(); ++j) {
int particle1, particle2;
double distance;
system.getConstraintParameters(j, particle1, particle2, distance);
Vec3 p1 = state.getPositions()[particle1];
Vec3 p2 = state.getPositions()[particle2];
double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
ASSERT_EQUAL_TOL(distance, dist, 2e-5);
}
double energy = state.getKineticEnergy()+state.getPotentialEnergy();
if (i == 1)
initialEnergy = energy;
else if (i > 1)
ASSERT_EQUAL_TOL(initialEnergy, energy, 0.01);
integrator.step(1);
}
}
/**
* Test an integrator that applies constraints directly to velocities.
*/
void testVelocityConstraints() {
const int numParticles = 10;
CudaPlatform platform;
System system;
CustomIntegrator integrator(0.002);
integrator.addPerDofVariable("x1", 0);
integrator.addComputePerDof("v", "v+0.5*dt*f/m");
integrator.addComputePerDof("x", "x+dt*v");
integrator.addComputePerDof("x1", "x");
integrator.addConstrainPositions();
integrator.addComputePerDof("v", "v+0.5*dt*f/m+(x-x1)/dt");
integrator.addConstrainVelocities();
integrator.setConstraintTolerance(1e-5);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(i%2 == 0 ? 5.0 : 10.0);
forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
}
// Constrain the first three particles with SHAKE.
system.addConstraint(0, 1, 1.0);
system.addConstraint(1, 2, 1.0);
// Constrain the next three with SETTLE.
system.addConstraint(3, 4, 1.0);
system.addConstraint(5, 4, 1.0);
system.addConstraint(3, 5, sqrt(2.0));
// Constraint the rest with CCMA.
for (int i = 6; i < numParticles-1; ++i)
system.addConstraint(i, i+1, 1.0);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; ++i) {
positions[i] = Vec3(i/2, (i+1)/2, 0);
velocities[i] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
}
context.setPositions(positions);
context.setVelocities(velocities);
// Simulate it and see whether the constraints remain satisfied.
double initialEnergy = 0.0;
for (int i = 0; i < 1000; ++i) {
integrator.step(2);
State state = context.getState(State::Positions | State::Velocities | State::Energy);
for (int j = 0; j < system.getNumConstraints(); ++j) {
int particle1, particle2;
double distance;
system.getConstraintParameters(j, particle1, particle2, distance);
Vec3 p1 = state.getPositions()[particle1];
Vec3 p2 = state.getPositions()[particle2];
double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
ASSERT_EQUAL_TOL(distance, dist, 2e-5);
if (i > 0) {
Vec3 v1 = state.getVelocities()[particle1];
Vec3 v2 = state.getVelocities()[particle2];
double vel = (v1-v2).dot(p1-p2);
ASSERT_EQUAL_TOL(0.0, vel, 2e-5);
}
}
double energy = state.getKineticEnergy()+state.getPotentialEnergy();
if (i == 0)
initialEnergy = energy;
else if (i > 0)
ASSERT_EQUAL_TOL(initialEnergy, energy, 0.01);
}
}
/**
* Test an integrator with an AndersenThermostat to see if updateContextState()
* is being handled correctly.
*/
void testWithThermostat() {
const int numParticles = 8;
const double temp = 100.0;
const double collisionFreq = 10.0;
const int numSteps = 10000;
CudaPlatform platform;
System system;
CustomIntegrator integrator(0.005);
integrator.addUpdateContextState();
integrator.addComputePerDof("v", "v+dt*f/m");
integrator.addComputePerDof("x", "x+dt*v");
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(2.0);
forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
}
system.addForce(forceField);
AndersenThermostat* thermostat = new AndersenThermostat(temp, collisionFreq);
system.addForce(thermostat);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; ++i)
positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
context.setPositions(positions);
// Let it equilibrate.
integrator.step(10000);
// Now run it for a while and see if the temperature is correct.
double ke = 0.0;
for (int i = 0; i < numSteps; ++i) {
State state = context.getState(State::Energy);
ke += state.getKineticEnergy();
integrator.step(1);
}
ke /= numSteps;
double expected = 0.5*numParticles*3*BOLTZ*temp;
ASSERT_USUALLY_EQUAL_TOL(expected, ke, 6/std::sqrt((double) numSteps));
}
/**
* Test a Monte Carlo integrator that uses global variables and depends on energy.
*/
void testMonteCarlo() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
CustomIntegrator integrator(0.1);
const double kT = BOLTZ*300.0;
integrator.addGlobalVariable("kT", kT);
integrator.addGlobalVariable("oldE", 0);
integrator.addGlobalVariable("accept", 0);
integrator.addPerDofVariable("oldx", 0);
integrator.addComputeGlobal("oldE", "energy");
integrator.addComputePerDof("oldx", "x");
integrator.addComputePerDof("x", "x+dt*gaussian");
integrator.addComputeGlobal("accept", "step(exp((oldE-energy)/kT)-uniform)");
integrator.addComputePerDof("x", "accept*x + (1-accept)*oldx");
HarmonicBondForce* forceField = new HarmonicBondForce();
forceField->addBond(0, 1, 2.0, 10.0);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(-1, 0, 0);
positions[1] = Vec3(1, 0, 0);
context.setPositions(positions);
// Compute the histogram of distances and see if it satisfies a Boltzmann distribution.
const int numBins = 100;
const double maxDist = 4.0;
const int numIterations = 5000;
vector<int> counts(numBins, 0);
for (int i = 0; i < numIterations; ++i) {
integrator.step(10);
State state = context.getState(State::Positions);
Vec3 delta = state.getPositions()[0]-state.getPositions()[1];
double dist = sqrt(delta.dot(delta));
if (dist < maxDist)
counts[(int) (numBins*dist/maxDist)]++;
}
vector<double> expected(numBins, 0);
double sum = 0;
for (int i = 0; i < numBins; i++) {
double dist = (i+0.5)*maxDist/numBins;
expected[i] = dist*dist*exp(-5.0*(dist-2)*(dist-2)/kT);
sum += expected[i];
}
for (int i = 0; i < numBins; i++)
ASSERT_USUALLY_EQUAL_TOL((double) counts[i]/numIterations, expected[i]/sum, 0.01);
}
/**
* Test the ComputeSum operation.
*/
void testSum() {
const int numParticles = 200;
const double boxSize = 10;
CudaPlatform platform;
System system;
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
NonbondedForce* nb = new NonbondedForce();
system.addForce(nb);
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.5);
nb->addParticle(i%2 == 0 ? 1 : -1, 0.1, 1);
bool close = true;
while (close) {
positions[i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
close = false;
for (int j = 0; j < i; ++j) {
Vec3 delta = positions[i]-positions[j];
if (delta.dot(delta) < 0.1)
close = true;
}
}
}
CustomIntegrator integrator(0.01);
integrator.addGlobalVariable("ke", 0);
integrator.addComputePerDof("v", "v+dt*f/m");
integrator.addComputePerDof("x", "x+dt*v");
integrator.addComputeSum("ke", "m*v*v/2");
Context context(system, integrator, platform);
context.setPositions(positions);
// See if the sum is being computed correctly.
State state = context.getState(State::Energy);
const double initialEnergy = state.getKineticEnergy()+state.getPotentialEnergy();
for (int i = 0; i < 100; ++i) {
state = context.getState(State::Energy);
ASSERT_EQUAL_TOL(state.getKineticEnergy(), integrator.getGlobalVariable(0), 1e-5);
integrator.step(1);
}
}
/**
* Test an integrator that both uses and modifies a context parameter.
*/
void testParameter() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
AndersenThermostat* thermostat = new AndersenThermostat(0.1, 0.1);
system.addForce(thermostat);
CustomIntegrator integrator(0.1);
integrator.addGlobalVariable("temp", 0);
integrator.addComputeGlobal("temp", "AndersenTemperature");
integrator.addComputeGlobal("AndersenTemperature", "temp*2");
Context context(system, integrator, platform);
// See if the parameter is being used correctly.
for (int i = 0; i < 10; i++) {
integrator.step(1);
ASSERT_EQUAL_TOL(context.getParameter("AndersenTemperature"), 0.1*(1<<(i+1)), 1e-5);
}
}
/**
* Test random number distributions.
*/
void testRandomDistributions() {
const int numParticles = 100;
const int numBins = 20;
const int numSteps = 100;
CudaPlatform platform;
System system;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
CustomIntegrator integrator(0.1);
integrator.addPerDofVariable("a", 0);
integrator.addPerDofVariable("b", 0);
integrator.addComputePerDof("a", "uniform");
integrator.addComputePerDof("b", "gaussian");
Context context(system, integrator, platform);
// See if the random numbers are distributed correctly.
vector<int> bins(numBins);
double mean = 0.0;
double var = 0.0;
double skew = 0.0;
double kurtosis = 0.0;
vector<Vec3> values;
for (int i = 0; i < numSteps; i++) {
integrator.step(1);
integrator.getPerDofVariable(0, values);
for (int i = 0; i < numParticles; i++)
for (int j = 0; j < 3; j++) {
double v = values[i][j];
ASSERT(v >= 0 && v < 1);
bins[(int) (v*numBins)]++;
}
integrator.getPerDofVariable(1, values);
for (int i = 0; i < numParticles; i++)
for (int j = 0; j < 3; j++) {
double v = values[i][j];
mean += v;
var += v*v;
skew += v*v*v;
kurtosis += v*v*v*v;
}
}
// Check the distribution of uniform randoms.
int numValues = numParticles*numSteps*3;
double expected = numValues/(double) numBins;
double tol = 4*sqrt(expected);
for (int i = 0; i < numBins; i++)
ASSERT(bins[i] >= expected-tol && bins[i] <= expected+tol);
// Check the distribution of gaussian randoms.
mean /= numValues;
var /= numValues;
skew /= numValues;
kurtosis /= numValues;
double c2 = var-mean*mean;
double c3 = skew-3*var*mean+2*mean*mean*mean;
double c4 = kurtosis-4*skew*mean-3*var*var+12*var*mean*mean-6*mean*mean*mean*mean;
ASSERT_EQUAL_TOL(0.0, mean, 3.0/sqrt((double) numValues));
ASSERT_EQUAL_TOL(1.0, c2, 3.0/pow(numValues, 1.0/3.0));
ASSERT_EQUAL_TOL(0.0, c3, 3.0/pow(numValues, 1.0/4.0));
ASSERT_EQUAL_TOL(0.0, c4, 3.0/pow(numValues, 1.0/4.0));
}
/**
* Test getting and setting per-DOF variables.
*/
void testPerDofVariables() {
const int numParticles = 200;
const double boxSize = 10;
CudaPlatform platform;
System system;
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
NonbondedForce* nb = new NonbondedForce();
system.addForce(nb);
nb->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.5);
nb->addParticle(i%2 == 0 ? 1 : -1, 0.1, 1);
bool close = true;
while (close) {
positions[i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
close = false;
for (int j = 0; j < i; ++j) {
Vec3 delta = positions[i]-positions[j];
if (delta.dot(delta) < 0.1)
close = true;
}
}
}
CustomIntegrator integrator(0.01);
integrator.addPerDofVariable("temp", 0);
integrator.addPerDofVariable("pos", 0);
integrator.addComputePerDof("v", "v+dt*f/m");
integrator.addComputePerDof("x", "x+dt*v");
integrator.addComputePerDof("pos", "x");
Context context(system, integrator, platform);
context.setPositions(positions);
vector<Vec3> initialValues(numParticles);
for (int i = 0; i < numParticles; i++)
initialValues[i] = Vec3(i+0.1, i+0.2, i+0.3);
integrator.setPerDofVariable(0, initialValues);
// Run a simulation, then query per-DOF values and see if they are correct.
vector<Vec3> values;
context.getState(State::Forces); // Cause atom reordering to happen before the first step
for (int i = 0; i < 200; ++i) {
integrator.step(1);
State state = context.getState(State::Positions);
integrator.getPerDofVariable(0, values);
for (int j = 0; j < numParticles; j++)
ASSERT_EQUAL_VEC(initialValues[j], values[j], 1e-5);
integrator.getPerDofVariable(1, values);
for (int j = 0; j < numParticles; j++)
ASSERT_EQUAL_VEC(state.getPositions()[j], values[j], 1e-5);
}
}
/**
* Test evaluating force groups separately.
*/
void testForceGroups() {
CudaPlatform platform;
System system;
system.addParticle(2.0);
system.addParticle(2.0);
CustomIntegrator integrator(0.01);
integrator.addPerDofVariable("outf", 0);
integrator.addPerDofVariable("outf1", 0);
integrator.addPerDofVariable("outf2", 0);
integrator.addGlobalVariable("oute", 0);
integrator.addGlobalVariable("oute1", 0);
integrator.addGlobalVariable("oute2", 0);
integrator.addComputePerDof("outf", "f");
integrator.addComputePerDof("outf1", "f1");
integrator.addComputePerDof("outf2", "f2");
integrator.addComputeGlobal("oute", "energy");
integrator.addComputeGlobal("oute1", "energy1");
integrator.addComputeGlobal("oute2", "energy2");
HarmonicBondForce* bonds = new HarmonicBondForce();
bonds->addBond(0, 1, 1.5, 1.1);
bonds->setForceGroup(1);
system.addForce(bonds);
NonbondedForce* nb = new NonbondedForce();
nb->addParticle(0.2, 1, 0);
nb->addParticle(0.2, 1, 0);
nb->setForceGroup(2);
system.addForce(nb);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(-1, 0, 0);
positions[1] = Vec3(1, 0, 0);
context.setPositions(positions);
// See if the various forces are computed correctly.
integrator.step(1);
vector<Vec3> f, f1, f2;
double e1 = 0.5*1.1*0.5*0.5;
double e2 = 138.935456*0.2*0.2/2.0;
integrator.getPerDofVariable(0, f);
integrator.getPerDofVariable(1, f1);
integrator.getPerDofVariable(2, f2);
ASSERT_EQUAL_VEC(Vec3(1.1*0.5, 0, 0), f1[0], 1e-5);
ASSERT_EQUAL_VEC(Vec3(-1.1*0.5, 0, 0), f1[1], 1e-5);
ASSERT_EQUAL_VEC(Vec3(-138.935456*0.2*0.2/4.0, 0, 0), f2[0], 1e-5);
ASSERT_EQUAL_VEC(Vec3(138.935456*0.2*0.2/4.0, 0, 0), f2[1], 1e-5);
ASSERT_EQUAL_VEC(f1[0]+f2[0], f[0], 1e-5);
ASSERT_EQUAL_VEC(f1[1]+f2[1], f[1], 1e-5);
ASSERT_EQUAL_TOL(e1, integrator.getGlobalVariable(1), 1e-5);
ASSERT_EQUAL_TOL(e2, integrator.getGlobalVariable(2), 1e-5);
ASSERT_EQUAL_TOL(e1+e2, integrator.getGlobalVariable(0), 1e-5);
// Make sure they also match the values returned by the Context.
State s = context.getState(State::Forces | State::Energy, false);
State s1 = context.getState(State::Forces | State::Energy, false, 2);
State s2 = context.getState(State::Forces | State::Energy, false, 4);
vector<Vec3> c, c1, c2;
c = context.getState(State::Forces, false).getForces();
c1 = context.getState(State::Forces, false, 2).getForces();
c2 = context.getState(State::Forces, false, 4).getForces();
ASSERT_EQUAL_VEC(f[0], c[0], 1e-5);
ASSERT_EQUAL_VEC(f[1], c[1], 1e-5);
ASSERT_EQUAL_VEC(f1[0], c1[0], 1e-5);
ASSERT_EQUAL_VEC(f1[1], c1[1], 1e-5);
ASSERT_EQUAL_VEC(f2[0], c2[0], 1e-5);
ASSERT_EQUAL_VEC(f2[1], c2[1], 1e-5);
ASSERT_EQUAL_TOL(s.getPotentialEnergy(), integrator.getGlobalVariable(0), 1e-5);
ASSERT_EQUAL_TOL(s1.getPotentialEnergy(), integrator.getGlobalVariable(1), 1e-5);
ASSERT_EQUAL_TOL(s2.getPotentialEnergy(), integrator.getGlobalVariable(2), 1e-5);
}
/**
* Test a multiple time step r-RESPA integrator.
*/
void testRespa() {
const int numParticles = 8;
CudaPlatform platform;
System system;
system.setDefaultPeriodicBoxVectors(Vec3(4, 0, 0), Vec3(0, 4, 0), Vec3(0, 0, 4));
CustomIntegrator integrator(0.002);
integrator.addComputePerDof("v", "v+0.5*dt*f1/m");
for (int i = 0; i < 2; i++) {
integrator.addComputePerDof("v", "v+0.5*(dt/2)*f0/m");
integrator.addComputePerDof("x", "x+(dt/2)*v");
integrator.addComputePerDof("v", "v+0.5*(dt/2)*f0/m");
}
integrator.addComputePerDof("v", "v+0.5*dt*f1/m");
HarmonicBondForce* bonds = new HarmonicBondForce();
for (int i = 0; i < numParticles-2; i++)
bonds->addBond(i, i+1, 1.0, 0.5);
system.addForce(bonds);
NonbondedForce* nb = new NonbondedForce();
nb->setCutoffDistance(2.0);
nb->setNonbondedMethod(NonbondedForce::Ewald);
for (int i = 0; i < numParticles; ++i) {
system.addParticle(i%2 == 0 ? 5.0 : 10.0);
nb->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
}
nb->setForceGroup(1);
nb->setReciprocalSpaceForceGroup(0);
system.addForce(nb);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; ++i) {
positions[i] = Vec3(i/2, (i+1)/2, 0);
velocities[i] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
}
context.setPositions(positions);
context.setVelocities(velocities);
// Simulate it and monitor energy conservations.
double initialEnergy = 0.0;
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Energy);
double energy = state.getKineticEnergy()+state.getPotentialEnergy();
if (i == 1)
initialEnergy = energy;
else if (i > 1)
ASSERT_EQUAL_TOL(initialEnergy, energy, 0.05);
integrator.step(2);
}
}
int main() {
try {
testSingleBond();
testConstraints();
testVelocityConstraints();
testWithThermostat();
testMonteCarlo();
testSum();
testParameter();
testRandomDistributions();
testPerDofVariables();
testForceGroups();
testRespa();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests all the different force terms in the CUDA implementation of CustomNonbondedForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "sfmt/SFMT.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/CustomNonbondedForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
void testSimpleExpression() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* forceField = new CustomNonbondedForce("-0.1*r^3");
forceField->addParticle(vector<double>());
forceField->addParticle(vector<double>());
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(2, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
double force = 0.1*3*(2*2);
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], TOL);
ASSERT_EQUAL_TOL(-0.1*(2*2*2), state.getPotentialEnergy(), TOL);
}
void testParameters() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* forceField = new CustomNonbondedForce("scale*a*(r*b)^3; a=a1*a2; b=c+b1+b2");
forceField->addPerParticleParameter("a");
forceField->addPerParticleParameter("b");
forceField->addGlobalParameter("scale", 3.0);
forceField->addGlobalParameter("c", -1.0);
vector<double> params(2);
params[0] = 1.5;
params[1] = 2.0;
forceField->addParticle(params);
params[0] = 2.0;
params[1] = 3.0;
forceField->addParticle(params);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(2, 0, 0);
context.setPositions(positions);
context.setParameter("scale", 1.0);
context.setParameter("c", 0.0);
State state = context.getState(State::Forces | State::Energy);
vector<Vec3> forces = state.getForces();
double force = -3.0*3*5.0*(10*10);
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], TOL);
ASSERT_EQUAL_TOL(3.0*(10*10*10), state.getPotentialEnergy(), TOL);
// Try changing the global parameters and make sure it's still correct.
context.setParameter("scale", 1.5);
context.setParameter("c", 1.0);
state = context.getState(State::Forces | State::Energy);
forces = state.getForces();
force = -1.5*3.0*3*6.0*(12*12);
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], TOL);
ASSERT_EQUAL_TOL(1.5*3.0*(12*12*12), state.getPotentialEnergy(), TOL);
// Try changing the per-particle parameters and make sure it's still correct.
params[0] = 1.6;
params[1] = 2.1;
forceField->setParticleParameters(0, params);
params[0] = 1.9;
params[1] = 2.8;
forceField->setParticleParameters(1, params);
forceField->updateParametersInContext(context);
state = context.getState(State::Forces | State::Energy);
forces = state.getForces();
force = -1.5*1.6*1.9*3*5.9*(11.8*11.8);
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], TOL);
ASSERT_EQUAL_TOL(1.5*1.6*1.9*(11.8*11.8*11.8), state.getPotentialEnergy(), TOL);
}
void testManyParameters() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* forceField = new CustomNonbondedForce("(a1*a2+b1*b2+c1*c2+d1*d2+e1*e2)*r");
forceField->addPerParticleParameter("a");
forceField->addPerParticleParameter("b");
forceField->addPerParticleParameter("c");
forceField->addPerParticleParameter("d");
forceField->addPerParticleParameter("e");
vector<double> params(5);
params[0] = 1.0;
params[1] = 2.0;
params[2] = 3.0;
params[3] = 4.0;
params[4] = 5.0;
forceField->addParticle(params);
params[0] = 1.1;
params[1] = 1.2;
params[2] = 1.3;
params[3] = 1.4;
params[4] = 1.5;
forceField->addParticle(params);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(2, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
vector<Vec3> forces = state.getForces();
double force = 1*1.1 + 2*1.2 + 3*1.3 + 4*1.4 + 5*1.5;
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[1], TOL);
ASSERT_EQUAL_TOL(2*force, state.getPotentialEnergy(), TOL);
}
void testExclusions() {
CudaPlatform platform;
System system;
VerletIntegrator integrator(0.01);
CustomNonbondedForce* nonbonded = new CustomNonbondedForce("a*r; a=a1+a2");
nonbonded->addPerParticleParameter("a");
vector<double> params(1);
vector<Vec3> positions(4);
for (int i = 0; i < 4; i++) {
system.addParticle(1.0);
params[0] = i+1;
nonbonded->addParticle(params);
positions[i] = Vec3(i, 0, 0);
}
nonbonded->addExclusion(0, 1);
nonbonded->addExclusion(1, 2);
nonbonded->addExclusion(2, 3);
nonbonded->addExclusion(0, 2);
nonbonded->addExclusion(1, 3);
system.addForce(nonbonded);
Context context(system, integrator, platform);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_VEC(Vec3(1+4, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[1], TOL);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[2], TOL);
ASSERT_EQUAL_VEC(Vec3(-(1+4), 0, 0), forces[3], TOL);
ASSERT_EQUAL_TOL((1+4)*3.0, state.getPotentialEnergy(), TOL);
}
void testCutoff() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* forceField = new CustomNonbondedForce("r");
forceField->addParticle(vector<double>());
forceField->addParticle(vector<double>());
forceField->addParticle(vector<double>());
forceField->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
forceField->setCutoffDistance(2.5);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(3);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(0, 2, 0);
positions[2] = Vec3(0, 3, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_VEC(Vec3(0, 1, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[1], TOL);
ASSERT_EQUAL_VEC(Vec3(0, -1, 0), forces[2], TOL);
ASSERT_EQUAL_TOL(2.0+1.0, state.getPotentialEnergy(), TOL);
}
void testPeriodic() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* forceField = new CustomNonbondedForce("r");
forceField->addParticle(vector<double>());
forceField->addParticle(vector<double>());
forceField->addParticle(vector<double>());
forceField->setNonbondedMethod(CustomNonbondedForce::CutoffPeriodic);
forceField->setCutoffDistance(2.0);
system.setDefaultPeriodicBoxVectors(Vec3(4, 0, 0), Vec3(0, 4, 0), Vec3(0, 0, 4));
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(3);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(0, 2.1, 0);
positions[2] = Vec3(0, 3, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_VEC(Vec3(0, -2, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, 2, 0), forces[1], TOL);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[2], TOL);
ASSERT_EQUAL_TOL(1.9+1+0.9, state.getPotentialEnergy(), TOL);
}
void testTabulatedFunction() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* forceField = new CustomNonbondedForce("fn(r)+1");
forceField->addParticle(vector<double>());
forceField->addParticle(vector<double>());
vector<double> table;
for (int i = 0; i < 21; i++)
table.push_back(std::sin(0.25*i));
forceField->addFunction("fn", table, 1.0, 6.0);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
double tol = 0.01;
for (int i = 1; i < 30; i++) {
double x = (7.0/30.0)*i;
positions[1] = Vec3(x, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
double force = (x < 1.0 || x > 6.0 ? 0.0 : -std::cos(x-1.0));
double energy = (x < 1.0 || x > 6.0 ? 0.0 : std::sin(x-1.0))+1.0;
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], 0.1);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], 0.1);
ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), 0.02);
}
for (int i = 1; i < 20; i++) {
double x = 0.25*i+1.0;
positions[1] = Vec3(x, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Energy);
double energy = (x < 1.0 || x > 6.0 ? 0.0 : std::sin(x-1.0))+1.0;
ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), 1e-4);
}
}
void testCoulombLennardJones() {
const int numMolecules = 300;
const int numParticles = numMolecules*2;
const double boxSize = 20.0;
CudaPlatform platform;
// Create two systems: one with a NonbondedForce, and one using a CustomNonbondedForce to implement the same interaction.
System standardSystem;
System customSystem;
for (int i = 0; i < numParticles; i++) {
standardSystem.addParticle(1.0);
customSystem.addParticle(1.0);
}
NonbondedForce* standardNonbonded = new NonbondedForce();
CustomNonbondedForce* customNonbonded = new CustomNonbondedForce("4*eps*((sigma/r)^12-(sigma/r)^6)+138.935456*q/r; q=q1*q2; sigma=0.5*(sigma1+sigma2); eps=sqrt(eps1*eps2)");
customNonbonded->addPerParticleParameter("q");
customNonbonded->addPerParticleParameter("sigma");
customNonbonded->addPerParticleParameter("eps");
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<double> params(3);
for (int i = 0; i < numMolecules; i++) {
if (i < numMolecules/2) {
standardNonbonded->addParticle(1.0, 0.2, 0.1);
params[0] = 1.0;
params[1] = 0.2;
params[2] = 0.1;
customNonbonded->addParticle(params);
standardNonbonded->addParticle(-1.0, 0.1, 0.1);
params[0] = -1.0;
params[1] = 0.1;
customNonbonded->addParticle(params);
}
else {
standardNonbonded->addParticle(1.0, 0.2, 0.2);
params[0] = 1.0;
params[1] = 0.2;
params[2] = 0.2;
customNonbonded->addParticle(params);
standardNonbonded->addParticle(-1.0, 0.1, 0.2);
params[0] = -1.0;
params[1] = 0.1;
customNonbonded->addParticle(params);
}
positions[2*i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
positions[2*i+1] = Vec3(positions[2*i][0]+1.0, positions[2*i][1], positions[2*i][2]);
velocities[2*i] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt));
velocities[2*i+1] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt));
standardNonbonded->addException(2*i, 2*i+1, 0.0, 1.0, 0.0);
customNonbonded->addExclusion(2*i, 2*i+1);
}
standardNonbonded->setNonbondedMethod(NonbondedForce::NoCutoff);
customNonbonded->setNonbondedMethod(CustomNonbondedForce::NoCutoff);
standardSystem.addForce(standardNonbonded);
customSystem.addForce(customNonbonded);
VerletIntegrator integrator1(0.01);
VerletIntegrator integrator2(0.01);
Context context1(standardSystem, integrator1, platform);
Context context2(customSystem, integrator2, platform);
context1.setPositions(positions);
context2.setPositions(positions);
context1.setVelocities(velocities);
context2.setVelocities(velocities);
State state1 = context1.getState(State::Forces | State::Energy);
State state2 = context2.getState(State::Forces | State::Energy);
ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-4);
for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-4);
}
}
void testParallelComputation() {
CudaPlatform platform;
System system;
const int numParticles = 200;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
CustomNonbondedForce* force = new CustomNonbondedForce("4*eps*((sigma/r)^12-(sigma/r)^6); sigma=0.5; eps=1");
vector<double> params;
for (int i = 0; i < numParticles; i++)
force->addParticle(params);
system.addForce(force);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(5*genrand_real2(sfmt), 5*genrand_real2(sfmt), 5*genrand_real2(sfmt));
for (int i = 0; i < numParticles; ++i)
for (int j = 0; j < i; ++j) {
Vec3 delta = positions[i]-positions[j];
if (delta.dot(delta) < 0.1)
force->addExclusion(i, j);
}
VerletIntegrator integrator1(0.01);
Context context1(system, integrator1, platform);
context1.setPositions(positions);
State state1 = context1.getState(State::Forces | State::Energy);
VerletIntegrator integrator2(0.01);
string deviceIndex = platform.getPropertyValue(context1, CudaPlatform::CudaDeviceIndex());
map<string, string> props;
props[CudaPlatform::CudaDeviceIndex()] = deviceIndex+","+deviceIndex;
Context context2(system, integrator2, platform, props);
context2.setPositions(positions);
State state2 = context2.getState(State::Forces | State::Energy);
ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-5);
for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-5);
}
int main() {
try {
testSimpleExpression();
testParameters();
testManyParameters();
testExclusions();
testCutoff();
testPeriodic();
testTabulatedFunction();
testCoulombLennardJones();
// testParallelComputation();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment