Commit 4c28df55 authored by Peter Eastman
Browse files

CUDA implementation of vector functions for CustomIntegrator

parent cb92103e
......@@ -1495,9 +1495,8 @@ class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER};
CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
hasInitializedKernels(false), localValuesAreCurrent(false), perDofValues(NULL), needsEnergyParamDerivs(false) {
hasInitializedKernels(false), needsEnergyParamDerivs(false) {
}
~CudaIntegrateCustomStepKernel();
/**
* Initialize the kernel.
*
......@@ -1561,7 +1560,7 @@ private:
class ReorderListener;
class GlobalTarget;
class DerivFunction;
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator,
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator,
const std::string& forceName, const std::string& energyName, std::vector<const TabulatedFunction*>& functions,
std::vector<std::pair<std::string, std::string> >& functionNames);
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
......@@ -1574,21 +1573,21 @@ private:
double energy;
float energyFloat;
int numGlobalVariables, sumWorkGroupSize;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent;
bool hasInitializedKernels, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
std::vector<bool> deviceValuesAreCurrent;
mutable std::vector<bool> localValuesAreCurrent;
CudaArray globalValues;
CudaArray sumBuffer;
CudaArray summedValue;
CudaArray uniformRandoms;
CudaArray randomSeed;
CudaArray perDofEnergyParamDerivs;
std::vector<CudaArray> tabulatedFunctions;
std::vector<CudaArray> tabulatedFunctions, perDofValues;
std::map<int, double> savedEnergy;
std::map<int, CudaArray> savedForces;
std::set<int> validSavedForces;
CudaParameterSet* perDofValues;
mutable std::vector<std::vector<float> > localPerDofValuesFloat;
mutable std::vector<std::vector<double> > localPerDofValuesDouble;
mutable std::vector<std::vector<float4> > localPerDofValuesFloat;
mutable std::vector<std::vector<double4> > localPerDofValuesDouble;
std::map<std::string, double> energyParamDerivs;
std::vector<std::string> perDofEnergyParamDerivNames;
std::vector<float> localPerDofEnergyParamDerivsFloat;
......
......@@ -203,11 +203,12 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
break;
}
}
if (this->deviceIndex == -1)
if (this->deviceIndex == -1) {
if (deviceIndex != -1)
throw OpenMMException("The requested CUDA device could not be loaded");
else
throw OpenMMException("No compatible CUDA device is available");
}
}
else {
isLinkedContext = true;
......
This diff is collapsed.
......@@ -23,10 +23,14 @@ inline __device__ void storePos(real4* __restrict__ posq, real4* __restrict__ po
#endif
}
inline __device__ double4 convertToDouble4(mixed4 a) {
inline __device__ double4 convertToDouble4(float4 a) {
return make_double4(a.x, a.y, a.z, a.w);
}
/**
 * Identity overload of convertToDouble4: the argument is already a double4,
 * so it is returned unchanged. It sits alongside the float4 overload so the
 * same call compiles for either input type.
 */
inline __device__ double4 convertToDouble4(double4 a) {
return a;
}
/**
 * Build a mixed4 from the x, y, z and w components of a double4 by passing
 * them to make_mixed4.
 * NOTE(review): mixed4 / make_mixed4 are defined outside this view; presumably
 * they resolve to the float or double vector type selected by the precision
 * mode -- confirm against the platform's common definitions.
 */
inline __device__ mixed4 convertFromDouble4(double4 a) {
return make_mixed4(a.x, a.y, a.z, a.w);
}
......@@ -36,7 +40,7 @@ extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __rest
mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues,
const mixed energy, mixed* __restrict__ energyParamDerivs
PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y;
double3 stepSize = make_double3(dt[0].y);
int index = blockIdx.x*blockDim.x+threadIdx.x;
const double forceScale = 1.0/0xFFFFFFFF;
while (index < NUM_ATOMS) {
......
......@@ -7592,26 +7592,15 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus
string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator,
const string& forceName, const string& energyName, vector<const TabulatedFunction*>& functions, vector<pair<string, string> >& functionNames) {
string tempType = (cl.getSupportsDoublePrecision() ? "double3" : "float3");
string convert = (cl.getSupportsDoublePrecision() ? "convert_double3" : "");
map<string, Lepton::ParsedExpression> expressions;
if (variable == "x")
expressions["position.xyz = "] = expr;
else if (variable == "v")
expressions["velocity.xyz = "] = expr;
else if (variable == "")
expressions[tempType+" tempSum = "] = expr;
else {
for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
if (variable == integrator.getPerDofVariableName(i))
expressions["perDof"+cl.intToString(i)+" = "] = expr;
}
if (expressions.size() == 0)
throw OpenMMException("Unknown per-DOF variable: "+variable);
expressions[tempType+" tempResult = "] = expr;
map<string, string> variables;
variables["x"] = "position.xyz";
variables["v"] = "velocity.xyz";
variables[forceName] = "f.xyz";
variables["gaussian"] = "convert_mixed4(gaussian).xyz";
variables["uniform"] = "convert_mixed4(uniform).xyz";
variables["x"] = convert+"(position.xyz)";
variables["v"] = convert+"(velocity.xyz)";
variables[forceName] = convert+"(f.xyz)";
variables["gaussian"] = convert+"(gaussian.xyz)";
variables["uniform"] = convert+"(uniform.xyz)";
variables["m"] = "mass";
variables["dt"] = "stepSize";
if (energyName != "")
......@@ -7619,7 +7608,7 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(globalVariableIndex[i])+"]";
for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
variables[integrator.getPerDofVariableName(i)] = "perDof"+cl.intToString(i);
variables[integrator.getPerDofVariableName(i)] = convert+"(perDof"+cl.intToString(i)+")";
for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "globals["+cl.intToString(parameterVariableIndex[i])+"]";
vector<pair<ExpressionTreeNode, string> > variableNodes;
......@@ -7627,8 +7616,19 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
for (auto& var : variables)
variableNodes.push_back(make_pair(ExpressionTreeNode(new Operation::Variable(var.first)), var.second));
string result = cl.getExpressionUtilities().createExpressions(expressions, variableNodes, functions, functionNames, "temp", tempType);
if (variable == "")
result += "sum[index] = tempSum.x+tempSum.y+tempSum.z;\n";
if (variable == "x")
result += "position.x = tempResult.x; position.y = tempResult.y; position.z = tempResult.z;\n";
else if (variable == "v")
result += "velocity.x = tempResult.x; velocity.y = tempResult.y; velocity.z = tempResult.z;\n";
else if (variable == "")
result += "sum[index] = tempResult.x+tempResult.y+tempResult.z;\n";
else {
for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
if (variable == integrator.getPerDofVariableName(i)) {
string varName = "perDof"+cl.intToString(i);
result += varName+".x = tempResult.x; "+varName+".y = tempResult.y; "+varName+".z = tempResult.z;\n";
}
}
return result;
}
......@@ -7637,7 +7637,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
int numAtoms = cl.getNumAtoms();
int numSteps = integrator.getNumComputations();
bool useDouble = cl.getUseDoublePrecision() || cl.getUseMixedPrecision();
string tempType = (useDouble ? "double3" : "float3");
string tempType = (cl.getSupportsDoublePrecision() ? "double3" : "float3");
string perDofType = (useDouble ? "double4" : "float4");
if (!hasInitializedKernels) {
hasInitializedKernels = true;
......@@ -7849,7 +7849,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
stringstream compute;
for (int i = 0; i < perDofValues.size(); i++)
compute << tempType<<" perDof"<<cl.intToString(i)<<" = perDofValues"<<cl.intToString(i)<<"[index].xyz;\n";
compute << tempType<<" perDof"<<cl.intToString(i)<<" = convert_"<<tempType<<"(perDofValues"<<cl.intToString(i)<<"[index].xyz);\n";
int numGaussian = 0, numUniform = 0;
for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
numGaussian += numAtoms*usesVariable(expression[j][0], "gaussian");
......@@ -7874,7 +7874,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
compute << "velm[index] = convert_mixed4(velocity);\n";
else {
for (int i = 0; i < perDofValues.size(); i++)
compute << "perDofValues"<<cl.intToString(i)<<"[index] = ("<<perDofType<<") (perDof"<<cl.intToString(i)<<", 0);\n";
compute << "perDofValues"<<cl.intToString(i)<<"[index] = ("<<perDofType<<") (perDof"<<cl.intToString(i)<<".x, perDof"<<cl.intToString(i)<<".y, perDof"<<cl.intToString(i)<<".z, 0);\n";
}
if (numGaussian > 0)
compute << "gaussianIndex += NUM_ATOMS;\n";
......@@ -7971,7 +7971,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
stringstream computeKE;
for (int i = 0; i < perDofValues.size(); i++)
computeKE << tempType<<" perDof"<<cl.intToString(i)<<" = perDofValues"<<cl.intToString(i)<<"[index].xyz;\n";
computeKE << tempType<<" perDof"<<cl.intToString(i)<<" = convert_"<<tempType<<"(perDofValues"<<cl.intToString(i)<<"[index].xyz);\n";
Lepton::ParsedExpression keExpression = Lepton::Parser::parse(integrator.getKineticEnergyExpression()).optimize();
computeKE << createPerDofComputation("", keExpression, integrator, "f", "", functionList, functionNames);
map<string, string> replacements;
......@@ -8005,7 +8005,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel.setArg<cl_float>(index++, 0.0f);
kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs.getDeviceBuffer());
for (auto& array : perDofValues)
kineticEnergyKernel.setArg<cl::Memory>(index++, array.getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, array.getDeviceBuffer());
for (auto& array : tabulatedFunctions)
kineticEnergyKernel.setArg<cl::Buffer>(index++, array.getDeviceBuffer());
keNeedsForce = usesVariable(keExpression, "f");
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment