Commit 785592f4 authored by peastman's avatar peastman
Browse files

Continuing to work on CUDA implementation of CustomManyParticleForce

parent 890963b9
...@@ -4468,14 +4468,17 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4468,14 +4468,17 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) { for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
string arrayName = "table"+cu.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
CudaArray* array = CudaArray::create<float>(cu, f.size(), "TabulatedFunction"); tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
tabulatedFunctions.push_back(array); tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
array->upload(f); tableArgs << ", const float";
string arrayName = cu.getBondedUtilities().addArgument(array->getDevicePointer(), width == 1 ? "float" : "float"+cu.intToString(width)); if (width > 1)
functionDefinitions.push_back(make_pair(name, arrayName)); tableArgs << width;
tableArgs << "* __restrict__ " << arrayName;
} }
// Record information about parameters. // Record information about parameters.
...@@ -4486,16 +4489,19 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4486,16 +4489,19 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
globalParamNames[i] = force.getGlobalParameterName(i); globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i); globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
} }
map<string, string> variables; vector<pair<ExpressionTreeNode, string> > variables;
for (int i = 0; i < particlesPerSet; i++) { for (int i = 0; i < particlesPerSet; i++) {
string index = cu.intToString(i+1); string index = cu.intToString(i+1);
variables["x"+index] = "pos"+index+".x"; variables.push_back(makeVariable("x"+index, "pos"+index+".x"));
variables["y"+index] = "pos"+index+".y"; variables.push_back(makeVariable("y"+index, "pos"+index+".y"));
variables["z"+index] = "pos"+index+".z"; variables.push_back(makeVariable("z"+index, "pos"+index+".z"));
} }
for (int i = 0; i < force.getNumPerParticleParameters(); i++) { for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i); const string& name = force.getPerParticleParameterName(i);
variables[name] = "params"+params->getParameterSuffix(i); for (int j = 0; j < particlesPerSet; j++) {
string index = cu.intToString(j+1);
variables.push_back(makeVariable(name+index, "params"+params->getParameterSuffix(i, index)));
}
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customManyParticleGlobals"); globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customManyParticleGlobals");
...@@ -4503,7 +4509,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4503,7 +4509,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]"; string value = "globals["+cu.intToString(i)+"]";
variables[name] = value; variables.push_back(makeVariable(name, value));
} }
} }
...@@ -4528,11 +4534,11 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4528,11 +4534,11 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
const vector<int>& atoms = iter->second; const vector<int>& atoms = iter->second;
string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]]; string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
if (computedDeltas.count(deltaName) == 0) { if (computedDeltas.count(deltaName) == 0) {
compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<");\n"; compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
computedDeltas.insert(deltaName); computedDeltas.insert(deltaName);
} }
compute<<"real r_"<<deltaName<<" = sqrt(delta"<<deltaName<<".w);\n"; compute<<"real r_"<<deltaName<<" = sqrt(delta"<<deltaName<<".w);\n";
variables[iter->first] = "r_"+deltaName; variables.push_back(makeVariable(iter->first, "r_"+deltaName));
forceExpressions["real dEdDistance"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize(); forceExpressions["real dEdDistance"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
} }
index = 0; index = 0;
...@@ -4542,15 +4548,15 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4542,15 +4548,15 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]]; string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]; string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]];
if (computedDeltas.count(deltaName1) == 0) { if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[0]]<<");\n"; compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[0]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
computedDeltas.insert(deltaName1); computedDeltas.insert(deltaName1);
} }
if (computedDeltas.count(deltaName2) == 0) { if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[2]]<<");\n"; compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[2]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
computedDeltas.insert(deltaName2); computedDeltas.insert(deltaName2);
} }
compute<<"real "<<angleName<<" = computeAngle(delta"<<deltaName1<<", delta"<<deltaName2<<");\n"; compute<<"real "<<angleName<<" = computeAngle(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
variables[iter->first] = angleName; variables.push_back(makeVariable(iter->first, angleName));
forceExpressions["real dEdAngle"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize(); forceExpressions["real dEdAngle"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
} }
index = 0; index = 0;
...@@ -4563,22 +4569,22 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4563,22 +4569,22 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
string crossName2 = "cross_"+deltaName2+"_"+deltaName3; string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]]; string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]];
if (computedDeltas.count(deltaName1) == 0) { if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<");\n"; compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
computedDeltas.insert(deltaName1); computedDeltas.insert(deltaName1);
} }
if (computedDeltas.count(deltaName2) == 0) { if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[1]]<<");\n"; compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
computedDeltas.insert(deltaName2); computedDeltas.insert(deltaName2);
} }
if (computedDeltas.count(deltaName3) == 0) { if (computedDeltas.count(deltaName3) == 0) {
compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[3]]<<");\n"; compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[3]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
computedDeltas.insert(deltaName3); computedDeltas.insert(deltaName3);
} }
compute<<"real4 "<<crossName1<<" = computeCross(delta"<<deltaName1<<", delta"<<deltaName2<<");\n"; compute<<"real4 "<<crossName1<<" = computeCross(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
compute<<"real4 "<<crossName2<<" = computeCross(delta"<<deltaName2<<", delta"<<deltaName3<<");\n"; compute<<"real4 "<<crossName2<<" = computeCross(delta"<<deltaName2<<", delta"<<deltaName3<<");\n";
compute<<"real "<<dihedralName<<" = computeAngle("<<crossName1<<", "<<crossName2<<");\n"; compute<<"real "<<dihedralName<<" = computeAngle("<<crossName1<<", "<<crossName2<<");\n";
compute<<dihedralName<<" *= (delta"<<deltaName1<<".x*"<<crossName2<<".x + delta"<<deltaName1<<".y*"<<crossName2<<".y + delta"<<deltaName1<<".z*"<<crossName2<<".z < 0 ? -1 : 1);\n"; compute<<dihedralName<<" *= (delta"<<deltaName1<<".x*"<<crossName2<<".x + delta"<<deltaName1<<".y*"<<crossName2<<".y + delta"<<deltaName1<<".z*"<<crossName2<<".z < 0 ? -1 : 1);\n";
variables[iter->first] = dihedralName; variables.push_back(makeVariable(iter->first, dihedralName));
forceExpressions["real dEdDihedral"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize(); forceExpressions["real dEdDihedral"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
} }
...@@ -4670,10 +4676,12 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4670,10 +4676,12 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
// Create other replacements that depend on the number of particles per set. // Create other replacements that depend on the number of particles per set.
stringstream numCombinations, atomsForCombination, isValidCombination, permute, loadData; stringstream numCombinations, atomsForCombination, isValidCombination, permute, loadData, verifyCutoff;
for (int i = 0; i < particlesPerSet; i++) { for (int i = 0; i < particlesPerSet; i++) {
permute<<"int atom"<<(i+1)<<" = p"<<(i+1)<<";\n"; permute<<"int atom"<<(i+1)<<" = p"<<(i+1)<<";\n";
loadData<<"real4 pos"<<(i+1)<<" = posq[atom"<<(i+1)<<"];\n"; loadData<<"real4 pos"<<(i+1)<<" = posq[atom"<<(i+1)<<"];\n";
for (int j = 0; j < (int) params->getBuffers().size(); j++)
loadData<<params->getBuffers()[j].getType()<<" params"<<(j+1)<<(i+1)<<" = global_params"<<(j+1)<<"[atom"<<(i+1)<<"];\n";
} }
for (int i = 2; i < particlesPerSet; i++) { for (int i = 2; i < particlesPerSet; i++) {
if (i > 2) if (i > 2)
...@@ -4688,6 +4696,14 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4688,6 +4696,14 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
atomsForCombination<<"int p"<<(i+1)<<" = p1+1+tempIndex%numNeighbors;\n"; atomsForCombination<<"int p"<<(i+1)<<" = p1+1+tempIndex%numNeighbors;\n";
atomsForCombination<<"tempIndex /= numNeighbors;\n"; atomsForCombination<<"tempIndex /= numNeighbors;\n";
} }
if (nonbondedMethod != NoCutoff) {
int startCheckFrom = 0;
for (int i = startCheckFrom; i < particlesPerSet; i++)
verifyCutoff<<"real4 pos"<<(i+1)<<" = posq[p"<<(i+1)<<"];\n";
for (int i = startCheckFrom; i < particlesPerSet; i++)
for (int j = i+1; j < particlesPerSet; j++)
verifyCutoff<<"includeInteraction &= (delta(pos"<<(i+1)<<", pos"<<(j+1)<<", periodicBoxSize, invPeriodicBoxSize).w < CUTOFF_SQUARED);\n";
}
// Create replacements for extra arguments. // Create replacements for extra arguments.
...@@ -4696,7 +4712,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4696,7 +4712,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
extraArgs<<", const float* __restrict__ globals"; extraArgs<<", const float* __restrict__ globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
extraArgs<<", const "<<buffer.getType()<<"* __restrict__ global_params"; extraArgs<<", const "<<buffer.getType()<<"* __restrict__ global_params"<<(i+1);
} }
// Create the kernels. // Create the kernels.
...@@ -4706,13 +4722,19 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4706,13 +4722,19 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
replacements["NUM_CANDIDATE_COMBINATIONS"] = numCombinations.str(); replacements["NUM_CANDIDATE_COMBINATIONS"] = numCombinations.str();
replacements["FIND_ATOMS_FOR_COMBINATION_INDEX"] = atomsForCombination.str(); replacements["FIND_ATOMS_FOR_COMBINATION_INDEX"] = atomsForCombination.str();
replacements["IS_VALID_COMBINATION"] = isValidCombination.str(); replacements["IS_VALID_COMBINATION"] = isValidCombination.str();
replacements["VERIFY_CUTOFF"] = verifyCutoff.str();
replacements["PERMUTE_ATOMS"] = permute.str(); replacements["PERMUTE_ATOMS"] = permute.str();
replacements["LOAD_PARTICLE_DATA"] = loadData.str(); replacements["LOAD_PARTICLE_DATA"] = loadData.str();
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
map<string, string> defines; map<string, string> defines;
if (nonbondedMethod != NoCutoff)
defines["USE_CUTOFF"] = "1";
if (nonbondedMethod == CutoffPeriodic)
defines["USE_PERIODIC"] = "1";
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["M_PI"] = cu.doubleToString(M_PI); defines["M_PI"] = cu.doubleToString(M_PI);
defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
std::cout << cu.replaceStrings(CudaKernelSources::vectorOps+CudaKernelSources::customManyParticle, replacements)<< std::endl; std::cout << cu.replaceStrings(CudaKernelSources::vectorOps+CudaKernelSources::customManyParticle, replacements)<< std::endl;
CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::vectorOps+CudaKernelSources::customManyParticle, replacements), defines); CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::vectorOps+CudaKernelSources::customManyParticle, replacements), defines);
forceKernel = cu.getKernel(module, "computeInteraction"); forceKernel = cu.getKernel(module, "computeInteraction");
...@@ -4725,10 +4747,8 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool ...@@ -4725,10 +4747,8 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
forceArgs.push_back(&cu.getForce().getDevicePointer()); forceArgs.push_back(&cu.getForce().getDevicePointer());
forceArgs.push_back(&cu.getEnergyBuffer().getDevicePointer()); forceArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
forceArgs.push_back(&cu.getPosq().getDevicePointer()); forceArgs.push_back(&cu.getPosq().getDevicePointer());
if (nonbondedMethod != NoCutoff) {
forceArgs.push_back(cu.getPeriodicBoxSizePointer()); forceArgs.push_back(cu.getPeriodicBoxSizePointer());
forceArgs.push_back(cu.getInvPeriodicBoxSizePointer()); forceArgs.push_back(cu.getInvPeriodicBoxSizePointer());
}
if (globals != NULL) if (globals != NULL)
forceArgs.push_back(&globals->getDevicePointer()); forceArgs.push_back(&globals->getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
......
...@@ -15,10 +15,16 @@ inline __device__ real3 trim(real4 v) { ...@@ -15,10 +15,16 @@ inline __device__ real3 trim(real4 v) {
} }
/** /**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude. * Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude.
*/ */
inline __device__ real4 delta(real4 vec1, real4 vec2) { inline __device__ real4 delta(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0); real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
#ifdef USE_PERIODIC
result.x -= floor(result.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
result.y -= floor(result.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
result.z -= floor(result.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
result.w = result.x*result.x + result.y*result.y + result.z*result.z; result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result; return result;
} }
...@@ -56,10 +62,8 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) { ...@@ -56,10 +62,8 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
* Compute the interaction. * Compute the interaction.
*/ */
extern "C" __global__ void computeInteraction( extern "C" __global__ void computeInteraction(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq,
#ifdef USE_CUTOFF real4 periodicBoxSize, real4 invPeriodicBoxSize
, real4 periodicBoxSize, real4 invPeriodicBoxSize
#endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
real energy = 0.0f; real energy = 0.0f;
...@@ -71,6 +75,11 @@ extern "C" __global__ void computeInteraction( ...@@ -71,6 +75,11 @@ extern "C" __global__ void computeInteraction(
for (int index = threadIdx.x; index < numCombinations; index += blockDim.x) { for (int index = threadIdx.x; index < numCombinations; index += blockDim.x) {
FIND_ATOMS_FOR_COMBINATION_INDEX; FIND_ATOMS_FOR_COMBINATION_INDEX;
bool includeInteraction = IS_VALID_COMBINATION; bool includeInteraction = IS_VALID_COMBINATION;
#ifdef USE_CUTOFF
if (includeInteraction) {
VERIFY_CUTOFF;
}
#endif
if (includeInteraction) { if (includeInteraction) {
PERMUTE_ATOMS; PERMUTE_ATOMS;
LOAD_PARTICLE_DATA; LOAD_PARTICLE_DATA;
......
...@@ -466,12 +466,12 @@ int main(int argc, char* argv[]) { ...@@ -466,12 +466,12 @@ int main(int argc, char* argv[]) {
if (argc > 1) if (argc > 1)
platform.setPropertyDefaultValue("CudaPrecision", string(argv[1])); platform.setPropertyDefaultValue("CudaPrecision", string(argv[1]));
testNoCutoff(); testNoCutoff();
// testCutoff(); testCutoff();
// testPeriodic(); testPeriodic();
// testExclusions(); // testExclusions();
// testAllTerms(); // testAllTerms();
// testParameters(); testParameters();
// testTabulatedFunctions(); testTabulatedFunctions();
// testTypeFilters(); // testTypeFilters();
} }
catch(const exception& e) { catch(const exception& e) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment