Commit a306aa58 authored by peastman's avatar peastman
Browse files

Minor optimization to CUDA version of CustomManyParticleForce

parent f5acdd7a
...@@ -931,7 +931,7 @@ private: ...@@ -931,7 +931,7 @@ private:
class CudaCalcCustomManyParticleForceKernel : public CalcCustomManyParticleForceKernel { class CudaCalcCustomManyParticleForceKernel : public CalcCustomManyParticleForceKernel {
public: public:
CudaCalcCustomManyParticleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomManyParticleForceKernel(name, platform), CudaCalcCustomManyParticleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomManyParticleForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), params(NULL), globals(NULL), particleTypes(NULL), orderIndex(NULL), particleOrder(NULL), exclusions(NULL), hasInitializedKernel(false), cu(cu), params(NULL), particleTypes(NULL), orderIndex(NULL), particleOrder(NULL), exclusions(NULL),
exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighborPairs(NULL), numNeighborPairs(NULL), neighborStartIndex(NULL), exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighborPairs(NULL), numNeighborPairs(NULL), neighborStartIndex(NULL),
numNeighborsForAtom(NULL), neighbors(NULL), system(system) { numNeighborsForAtom(NULL), neighbors(NULL), system(system) {
} }
...@@ -966,7 +966,6 @@ private: ...@@ -966,7 +966,6 @@ private:
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize; int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals;
CudaArray* particleTypes; CudaArray* particleTypes;
CudaArray* orderIndex; CudaArray* orderIndex;
CudaArray* particleOrder; CudaArray* particleOrder;
...@@ -985,6 +984,7 @@ private: ...@@ -985,6 +984,7 @@ private:
std::vector<void*> forceArgs, blockBoundsArgs, neighborsArgs, startIndicesArgs, copyPairsArgs; std::vector<void*> forceArgs, blockBoundsArgs, neighborsArgs, startIndicesArgs, copyPairsArgs;
const System& system; const System& system;
CUfunction forceKernel, blockBoundsKernel, neighborsKernel, startIndicesKernel, copyPairsKernel; CUfunction forceKernel, blockBoundsKernel, neighborsKernel, startIndicesKernel, copyPairsKernel;
CUdeviceptr globalsPtr;
CUevent event; CUevent event;
}; };
......
...@@ -4440,8 +4440,6 @@ CudaCalcCustomManyParticleForceKernel::~CudaCalcCustomManyParticleForceKernel() ...@@ -4440,8 +4440,6 @@ CudaCalcCustomManyParticleForceKernel::~CudaCalcCustomManyParticleForceKernel()
cu.setAsCurrent(); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
if (orderIndex != NULL) if (orderIndex != NULL)
delete orderIndex; delete orderIndex;
if (particleOrder != NULL) if (particleOrder != NULL)
...@@ -4481,8 +4479,6 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4481,8 +4479,6 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
// Record parameter values. // Record parameter values.
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customManyParticleParameters"); params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customManyParticleParameters");
if (force.getNumGlobalParameters() > 0)
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customManyParticleGlobals");
vector<vector<float> > paramVector(numParticles); vector<vector<float> > paramVector(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
vector<double> parameters; vector<double> parameters;
...@@ -4540,8 +4536,6 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4540,8 +4536,6 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
} }
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customManyParticleGlobals");
globals->upload(globalParamValues);
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]"; string value = "globals["+cu.intToString(i)+"]";
...@@ -4838,8 +4832,6 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4838,8 +4832,6 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
// Create replacements for extra arguments. // Create replacements for extra arguments.
stringstream extraArgs; stringstream extraArgs;
if (force.getNumGlobalParameters() > 0)
extraArgs<<", const float* __restrict__ globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
extraArgs<<", const "<<buffer.getType()<<"* __restrict__ global_params"<<(i+1); extraArgs<<", const "<<buffer.getType()<<"* __restrict__ global_params"<<(i+1);
...@@ -4873,6 +4865,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4873,6 +4865,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance()); defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["TILE_SIZE"] = cu.intToString(CudaContext::TileSize); defines["TILE_SIZE"] = cu.intToString(CudaContext::TileSize);
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks()); defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
defines["NUM_GLOBALS"] = cu.intToString(max(1, force.getNumGlobalParameters()));
defines["FIND_NEIGHBORS_WORKGROUP_SIZE"] = cu.intToString(findNeighborsWorkgroupSize); defines["FIND_NEIGHBORS_WORKGROUP_SIZE"] = cu.intToString(findNeighborsWorkgroupSize);
CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::vectorOps+CudaKernelSources::customManyParticle, replacements), defines); CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::vectorOps+CudaKernelSources::customManyParticle, replacements), defines);
forceKernel = cu.getKernel(module, "computeInteraction"); forceKernel = cu.getKernel(module, "computeInteraction");
...@@ -4882,6 +4875,9 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -4882,6 +4875,9 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
copyPairsKernel = cu.getKernel(module, "copyPairsToNeighborList"); copyPairsKernel = cu.getKernel(module, "copyPairsToNeighborList");
cuFuncSetCacheConfig(forceKernel, CU_FUNC_CACHE_PREFER_L1); cuFuncSetCacheConfig(forceKernel, CU_FUNC_CACHE_PREFER_L1);
cuFuncSetCacheConfig(neighborsKernel, CU_FUNC_CACHE_PREFER_L1); cuFuncSetCacheConfig(neighborsKernel, CU_FUNC_CACHE_PREFER_L1);
size_t bytes;
CHECK_RESULT(cuModuleGetGlobal(&globalsPtr, &bytes, module, "globals"), "Error getting address for constant memory")
cuMemcpyHtoD(globalsPtr, &globalParamValues[0], globalParamValues.size()*sizeof(float));
} }
double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
...@@ -4908,8 +4904,6 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool ...@@ -4908,8 +4904,6 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
forceArgs.push_back(&exclusions->getDevicePointer()); forceArgs.push_back(&exclusions->getDevicePointer());
forceArgs.push_back(&exclusionStartIndex->getDevicePointer()); forceArgs.push_back(&exclusionStartIndex->getDevicePointer());
} }
if (globals != NULL)
forceArgs.push_back(&globals->getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
forceArgs.push_back(&buffer.getMemory()); forceArgs.push_back(&buffer.getMemory());
...@@ -4958,7 +4952,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool ...@@ -4958,7 +4952,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
copyPairsArgs.push_back(&neighborStartIndex->getDevicePointer()); copyPairsArgs.push_back(&neighborStartIndex->getDevicePointer());
} }
} }
if (globals != NULL) { if (globalParamValues.size() > 0) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]); float value = (float) context.getParameter(globalParamNames[i]);
...@@ -4967,7 +4961,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool ...@@ -4967,7 +4961,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); cuMemcpyHtoD(globalsPtr, &globalParamValues[0], globalParamValues.size()*sizeof(float));
} }
while (true) { while (true) {
int* numPairs = (int*) cu.getPinnedBuffer(); int* numPairs = (int*) cu.getPinnedBuffer();
......
...@@ -74,6 +74,8 @@ inline __device__ bool isInteractionExcluded(int atom1, int atom2, int* __restri ...@@ -74,6 +74,8 @@ inline __device__ bool isInteractionExcluded(int atom1, int atom2, int* __restri
return false; return false;
} }
__constant__ float globals[NUM_GLOBALS];
/** /**
* Compute the interaction. * Compute the interaction.
*/ */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment