Commit 4d20b76e authored by one
Browse files

Tune HIP neighbor-list launch heuristics

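Apply three launch heuristics for HIP neighbor-list construction:
- use fewer nonbonded force thread blocks (24 per multiprocessor rather than
  the default 48) for small neighbor-list systems, i.e. those with fewer than
  2000 atom blocks;
- use two tiles per batch instead of one when there are 2000 or more atom
  blocks;
- increase the findBlocksWithInteractions thread block size from 128 to 256
  for small neighbor-list systems (fewer than 2000 atom blocks).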

Standard concurrent validation shows no clear per-case regression and a
small geometric-mean throughput improvement over the current blocksPerCU baseline.
parent 4e7070c2
...@@ -65,7 +65,7 @@ HipNonbondedUtilities::HipNonbondedUtilities(HipContext& context) : context(cont ...@@ -65,7 +65,7 @@ HipNonbondedUtilities::HipNonbondedUtilities(HipContext& context) : context(cont
string errorMessage = "Error initializing nonbonded utilities"; string errorMessage = "Error initializing nonbonded utilities";
CHECK_RESULT(hipEventCreateWithFlags(&downloadCountEvent, context.getEventFlags())); CHECK_RESULT(hipEventCreateWithFlags(&downloadCountEvent, context.getEventFlags()));
CHECK_RESULT(hipHostMalloc((void**) &pinnedCountBuffer, 2*sizeof(unsigned int), context.getHostMallocFlags())); CHECK_RESULT(hipHostMalloc((void**) &pinnedCountBuffer, 2*sizeof(unsigned int), context.getHostMallocFlags()));
numForceThreadBlocks = 16*4*context.getMultiprocessors(); numForceThreadBlocks = 48*context.getMultiprocessors();
forceThreadBlockSize = 256; forceThreadBlockSize = 256;
findInteractingBlocksThreadBlockSize = 128; findInteractingBlocksThreadBlockSize = 128;
...@@ -185,6 +185,9 @@ void HipNonbondedUtilities::initialize(const System& system) { ...@@ -185,6 +185,9 @@ void HipNonbondedUtilities::initialize(const System& system) {
numAtoms = context.getNumAtoms(); numAtoms = context.getNumAtoms();
int numAtomBlocks = context.getNumAtomBlocks(); int numAtomBlocks = context.getNumAtomBlocks();
const int blocksPerCU = (useNeighborList && numAtomBlocks < 2000 ? 24 : 48);
numForceThreadBlocks = blocksPerCU*context.getMultiprocessors();
findInteractingBlocksThreadBlockSize = (useNeighborList && numAtomBlocks < 2000 ? 256 : 128);
int numContexts = context.getPlatformData().contexts.size(); int numContexts = context.getPlatformData().contexts.size();
setAtomBlockRange(context.getContextIndex()/(double) numContexts, (context.getContextIndex()+1)/(double) numContexts); setAtomBlockRange(context.getContextIndex()/(double) numContexts, (context.getContextIndex()+1)/(double) numContexts);
...@@ -269,7 +272,7 @@ void HipNonbondedUtilities::initialize(const System& system) { ...@@ -269,7 +272,7 @@ void HipNonbondedUtilities::initialize(const System& system) {
maxTiles = 1; maxTiles = 1;
maxSinglePairs = 5*numAtoms; maxSinglePairs = 5*numAtoms;
// HIP-TODO: This may require tuning // HIP-TODO: This may require tuning
numTilesInBatch = numAtomBlocks < 2000 ? 4 : 1; numTilesInBatch = numAtomBlocks < 2000 ? 4 : 2;
interactingTiles.initialize<int>(context, maxTiles, "interactingTiles"); interactingTiles.initialize<int>(context, maxTiles, "interactingTiles");
interactingAtoms.initialize<int>(context, HipContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms.initialize<int>(context, HipContext::TileSize*maxTiles, "interactingAtoms");
interactionCount.initialize<unsigned int>(context, 2, "interactionCount"); interactionCount.initialize<unsigned int>(context, 2, "interactionCount");
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment