Unverified commit d5a4ce06, authored by Philip Turner and committed by GitHub
Browse files

[macOS GPU Support] Fix `MTLCommandBuffer` bottlenecks for Apple silicon GPUs (#3960)

* Flushing optimization

* Remove unnecessary checks
parent 8528d8eb
...@@ -355,6 +355,12 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) { ...@@ -355,6 +355,12 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
forceRebuildNeighborList = false; forceRebuildNeighborList = false;
lastCutoff = kernels.cutoffDistance; lastCutoff = kernels.cutoffDistance;
context.getQueue().enqueueReadBuffer(interactionCount.getDeviceBuffer(), CL_FALSE, 0, sizeof(int), pinnedCountMemory, NULL, &downloadCountEvent); context.getQueue().enqueueReadBuffer(interactionCount.getDeviceBuffer(), CL_FALSE, 0, sizeof(int), pinnedCountMemory, NULL, &downloadCountEvent);
#if __APPLE__ && defined(__aarch64__)
// Segment the command stream to avoid stalls later.
if (groupKernels[forceGroups].hasForces)
context.getQueue().flush();
#endif
} }
void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) { void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) {
...@@ -370,6 +376,11 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include ...@@ -370,6 +376,11 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
context.executeKernel(kernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); context.executeKernel(kernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
} }
if (useCutoff && numTiles > 0) { if (useCutoff && numTiles > 0) {
#if __APPLE__ && defined(__aarch64__)
// Ensure cached up work executes while you're waiting.
if (kernels.hasForces)
context.getQueue().flush();
#endif
downloadCountEvent.wait(); downloadCountEvent.wait();
updateNeighborListSize(); updateNeighborListSize();
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment