Unverified commit fa893467, authored by Philip Turner and committed by GitHub
Browse files

[macOS GPU Support] Tune dispatching of persistent threads for Apple silicon GPUs (#3978)

* Use 768 instead of 384 threads per GPU core in generic kernels.

* Use 1536 instead of 1024 threads per GPU core in force kernels.
parent aa363660
...@@ -218,6 +218,9 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -218,6 +218,9 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
if (vendor.size() >= 5 && vendor.substr(0, 5) == "Apple") { if (vendor.size() >= 5 && vendor.substr(0, 5) == "Apple") {
simdWidth = 32; simdWidth = 32;
// 768 threads per GPU core.
numThreadBlocksPerComputeUnit = 12;
} }
else if (vendor.size() >= 6 && vendor.substr(0, 6) == "NVIDIA") { else if (vendor.size() >= 6 && vendor.substr(0, 6) == "NVIDIA") {
compilationDefines["WARPS_ARE_ATOMIC"] = ""; compilationDefines["WARPS_ARE_ATOMIC"] = "";
......
...@@ -65,8 +65,14 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con ...@@ -65,8 +65,14 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
forceThreadBlockSize = 1; forceThreadBlockSize = 1;
} }
else if (context.getSIMDWidth() == 32) { else if (context.getSIMDWidth() == 32) {
numForceThreadBlocks = 4*context.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(); int blocksPerComputeUnit = 4;
forceThreadBlockSize = 256; std::string vendor = context.getDevice().getInfo<CL_DEVICE_VENDOR>();
if (vendor.size() >= 5 && vendor.substr(0, 5) == "Apple") {
// 1536 threads per GPU core.
blocksPerComputeUnit = 6;
}
numForceThreadBlocks = blocksPerComputeUnit*context.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
forceThreadBlockSize = 256;
} }
else { else {
numForceThreadBlocks = context.getNumThreadBlocks(); numForceThreadBlocks = context.getNumThreadBlocks();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment