"...ssh:/git@developer.sourcefind.cn:2222/tsoc/openmm.git" did not exist on "46376ea3e4d0ff37cdaa5e55e034dbf5c3296b81"
Commit 138ae180 authored by peastman
Browse files

Improved performance on AMD GPUs

parent 79ba3504
...@@ -94,7 +94,7 @@ private: ...@@ -94,7 +94,7 @@ private:
OpenCLArray buckets; OpenCLArray buckets;
cl::Kernel shortListKernel, shortList2Kernel, computeRangeKernel, assignElementsKernel, computeBucketPositionsKernel, copyToBucketsKernel, sortBucketsKernel; cl::Kernel shortListKernel, shortList2Kernel, computeRangeKernel, assignElementsKernel, computeBucketPositionsKernel, copyToBucketsKernel, sortBucketsKernel;
unsigned int dataLength, rangeKernelSize, positionsKernelSize, sortKernelSize; unsigned int dataLength, rangeKernelSize, positionsKernelSize, sortKernelSize;
bool isShortList; bool isShortList, useShortList2;
}; };
/** /**
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include "OpenCLKernelSources.h" #include "OpenCLKernelSources.h"
#include <algorithm> #include <algorithm>
#include <map> #include <map>
#include <string>
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -91,6 +92,11 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le ...@@ -91,6 +92,11 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le
bucketOffset.initialize<cl_uint>(context, numBuckets, "bucketOffset"); bucketOffset.initialize<cl_uint>(context, numBuckets, "bucketOffset");
bucketOfElement.initialize<cl_uint>(context, length, "bucketOfElement"); bucketOfElement.initialize<cl_uint>(context, length, "bucketOfElement");
offsetInBucket.initialize<cl_uint>(context, length, "offsetInBucket"); offsetInBucket.initialize<cl_uint>(context, length, "offsetInBucket");
string vendor = context.getDevice().getInfo<CL_DEVICE_VENDOR>();
if (vendor.size() >= 6 && vendor.substr(0, 6) == "NVIDIA")
useShortList2 = (dataLength <= OpenCLContext::ThreadBlockSize*context.getNumThreadBlocks());
else
useShortList2 = false;
} }
buckets.initialize(context, length, trait->getDataSize(), "buckets"); buckets.initialize(context, length, trait->getDataSize(), "buckets");
} }
...@@ -107,7 +113,7 @@ void OpenCLSort::sort(OpenCLArray& data) { ...@@ -107,7 +113,7 @@ void OpenCLSort::sort(OpenCLArray& data) {
if (isShortList) { if (isShortList) {
// We can use a simpler sort kernel that does the entire operation in one kernel. // We can use a simpler sort kernel that does the entire operation in one kernel.
if (dataLength <= OpenCLContext::ThreadBlockSize*context.getNumThreadBlocks()) { if (useShortList2) {
shortList2Kernel.setArg<cl::Buffer>(0, data.getDeviceBuffer()); shortList2Kernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
shortList2Kernel.setArg<cl::Buffer>(1, buckets.getDeviceBuffer()); shortList2Kernel.setArg<cl::Buffer>(1, buckets.getDeviceBuffer());
shortList2Kernel.setArg<cl_int>(2, dataLength); shortList2Kernel.setArg<cl_int>(2, dataLength);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment