"plugins/vscode:/vscode.git/clone" did not exist on "a919ab447b201f0918d79c6665d7d3ed166830f3"
Commit 433ca1ea authored by peastman's avatar peastman
Browse files

Merge pull request #1205 from rmcgibbo/which-device

Ensure context can be created when selecting CUDA device
parents dddc9e45 eb5a680d
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <map> #include <map>
#include <queue> #include <queue>
#include <string> #include <string>
#include <utility>
#define __CL_ENABLE_EXCEPTIONS #define __CL_ENABLE_EXCEPTIONS
#ifdef _MSC_VER #ifdef _MSC_VER
// Prevent Windows from defining macros that interfere with other code. // Prevent Windows from defining macros that interfere with other code.
...@@ -538,6 +539,11 @@ public: ...@@ -538,6 +539,11 @@ public:
*/ */
void invalidateMolecules(); void invalidateMolecules();
private: private:
/**
 * Compute a sorted list of device indices in decreasing order of desirability.
 *
 * Devices reporting compute capability 1.0 or 1.1 are left out, as are devices
 * below compute capability 1.3 when double or mixed precision is in use.  The
 * remaining devices are ordered by major compute capability (higher first),
 * then by clock rate multiplied by multiprocessor count (higher first), and
 * finally by device index (lower first).
 *
 * @return the indices of the usable devices, most desirable first; the list is
 *         empty if no device qualifies
 */
std::vector<int> getDevicePrecedence();
struct Molecule; struct Molecule;
struct MoleculeGroup; struct MoleculeGroup;
class VirtualSiteInfo; class VirtualSiteInfo;
......
...@@ -122,49 +122,48 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -122,49 +122,48 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
CHECK_RESULT(cuDeviceGetCount(&numDevices)); CHECK_RESULT(cuDeviceGetCount(&numDevices));
if (deviceIndex < -1 || deviceIndex >= numDevices) if (deviceIndex < -1 || deviceIndex >= numDevices)
throw OpenMMException("Illegal value for CudaDeviceIndex: "+intToString(deviceIndex)); throw OpenMMException("Illegal value for CudaDeviceIndex: "+intToString(deviceIndex));
if (deviceIndex == -1) {
// Try to figure out which device is the fastest.
int bestSpeed = -1; vector<int> devicePrecedence;
int bestCompute = -1; if (deviceIndex == -1) {
for (int i = 0; i < numDevices; i++) { devicePrecedence = getDevicePrecedence();
CHECK_RESULT(cuDeviceGet(&device, i)); } else {
int major, minor, clock, multiprocessors; devicePrecedence.push_back(deviceIndex);
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
if (major == 1 && minor < 2)
continue; // 1.0 and 1.1 are not supported
CHECK_RESULT(cuDeviceGetAttribute(&clock, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device));
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device));
int speed = clock*multiprocessors;
if (major > bestCompute || (major == bestCompute && speed > bestSpeed)) {
deviceIndex = i;
bestSpeed = speed;
bestCompute = major;
} }
this->deviceIndex = -1;
for (int i = 0; i < static_cast<int>(devicePrecedence.size()); i++) {
int trialDeviceIndex = devicePrecedence[i];
CHECK_RESULT(cuDeviceGet(&device, trialDeviceIndex));
defaultOptimizationOptions = "--use_fast_math";
unsigned int flags = CU_CTX_MAP_HOST;
if (useBlockingSync)
flags += CU_CTX_SCHED_BLOCKING_SYNC;
else
flags += CU_CTX_SCHED_SPIN;
if (cuCtxCreate(&context, flags, device) == CUDA_SUCCESS) {
this->deviceIndex = trialDeviceIndex;
break;
} }
} }
if (deviceIndex == -1) if (this->deviceIndex == -1)
if (deviceIndex != -1)
throw OpenMMException("The requested CUDA device could not be loaded");
else
throw OpenMMException("No compatible CUDA device is available"); throw OpenMMException("No compatible CUDA device is available");
CHECK_RESULT(cuDeviceGet(&device, deviceIndex));
this->deviceIndex = deviceIndex;
int major, minor; int major, minor;
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device)); CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
// This is a workaround to support GTX 980 with CUDA 6.5. It reports its compute capability #if __CUDA_API_VERSION < 7000
// as 5.2, but the compiler doesn't support anything beyond 5.0. We can remove this once // This is a workaround to support GTX 980 with CUDA 6.5. It reports
// CUDA 7.0 is released. // its compute capability as 5.2, but the compiler doesn't support
// anything beyond 5.0.
if (major == 5) if (major == 5)
minor = 0; minor = 0;
#endif
gpuArchitecture = intToString(major)+intToString(minor); gpuArchitecture = intToString(major)+intToString(minor);
computeCapability = major+0.1*minor; computeCapability = major+0.1*minor;
if ((useDoublePrecision || useMixedPrecision) && computeCapability < 1.3)
throw OpenMMException("This device does not support double precision");
defaultOptimizationOptions = "--use_fast_math";
unsigned int flags = CU_CTX_MAP_HOST;
if (useBlockingSync)
flags += CU_CTX_SCHED_BLOCKING_SYNC;
else
flags += CU_CTX_SCHED_SPIN;
CHECK_RESULT(cuCtxCreate(&context, flags, device));
contextIsValid = true; contextIsValid = true;
CHECK_RESULT(cuCtxSetCacheConfig(CU_FUNC_CACHE_PREFER_SHARED)); CHECK_RESULT(cuCtxSetCacheConfig(CU_FUNC_CACHE_PREFER_SHARED));
if (contextIndex > 0) { if (contextIndex > 0) {
...@@ -1393,3 +1392,41 @@ void CudaContext::WorkThread::flush() { ...@@ -1393,3 +1392,41 @@ void CudaContext::WorkThread::flush() {
pthread_cond_wait(&queueEmptyCondition, &queueLock); pthread_cond_wait(&queueEmptyCondition, &queueLock);
pthread_mutex_unlock(&queueLock); pthread_mutex_unlock(&queueLock);
} }
/**
 * Build the list of usable CUDA device indices, most desirable first.
 *
 * A device is skipped when it reports compute capability 1.0 or 1.1, or when
 * double or mixed precision is in use and its compute capability is below 1.3.
 * The remaining devices are ranked by major compute capability (higher is
 * better), then by clock rate times multiprocessor count (higher is better),
 * and finally by device index (lower is better).  The minor compute capability
 * version is used only for filtering; it does not take part in the ranking.
 *
 * @return the indices of the usable devices in decreasing order of
 *         desirability; empty if no device qualifies
 */
vector<int> CudaContext::getDevicePrecedence() {
    int numDevices;
    CUdevice thisDevice;
    // NOTE(review): presumably read by the CHECK_RESULT macro when a driver call fails -- confirm.
    string errorMessage = "Error initializing Context";
    // Each entry is ((major compute capability, speed), -device index).
    vector<pair<pair<int, int>, int> > devices;

    CHECK_RESULT(cuDeviceGetCount(&numDevices));
    for (int i = 0; i < numDevices; i++) {
        CHECK_RESULT(cuDeviceGet(&thisDevice, i));
        int major, minor, clock, multiprocessors;
        CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, thisDevice));
        if (major == 1 && minor < 2)
            continue; // 1.0 and 1.1 are not supported

        // Compare the version numbers as integers: the former test
        // (major+0.1*minor < 1.3) relied on floating point rounding to
        // classify a 1.3 device correctly.
        const bool belowDoubleSupport = (major < 1 || (major == 1 && minor < 3));
        if ((useDoublePrecision || useMixedPrecision) && belowDoubleSupport)
            continue;

        CHECK_RESULT(cuDeviceGetAttribute(&clock, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, thisDevice));
        CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, thisDevice));
        const int speed = clock*multiprocessors;

        // The index is stored negated so that, once the list is in descending
        // order, ties on (major, speed) are resolved in favor of the lower index.
        devices.push_back(std::make_pair(std::make_pair(major, speed), -i));
    }

    // Sorting through reverse iterators leaves the vector in descending order:
    // first by major compute capability, then by speed, then by lower index.
    std::sort(devices.rbegin(), devices.rend());

    vector<int> precedence;
    precedence.reserve(devices.size());
    for (size_t i = 0; i < devices.size(); i++)
        precedence.push_back(-devices[i].second);
    return precedence;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment