Commit f48b8860 authored by Peter Eastman
Browse files

Created a CudaPlatform property for specifying whether CUDA should sync or...

Created a CudaPlatform property (CudaUseBlockingSync) for specifying whether CUDA should use blocking synchronization or a spin loop while waiting for results
parent 09b70ba2
...@@ -64,6 +64,13 @@ public: ...@@ -64,6 +64,13 @@ public:
static const std::string key = "CudaDevice"; static const std::string key = "CudaDevice";
return key; return key;
} }
/**
 * Returns the name of the platform property that selects whether CUDA should
 * sync or spin loop while waiting for results.
 *
 * The returned reference refers to a function-local static string, so every
 * call yields the same object and the reference stays valid after the call.
 */
static const std::string& CudaUseBlockingSync() {
    // Constructed once, on first use; later calls hand back the same instance.
    static const std::string propertyName("CudaUseBlockingSync");
    return propertyName;
}
private: private:
CudaStreamFactory defaultStreamFactory; CudaStreamFactory defaultStreamFactory;
}; };
......
...@@ -63,7 +63,9 @@ CudaPlatform::CudaPlatform() { ...@@ -63,7 +63,9 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory(CalcKineticEnergyKernel::Name(), factory); registerKernelFactory(CalcKineticEnergyKernel::Name(), factory);
registerKernelFactory(RemoveCMMotionKernel::Name(), factory); registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
platformProperties.push_back(CudaDevice()); platformProperties.push_back(CudaDevice());
platformProperties.push_back(CudaUseBlockingSync());
setPropertyDefaultValue(CudaDevice(), "0"); setPropertyDefaultValue(CudaDevice(), "0");
setPropertyDefaultValue(CudaUseBlockingSync(), "false");
} }
bool CudaPlatform::supportsDoublePrecision() const { bool CudaPlatform::supportsDoublePrecision() const {
...@@ -92,7 +94,7 @@ void CudaPlatform::contextCreated(ContextImpl& context) const { ...@@ -92,7 +94,7 @@ void CudaPlatform::contextCreated(ContextImpl& context) const {
if (devicePropValue.length() > 0) if (devicePropValue.length() > 0)
stringstream(devicePropValue) >> device; stringstream(devicePropValue) >> device;
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
_gpuContext* gpu = (_gpuContext*) gpuInit(numParticles, device); _gpuContext* gpu = (_gpuContext*) gpuInit(numParticles, device, getPropertyDefaultValue(CudaUseBlockingSync()) == "true");
context.setPlatformData(new PlatformData(gpu)); context.setPlatformData(new PlatformData(gpu));
} }
...@@ -107,4 +109,5 @@ CudaPlatform::PlatformData::PlatformData(_gpuContext* gpu) : gpu(gpu), removeCM( ...@@ -107,4 +109,5 @@ CudaPlatform::PlatformData::PlatformData(_gpuContext* gpu) : gpu(gpu), removeCM(
stringstream device; stringstream device;
device << gpu->device; device << gpu->device;
propertyValues[CudaPlatform::CudaDevice()] = device.str(); propertyValues[CudaPlatform::CudaDevice()] = device.str();
propertyValues[CudaPlatform::CudaUseBlockingSync()] = (gpu->useBlockingSync ? "true" : "false");
} }
...@@ -1118,7 +1118,7 @@ bool gpuIsAvailable() ...@@ -1118,7 +1118,7 @@ bool gpuIsAvailable()
} }
extern "C" extern "C"
void* gpuInit(int numAtoms, unsigned int device) void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
{ {
gpuContext gpu = new _gpuContext; gpuContext gpu = new _gpuContext;
int LRFSize = 0; int LRFSize = 0;
...@@ -1134,6 +1134,9 @@ void* gpuInit(int numAtoms, unsigned int device) ...@@ -1134,6 +1134,9 @@ void* gpuInit(int numAtoms, unsigned int device)
cudaSetDevice(device); // Ignore errors cudaSetDevice(device); // Ignore errors
status = cudaGetDevice(&gpu->device); status = cudaGetDevice(&gpu->device);
RTERROR(status, "Error getting CUDA device") RTERROR(status, "Error getting CUDA device")
status = cudaSetDeviceFlags(useBlockingSync ? cudaDeviceBlockingSync : cudaDeviceScheduleAuto);
RTERROR(status, "Error setting device flags")
gpu->useBlockingSync = useBlockingSync;
// Determine kernel call configuration // Determine kernel call configuration
cudaDeviceProp deviceProp; cudaDeviceProp deviceProp;
......
...@@ -59,6 +59,7 @@ struct _gpuContext { ...@@ -59,6 +59,7 @@ struct _gpuContext {
//have to be repeatedly passed around //have to be repeatedly passed around
int natoms; int natoms;
int device; int device;
bool useBlockingSync;
gpuAtomType* gpAtomTable; gpuAtomType* gpAtomTable;
int gAtomTypes; int gAtomTypes;
cudaGmxSimulation sim; cudaGmxSimulation sim;
...@@ -206,7 +207,7 @@ extern "C" ...@@ -206,7 +207,7 @@ extern "C"
void gpuInitializeRandoms(gpuContext gpu); void gpuInitializeRandoms(gpuContext gpu);
extern "C" extern "C"
void* gpuInit(int numAtoms, unsigned int device); void* gpuInit(int numAtoms, unsigned int device = 0, bool useBlockingSync = false);
extern "C" extern "C"
void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature, float errorTol); void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature, float errorTol);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment