Created a CudaPlatform property for specifying whether CUDA should sync or...

Created a CudaPlatform property for specifying whether CUDA should sync or spin loop while waiting for results

Created a CudaPlatform property for specifying whether CUDA should sync or...
Created a CudaPlatform property for specifying whether CUDA should sync or spin loop while waiting for results
f48b8860 · Peter Eastman · 09b70ba2 · f48b8860 · f48b8860 · f48b8860
Commit f48b8860 authored Jul 28, 2009 by Peter Eastman
4 changed files
--- a/platforms/cuda/include/CudaPlatform.h
+++ b/platforms/cuda/include/CudaPlatform.h
@@ -64,6 +64,13 @@ public:
        static const std::string key = "CudaDevice";
        return key;
    }
+    /**
+     * This is the name of the platform property for selecting whether CUDA should sync or spin loop while waiting for results. A value of "true" makes gpuInit pass cudaDeviceBlockingSync to cudaSetDeviceFlags; any other value passes cudaDeviceScheduleAuto. The default registered by the CudaPlatform constructor is "false".
+     */
+    static const std::string& CudaUseBlockingSync() {
+        static const std::string key = "CudaUseBlockingSync";
+        return key;
+    }
 private:
    CudaStreamFactory defaultStreamFactory;
 };

--- a/platforms/cuda/src/CudaPlatform.cpp
+++ b/platforms/cuda/src/CudaPlatform.cpp
@@ -63,7 +63,9 @@ CudaPlatform::CudaPlatform() {
    registerKernelFactory(CalcKineticEnergyKernel::Name(), factory);
    registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
    platformProperties.push_back(CudaDevice());
+    platformProperties.push_back(CudaUseBlockingSync());
    setPropertyDefaultValue(CudaDevice(), "0");
+    setPropertyDefaultValue(CudaUseBlockingSync(), "false");
 }
 bool CudaPlatform::supportsDoublePrecision() const {
@@ -92,7 +94,7 @@ void CudaPlatform::contextCreated(ContextImpl& context) const {
    if (devicePropValue.length() > 0)
        stringstream(devicePropValue) >> device;
    int numParticles = context.getSystem().getNumParticles();
-    _gpuContext* gpu = (_gpuContext*) gpuInit(numParticles, device);
+    _gpuContext* gpu = (_gpuContext*) gpuInit(numParticles, device, getPropertyDefaultValue(CudaUseBlockingSync()) == "true");
    context.setPlatformData(new PlatformData(gpu));
 }
@@ -107,4 +109,5 @@ CudaPlatform::PlatformData::PlatformData(_gpuContext* gpu) : gpu(gpu), removeCM(
    stringstream device;
    device << gpu->device;
    propertyValues[CudaPlatform::CudaDevice()] = device.str();
+    propertyValues[CudaPlatform::CudaUseBlockingSync()] = (gpu->useBlockingSync ? "true" : "false");
 }
--- a/platforms/cuda/src/kernels/gpu.cpp
+++ b/platforms/cuda/src/kernels/gpu.cpp
@@ -1118,7 +1118,7 @@ bool gpuIsAvailable()
 }
 extern "C"
-void* gpuInit(int numAtoms, unsigned int device)
+void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
 {
    gpuContext gpu = new _gpuContext;
    int LRFSize = 0;
@@ -1134,6 +1134,9 @@ void* gpuInit(int numAtoms, unsigned int device)
        cudaSetDevice(device); // Ignore errors
    status = cudaGetDevice(&gpu->device);
    RTERROR(status, "Error getting CUDA device")
+    status = cudaSetDeviceFlags(useBlockingSync ? cudaDeviceBlockingSync : cudaDeviceScheduleAuto);
+    RTERROR(status, "Error setting device flags")
+    gpu->useBlockingSync = useBlockingSync;
    // Determine kernel call configuration
    cudaDeviceProp deviceProp;

--- a/platforms/cuda/src/kernels/gputypes.h
+++ b/platforms/cuda/src/kernels/gputypes.h
@@ -59,6 +59,7 @@ struct _gpuContext {
    //have to be repeatedly passed around
    int natoms;
    int device;
+    bool useBlockingSync;
    gpuAtomType* gpAtomTable;
    int gAtomTypes;
    cudaGmxSimulation sim;
@@ -206,7 +207,7 @@ extern "C"
 void gpuInitializeRandoms(gpuContext gpu);
 extern "C"
-void* gpuInit(int numAtoms, unsigned int device);
+void* gpuInit(int numAtoms, unsigned int device = 0, bool useBlockingSync = false);
 extern "C"
 void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature, float errorTol);