Unverified commit fe21d5ee, authored by Peter Eastman, committed by GitHub
Browse files

Use blocking sync when creating events (#3561)

parent c36c76ca
...@@ -541,6 +541,10 @@ public: ...@@ -541,6 +541,10 @@ public:
* expense of reduced simulation performance. * expense of reduced simulation performance.
*/ */
void flushQueue(); void flushQueue();
/**
 * Get the flags that should be used when creating CUevent objects.
 * The returned value is intended to be passed as the flags argument of
 * cuEventCreate().
 *
 * @return CU_EVENT_DISABLE_TIMING, combined with CU_EVENT_BLOCKING_SYNC when
 *         this context was created with useBlockingSync enabled
 */
unsigned int getEventFlags();
private: private:
/** /**
* Compute a sorted list of device indices in decreasing order of desirability * Compute a sorted list of device indices in decreasing order of desirability
......
...@@ -109,7 +109,8 @@ static int executeInWindows(const string &command) { ...@@ -109,7 +109,8 @@ static int executeInWindows(const string &command) {
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler, CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, bool allowRuntimeCompiler, CudaPlatform::PlatformData& platformData, const string& tempDir, const std::string& hostCompiler, bool allowRuntimeCompiler, CudaPlatform::PlatformData& platformData,
CudaContext* originalContext) : ComputeContext(system), currentStream(0), platformData(platformData), contextIsValid(false), hasAssignedPosqCharges(false), CudaContext* originalContext) : ComputeContext(system), currentStream(0), platformData(platformData), contextIsValid(false), hasAssignedPosqCharges(false),
hasCompilerKernel(false), isNvccAvailable(false), pinnedBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL) { hasCompilerKernel(false), isNvccAvailable(false), pinnedBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL),
useBlockingSync(useBlockingSync) {
// Determine what compiler to use. // Determine what compiler to use.
this->compiler = "\""+compiler+"\""; this->compiler = "\""+compiler+"\"";
...@@ -894,3 +895,10 @@ vector<int> CudaContext::getDevicePrecedence() { ...@@ -894,3 +895,10 @@ vector<int> CudaContext::getDevicePrecedence() {
return precedence; return precedence;
} }
/**
 * Get the flags that should be used when creating CUevent objects.
 *
 * Timing is always disabled (CU_EVENT_DISABLE_TIMING).  If this context was
 * constructed with useBlockingSync set, CU_EVENT_BLOCKING_SYNC is included as
 * well, so events honor the same synchronization setting as the context.
 *
 * @return the bitmask to pass as the flags argument of cuEventCreate()
 */
unsigned int CudaContext::getEventFlags() {
    unsigned int flags = CU_EVENT_DISABLE_TIMING;
    // The CU_EVENT_* values are bit flags, so combine them with bitwise OR
    // rather than addition; OR remains correct even if a bit is already set.
    if (useBlockingSync)
        flags |= CU_EVENT_BLOCKING_SYNC;
    return flags;
}
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
using namespace OpenMM; using namespace OpenMM;
CudaEvent::CudaEvent(CudaContext& context) : context(context), eventCreated(false) { CudaEvent::CudaEvent(CudaContext& context) : context(context), eventCreated(false) {
CUresult result = cuEventCreate(&event, CU_EVENT_DISABLE_TIMING); CUresult result = cuEventCreate(&event, context.getEventFlags());
if (result != CUDA_SUCCESS) if (result != CUDA_SUCCESS)
throw OpenMMException("Error creating CUDA event:"+CudaContext::getErrorString(result)); throw OpenMMException("Error creating CUDA event:"+CudaContext::getErrorString(result));
eventCreated = true; eventCreated = true;
......
...@@ -41,7 +41,7 @@ using namespace std; ...@@ -41,7 +41,7 @@ using namespace std;
CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const System& system) : IntegrationUtilities(context, system), CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const System& system) : IntegrationUtilities(context, system),
ccmaConvergedMemory(NULL) { ccmaConvergedMemory(NULL) {
CHECK_RESULT2(cuEventCreate(&ccmaEvent, CU_EVENT_DISABLE_TIMING), "Error creating event for CCMA"); CHECK_RESULT2(cuEventCreate(&ccmaEvent, context.getEventFlags()), "Error creating event for CCMA");
CHECK_RESULT2(cuMemHostAlloc((void**) &ccmaConvergedMemory, sizeof(int), CU_MEMHOSTALLOC_DEVICEMAP), "Error allocating pinned memory"); CHECK_RESULT2(cuMemHostAlloc((void**) &ccmaConvergedMemory, sizeof(int), CU_MEMHOSTALLOC_DEVICEMAP), "Error allocating pinned memory");
CHECK_RESULT2(cuMemHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory"); CHECK_RESULT2(cuMemHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory");
} }
......
...@@ -882,8 +882,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -882,8 +882,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cufftSetStream(dispersionFftBackward, pmeStream); cufftSetStream(dispersionFftBackward, pmeStream);
} }
} }
CHECK_RESULT(cuEventCreate(&pmeSyncEvent, CU_EVENT_DISABLE_TIMING), "Error creating event for NonbondedForce"); CHECK_RESULT(cuEventCreate(&pmeSyncEvent, cu.getEventFlags()), "Error creating event for NonbondedForce");
CHECK_RESULT(cuEventCreate(&paramsSyncEvent, CU_EVENT_DISABLE_TIMING), "Error creating event for NonbondedForce"); CHECK_RESULT(cuEventCreate(&paramsSyncEvent, cu.getEventFlags()), "Error creating event for NonbondedForce");
int recipForceGroup = force.getReciprocalSpaceForceGroup(); int recipForceGroup = force.getReciprocalSpaceForceGroup();
if (recipForceGroup < 0) if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup(); recipForceGroup = force.getForceGroup();
......
...@@ -70,7 +70,7 @@ CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(c ...@@ -70,7 +70,7 @@ CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(c
string errorMessage = "Error initializing nonbonded utilities"; string errorMessage = "Error initializing nonbonded utilities";
int multiprocessors; int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice())); CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice()));
CHECK_RESULT(cuEventCreate(&downloadCountEvent, 0)); CHECK_RESULT(cuEventCreate(&downloadCountEvent, context.getEventFlags()));
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedCountBuffer, 2*sizeof(unsigned int), CU_MEMHOSTALLOC_PORTABLE)); CHECK_RESULT(cuMemHostAlloc((void**) &pinnedCountBuffer, 2*sizeof(unsigned int), CU_MEMHOSTALLOC_PORTABLE));
numForceThreadBlocks = 4*multiprocessors; numForceThreadBlocks = 4*multiprocessors;
forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256); forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256);
......
...@@ -184,18 +184,18 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -184,18 +184,18 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
getKernel(i).initialize(system); getKernel(i).initialize(system);
for (int i = 0; i < numContexts; i++) for (int i = 0; i < numContexts; i++)
contextNonbondedFractions[i] = 1/(double) numContexts; contextNonbondedFractions[i] = 1/(double) numContexts;
CHECK_RESULT(cuEventCreate(&event, 0), "Error creating event"); CHECK_RESULT(cuEventCreate(&event, cu.getEventFlags()), "Error creating event");
peerCopyEvent.resize(numContexts); peerCopyEvent.resize(numContexts);
peerCopyEventLocal.resize(numContexts); peerCopyEventLocal.resize(numContexts);
peerCopyStream.resize(numContexts); peerCopyStream.resize(numContexts);
for (int i = 0; i < numContexts; i++) { for (int i = 0; i < numContexts; i++) {
CHECK_RESULT(cuEventCreate(&peerCopyEvent[i], 0), "Error creating event"); CHECK_RESULT(cuEventCreate(&peerCopyEvent[i], cu.getEventFlags()), "Error creating event");
CHECK_RESULT(cuStreamCreate(&peerCopyStream[i], CU_STREAM_NON_BLOCKING), "Error creating stream"); CHECK_RESULT(cuStreamCreate(&peerCopyStream[i], CU_STREAM_NON_BLOCKING), "Error creating stream");
} }
for (int i = 0; i < numContexts; i++) { for (int i = 0; i < numContexts; i++) {
CudaContext& cuLocal = *data.contexts[i]; CudaContext& cuLocal = *data.contexts[i];
ContextSelector selectorLocal(cuLocal); ContextSelector selectorLocal(cuLocal);
CHECK_RESULT(cuEventCreate(&peerCopyEventLocal[i], 0), "Error creating event"); CHECK_RESULT(cuEventCreate(&peerCopyEventLocal[i], cu.getEventFlags()), "Error creating event");
} }
CHECK_RESULT(cuMemHostAlloc((void**) &interactionCounts, numContexts*sizeof(int2), 0), "Error creating interaction counts buffer"); CHECK_RESULT(cuMemHostAlloc((void**) &interactionCounts, numContexts*sizeof(int2), 0), "Error creating interaction counts buffer");
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment