Unverified Commit fe21d5ee authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Use blocking sync when creating events (#3561)

parent c36c76ca
......@@ -541,6 +541,10 @@ public:
* expense of reduced simulation performance.
*/
void flushQueue();
/**
* Get the flags that should be used when creating CUevent objects.
*/
unsigned int getEventFlags();
private:
/**
* Compute a sorted list of device indices in decreasing order of desirability
......
......@@ -109,7 +109,8 @@ static int executeInWindows(const string &command) {
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, bool allowRuntimeCompiler, CudaPlatform::PlatformData& platformData,
CudaContext* originalContext) : ComputeContext(system), currentStream(0), platformData(platformData), contextIsValid(false), hasAssignedPosqCharges(false),
hasCompilerKernel(false), isNvccAvailable(false), pinnedBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL) {
hasCompilerKernel(false), isNvccAvailable(false), pinnedBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL),
useBlockingSync(useBlockingSync) {
// Determine what compiler to use.
this->compiler = "\""+compiler+"\"";
......@@ -894,3 +895,10 @@ vector<int> CudaContext::getDevicePrecedence() {
return precedence;
}
unsigned int CudaContext::getEventFlags() {
unsigned int flags = CU_EVENT_DISABLE_TIMING;
if (useBlockingSync)
flags += CU_EVENT_BLOCKING_SYNC;
return flags;
}
......@@ -30,7 +30,7 @@
using namespace OpenMM;
CudaEvent::CudaEvent(CudaContext& context) : context(context), eventCreated(false) {
CUresult result = cuEventCreate(&event, CU_EVENT_DISABLE_TIMING);
CUresult result = cuEventCreate(&event, context.getEventFlags());
if (result != CUDA_SUCCESS)
throw OpenMMException("Error creating CUDA event:"+CudaContext::getErrorString(result));
eventCreated = true;
......
......@@ -41,7 +41,7 @@ using namespace std;
CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const System& system) : IntegrationUtilities(context, system),
ccmaConvergedMemory(NULL) {
CHECK_RESULT2(cuEventCreate(&ccmaEvent, CU_EVENT_DISABLE_TIMING), "Error creating event for CCMA");
CHECK_RESULT2(cuEventCreate(&ccmaEvent, context.getEventFlags()), "Error creating event for CCMA");
CHECK_RESULT2(cuMemHostAlloc((void**) &ccmaConvergedMemory, sizeof(int), CU_MEMHOSTALLOC_DEVICEMAP), "Error allocating pinned memory");
CHECK_RESULT2(cuMemHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory");
}
......
......@@ -882,8 +882,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cufftSetStream(dispersionFftBackward, pmeStream);
}
}
CHECK_RESULT(cuEventCreate(&pmeSyncEvent, CU_EVENT_DISABLE_TIMING), "Error creating event for NonbondedForce");
CHECK_RESULT(cuEventCreate(&paramsSyncEvent, CU_EVENT_DISABLE_TIMING), "Error creating event for NonbondedForce");
CHECK_RESULT(cuEventCreate(&pmeSyncEvent, cu.getEventFlags()), "Error creating event for NonbondedForce");
CHECK_RESULT(cuEventCreate(&paramsSyncEvent, cu.getEventFlags()), "Error creating event for NonbondedForce");
int recipForceGroup = force.getReciprocalSpaceForceGroup();
if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
......
......@@ -70,7 +70,7 @@ CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(c
string errorMessage = "Error initializing nonbonded utilities";
int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice()));
CHECK_RESULT(cuEventCreate(&downloadCountEvent, 0));
CHECK_RESULT(cuEventCreate(&downloadCountEvent, context.getEventFlags()));
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedCountBuffer, 2*sizeof(unsigned int), CU_MEMHOSTALLOC_PORTABLE));
numForceThreadBlocks = 4*multiprocessors;
forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256);
......
......@@ -184,18 +184,18 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
getKernel(i).initialize(system);
for (int i = 0; i < numContexts; i++)
contextNonbondedFractions[i] = 1/(double) numContexts;
CHECK_RESULT(cuEventCreate(&event, 0), "Error creating event");
CHECK_RESULT(cuEventCreate(&event, cu.getEventFlags()), "Error creating event");
peerCopyEvent.resize(numContexts);
peerCopyEventLocal.resize(numContexts);
peerCopyStream.resize(numContexts);
for (int i = 0; i < numContexts; i++) {
CHECK_RESULT(cuEventCreate(&peerCopyEvent[i], 0), "Error creating event");
CHECK_RESULT(cuEventCreate(&peerCopyEvent[i], cu.getEventFlags()), "Error creating event");
CHECK_RESULT(cuStreamCreate(&peerCopyStream[i], CU_STREAM_NON_BLOCKING), "Error creating stream");
}
for (int i = 0; i < numContexts; i++) {
CudaContext& cuLocal = *data.contexts[i];
ContextSelector selectorLocal(cuLocal);
CHECK_RESULT(cuEventCreate(&peerCopyEventLocal[i], 0), "Error creating event");
CHECK_RESULT(cuEventCreate(&peerCopyEventLocal[i], cu.getEventFlags()), "Error creating event");
}
CHECK_RESULT(cuMemHostAlloc((void**) &interactionCounts, numContexts*sizeof(int2), 0), "Error creating interaction counts buffer");
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment