Commit f794f818 authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed race condition in parallel execution where positions could get overwritten with forces

parent 6f24ee8a
...@@ -106,7 +106,7 @@ private: ...@@ -106,7 +106,7 @@ private:
OpenCLParallelCalcForcesAndEnergyKernel::OpenCLParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, OpenCLPlatform::PlatformData& data) : OpenCLParallelCalcForcesAndEnergyKernel::OpenCLParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, OpenCLPlatform::PlatformData& data) :
CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextTiles(data.contexts.size()), contextForces(NULL), CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextTiles(data.contexts.size()), contextForces(NULL),
pinnedBuffer(NULL), pinnedMemory(NULL) { pinnedPositionBuffer(NULL), pinnedPositionMemory(NULL), pinnedForceBuffer(NULL), pinnedForceMemory(NULL) {
for (int i = 0; i < (int) data.contexts.size(); i++) for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcForcesAndEnergyKernel(name, platform, *data.contexts[i]))); kernels.push_back(Kernel(new OpenCLCalcForcesAndEnergyKernel(name, platform, *data.contexts[i])));
} }
...@@ -114,8 +114,10 @@ OpenCLParallelCalcForcesAndEnergyKernel::OpenCLParallelCalcForcesAndEnergyKernel ...@@ -114,8 +114,10 @@ OpenCLParallelCalcForcesAndEnergyKernel::OpenCLParallelCalcForcesAndEnergyKernel
OpenCLParallelCalcForcesAndEnergyKernel::~OpenCLParallelCalcForcesAndEnergyKernel() { OpenCLParallelCalcForcesAndEnergyKernel::~OpenCLParallelCalcForcesAndEnergyKernel() {
if (contextForces != NULL) if (contextForces != NULL)
delete contextForces; delete contextForces;
if (pinnedBuffer != NULL) if (pinnedPositionBuffer != NULL)
delete pinnedBuffer; delete pinnedPositionBuffer;
if (pinnedForceBuffer != NULL)
delete pinnedForceBuffer;
} }
void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) { void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
...@@ -129,18 +131,20 @@ void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& cont ...@@ -129,18 +131,20 @@ void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& cont
contextForces = new OpenCLArray<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(), contextForces = new OpenCLArray<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(),
data.contexts.size()*cl0.getPaddedNumAtoms(), "contextForces", true); data.contexts.size()*cl0.getPaddedNumAtoms(), "contextForces", true);
int bufferBytes = (data.contexts.size()-1)*cl0.getPaddedNumAtoms()*sizeof(mm_float4); int bufferBytes = (data.contexts.size()-1)*cl0.getPaddedNumAtoms()*sizeof(mm_float4);
pinnedBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes); pinnedPositionBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = (mm_float4*) cl0.getQueue().enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes); pinnedPositionMemory = (mm_float4*) cl0.getQueue().enqueueMapBuffer(*pinnedPositionBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
pinnedForceBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedForceMemory = (mm_float4*) cl0.getQueue().enqueueMapBuffer(*pinnedForceBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
} }
// Copy coordinates over to each device and execute the kernel. // Copy coordinates over to each device and execute the kernel.
cl0.getQueue().enqueueReadBuffer(cl0.getPosq().getDeviceBuffer(), CL_TRUE, 0, cl0.getPaddedNumAtoms()*sizeof(mm_float4), pinnedMemory); cl0.getQueue().enqueueReadBuffer(cl0.getPosq().getDeviceBuffer(), CL_TRUE, 0, cl0.getPaddedNumAtoms()*sizeof(mm_float4), pinnedPositionMemory);
for (int i = 0; i < (int) data.contexts.size(); i++) { for (int i = 0; i < (int) data.contexts.size(); i++) {
data.contextEnergy[i] = 0.0; data.contextEnergy[i] = 0.0;
OpenCLContext& cl = *data.contexts[i]; OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread(); OpenCLContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new BeginComputationTask(context, cl, getKernel(i), includeForce, includeEnergy, pinnedMemory)); thread.addTask(new BeginComputationTask(context, cl, getKernel(i), includeForce, includeEnergy, pinnedPositionMemory));
} }
} }
...@@ -148,7 +152,7 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c ...@@ -148,7 +152,7 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
for (int i = 0; i < (int) data.contexts.size(); i++) { for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i]; OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread(); OpenCLContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new FinishComputationTask(context, cl, getKernel(i), includeForce, includeEnergy, data.contextEnergy[i], completionTimes[i], pinnedMemory)); thread.addTask(new FinishComputationTask(context, cl, getKernel(i), includeForce, includeEnergy, data.contextEnergy[i], completionTimes[i], pinnedForceMemory));
} }
data.syncContexts(); data.syncContexts();
double energy = 0.0; double energy = 0.0;
...@@ -160,7 +164,7 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c ...@@ -160,7 +164,7 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
OpenCLContext& cl = *data.contexts[0]; OpenCLContext& cl = *data.contexts[0];
int numAtoms = cl.getPaddedNumAtoms(); int numAtoms = cl.getPaddedNumAtoms();
cl.getQueue().enqueueWriteBuffer(contextForces->getDeviceBuffer(), CL_FALSE, numAtoms*sizeof(mm_float4), cl.getQueue().enqueueWriteBuffer(contextForces->getDeviceBuffer(), CL_FALSE, numAtoms*sizeof(mm_float4),
numAtoms*(data.contexts.size()-1)*sizeof(mm_float4), pinnedMemory); numAtoms*(data.contexts.size()-1)*sizeof(mm_float4), pinnedForceMemory);
cl.reduceBuffer(*contextForces, data.contexts.size()); cl.reduceBuffer(*contextForces, data.contexts.size());
// Balance work between the contexts by transferring a few nonbonded tiles from the context that // Balance work between the contexts by transferring a few nonbonded tiles from the context that
......
...@@ -80,8 +80,10 @@ private: ...@@ -80,8 +80,10 @@ private:
std::vector<long long> completionTimes; std::vector<long long> completionTimes;
std::vector<int> contextTiles; std::vector<int> contextTiles;
OpenCLArray<mm_float4>* contextForces; OpenCLArray<mm_float4>* contextForces;
cl::Buffer* pinnedBuffer; cl::Buffer* pinnedPositionBuffer;
mm_float4* pinnedMemory; cl::Buffer* pinnedForceBuffer;
mm_float4* pinnedPositionMemory;
mm_float4* pinnedForceMemory;
}; };
/** /**
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment