Commit 5a9897bd authored by Peter Eastman
Browse files

Minor optimizations to data transfer between GPUs

parent f631ecaf
...@@ -126,8 +126,8 @@ public: ...@@ -126,8 +126,8 @@ public:
/** /**
* Copy the values in a vector to the Buffer. * Copy the values in a vector to the Buffer.
*/ */
void upload(std::vector<T>& data) { void upload(std::vector<T>& data, bool blocking = true) {
upload(&data[0]); upload(&data[0], blocking);
} }
/** /**
* Copy the values in the Buffer to a vector. * Copy the values in the Buffer to a vector.
...@@ -140,9 +140,9 @@ public: ...@@ -140,9 +140,9 @@ public:
/** /**
* Copy the values in an array to the Buffer. * Copy the values in an array to the Buffer.
*/ */
void upload(T* data) { void upload(T* data, bool blocking = true) {
try { try {
context.getQueue().enqueueWriteBuffer(*buffer, CL_TRUE, 0, size*sizeof(T), data); context.getQueue().enqueueWriteBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*sizeof(T), data);
} }
catch (cl::Error err) { catch (cl::Error err) {
std::stringstream str; std::stringstream str;
...@@ -166,10 +166,10 @@ public: ...@@ -166,10 +166,10 @@ public:
/** /**
* Copy the values in the host buffer to the OpenCL Buffer. * Copy the values in the host buffer to the OpenCL Buffer.
*/ */
void upload() { void upload(bool blocking = true) {
if (local.size() == 0) if (local.size() == 0)
throw OpenMMException(name+": Called upload() on an OpenCLArray with no host buffer"); throw OpenMMException(name+": Called upload() on an OpenCLArray with no host buffer");
upload(local); upload(local, blocking);
} }
/** /**
* Copy the values in the Buffer to the host buffer. * Copy the values in the Buffer to the host buffer.
......
...@@ -199,7 +199,7 @@ void OpenCLContext::initialize(const System& system) { ...@@ -199,7 +199,7 @@ void OpenCLContext::initialize(const System& system) {
for (int i = 0; i < numAtoms; i++) for (int i = 0; i < numAtoms; i++)
(*velm)[i].w = (float) (1.0/system.getParticleMass(i)); (*velm)[i].w = (float) (1.0/system.getParticleMass(i));
velm->upload(); velm->upload();
numForceBuffers = 1; numForceBuffers = platformData.contexts.size();
for (int i = 0; i < (int) forces.size(); i++) for (int i = 0; i < (int) forces.size(); i++)
numForceBuffers = std::max(numForceBuffers, forces[i]->getRequiredForceBuffers()); numForceBuffers = std::max(numForceBuffers, forces[i]->getRequiredForceBuffers());
forceBuffers = new OpenCLArray<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers", false); forceBuffers = new OpenCLArray<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers", false);
......
...@@ -61,7 +61,7 @@ public: ...@@ -61,7 +61,7 @@ public:
// Copy coordinates over to this device and execute the kernel. // Copy coordinates over to this device and execute the kernel.
if (cl.getContextIndex() > 0) if (cl.getContextIndex() > 0)
cl.getPosq().upload(cl.getPlatformData().contexts[0]->getPosq().getHostBuffer()); cl.getPosq().upload(cl.getPlatformData().contexts[0]->getPosq().getHostBuffer(), false);
kernel.beginComputation(context, includeForce, includeEnergy); kernel.beginComputation(context, includeForce, includeEnergy);
} }
private: private:
...@@ -82,7 +82,7 @@ public: ...@@ -82,7 +82,7 @@ public:
// Execute the kernel, then download forces. // Execute the kernel, then download forces.
energy += kernel.finishComputation(context, includeForce, includeEnergy); energy += kernel.finishComputation(context, includeForce, includeEnergy);
if (includeForce) if (includeForce && cl.getContextIndex() > 0)
cl.getForce().download(&contextForces[cl.getContextIndex()*cl.getPaddedNumAtoms()]); cl.getForce().download(&contextForces[cl.getContextIndex()*cl.getPaddedNumAtoms()]);
completionTime = getTime(); completionTime = getTime();
} }
...@@ -142,8 +142,11 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c ...@@ -142,8 +142,11 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
if (includeForce) { if (includeForce) {
// Sum the forces from all devices. // Sum the forces from all devices.
contextForces->upload(); OpenCLContext& cl = *data.contexts[0];
data.contexts[0]->reduceBuffer(*contextForces, data.contexts.size()); int numAtoms = cl.getPaddedNumAtoms();
cl.getQueue().enqueueWriteBuffer(contextForces->getDeviceBuffer(), CL_FALSE, numAtoms*sizeof(mm_float4),
numAtoms*(data.contexts.size()-1)*sizeof(mm_float4), &(*contextForces)[numAtoms]);
cl.reduceBuffer(*contextForces, data.contexts.size());
// Balance work between the contexts by transferring a few nonbonded tiles from the context that // Balance work between the contexts by transferring a few nonbonded tiles from the context that
// finished last to the one that finished first. // finished last to the one that finished first.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment