Commit 51fc7a1e authored by peastman
Browse files

Merge pull request #705 from peastman/multigpu

Further improvements to multi-GPU scaling
parents db72d0cb 8f12d8fc
...@@ -98,6 +98,12 @@ public: ...@@ -98,6 +98,12 @@ public:
// Execute the kernel, then download forces. // Execute the kernel, then download forces.
energy += kernel.finishComputation(context, includeForce, includeEnergy, groups); energy += kernel.finishComputation(context, includeForce, includeEnergy, groups);
if (cu.getComputeForceCount() < 200) {
// Record timing information for load balancing. Since this takes time, only do it at the start of the simulation.
CHECK_RESULT(cuCtxSynchronize(), "Error synchronizing CUDA context");
completionTime = getTime();
}
if (includeForce) { if (includeForce) {
if (cu.getContextIndex() > 0) { if (cu.getContextIndex() > 0) {
int numAtoms = cu.getPaddedNumAtoms(); int numAtoms = cu.getPaddedNumAtoms();
...@@ -110,12 +116,7 @@ public: ...@@ -110,12 +116,7 @@ public:
else else
cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]); cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]);
} }
else {
// In principle this should make the load balancing more accurate, but in practice it just seems to make things slower.
//CHECK_RESULT(cuCtxSynchronize(), "Error synchronizing CUDA context");
}
} }
completionTime = getTime();
} }
private: private:
ContextImpl& context; ContextImpl& context;
...@@ -192,6 +193,7 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex ...@@ -192,6 +193,7 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
} }
} }
#include <cstdio>
double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) { double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
for (int i = 0; i < (int) data.contexts.size(); i++) { for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i]; CudaContext& cu = *data.contexts[i];
...@@ -216,24 +218,26 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con ...@@ -216,24 +218,26 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
// Balance work between the contexts by transferring a little nonbonded work from the context that // Balance work between the contexts by transferring a little nonbonded work from the context that
// finished last to the one that finished first. // finished last to the one that finished first.
int firstIndex = 0, lastIndex = 0; if (cu.getComputeForceCount() < 200) {
for (int i = 0; i < (int) completionTimes.size(); i++) { int firstIndex = 0, lastIndex = 0;
if (completionTimes[i] < completionTimes[firstIndex]) for (int i = 0; i < (int) completionTimes.size(); i++) {
firstIndex = i; if (completionTimes[i] < completionTimes[firstIndex])
if (completionTimes[i] > completionTimes[lastIndex]) firstIndex = i;
lastIndex = i; if (completionTimes[i] > completionTimes[lastIndex])
} lastIndex = i;
double fractionToTransfer = min(0.001, contextNonbondedFractions[lastIndex]); }
contextNonbondedFractions[firstIndex] += fractionToTransfer; double fractionToTransfer = min(0.01, contextNonbondedFractions[lastIndex]);
contextNonbondedFractions[lastIndex] -= fractionToTransfer; contextNonbondedFractions[firstIndex] += fractionToTransfer;
double startFraction = 0.0; contextNonbondedFractions[lastIndex] -= fractionToTransfer;
for (int i = 0; i < (int) contextNonbondedFractions.size(); i++) { double startFraction = 0.0;
double endFraction = startFraction+contextNonbondedFractions[i]; for (int i = 0; i < (int) contextNonbondedFractions.size(); i++) {
if (i == contextNonbondedFractions.size()-1) double endFraction = startFraction+contextNonbondedFractions[i];
endFraction = 1.0; // Avoid roundoff error if (i == contextNonbondedFractions.size()-1)
data.contexts[i]->getNonbondedUtilities().setAtomBlockRange(startFraction, endFraction); endFraction = 1.0; // Avoid roundoff error
startFraction = endFraction; data.contexts[i]->getNonbondedUtilities().setAtomBlockRange(startFraction, endFraction);
} startFraction = endFraction;
}
}
} }
return energy; return energy;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment