Commit 0b35240d authored by peastman's avatar peastman
Browse files

Merge pull request #255 from peastman/master

Parallelized summing the forces from different threads
parents f1258fc3 8bdf0829
...@@ -49,6 +49,7 @@ namespace OpenMM { ...@@ -49,6 +49,7 @@ namespace OpenMM {
*/ */
class CpuCalcForcesAndEnergyKernel : public CalcForcesAndEnergyKernel { class CpuCalcForcesAndEnergyKernel : public CalcForcesAndEnergyKernel {
public: public:
class SumForceTask;
CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context); CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context);
/** /**
* Initialize the kernel. * Initialize the kernel.
......
...@@ -106,6 +106,30 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>& ...@@ -106,6 +106,30 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>&
return 0.5*energy; return 0.5*energy;
} }
class CpuCalcForcesAndEnergyKernel::SumForceTask : public ThreadPool::Task {
public:
SumForceTask(int numParticles, vector<RealVec>& forceData, CpuPlatform::PlatformData& data) : numParticles(numParticles), forceData(forceData), data(data) {
}
void execute(ThreadPool& threads, int threadIndex) {
// Sum the contributions to forces that have been calculated by different threads.
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
for (int i = start; i < end; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
}
int numParticles;
vector<RealVec>& forceData;
CpuPlatform::PlatformData& data;
};
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) : CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
CalcForcesAndEnergyKernel(name, platform), data(data) { CalcForcesAndEnergyKernel(name, platform), data(data) {
// Create a Reference platform version of this kernel. // Create a Reference platform version of this kernel.
...@@ -153,17 +177,9 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i ...@@ -153,17 +177,9 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) { double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
// Sum the forces from all the threads. // Sum the forces from all the threads.
int numParticles = context.getSystem().getNumParticles(); SumForceTask task(context.getSystem().getNumParticles(), extractForces(context), data);
int numThreads = data.threads.getNumThreads(); data.threads.execute(task);
vector<RealVec>& forceData = extractForces(context); data.threads.waitForThreads();
for (int i = 0; i < numParticles; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups); return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment