Commit 859bfd6c authored by peastman
Browse files

Use lambdas for thread pool tasks

parent 2c559596
......@@ -137,35 +137,27 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>&
return 0.5*energy;
}
class CpuCalcForcesAndEnergyKernel::SumForceTask : public ThreadPool::Task {
public:
SumForceTask(int numParticles, vector<RealVec>& forceData, CpuPlatform::PlatformData& data) : numParticles(numParticles), forceData(forceData), data(data) {
}
void execute(ThreadPool& threads, int threadIndex) {
// Sum the contributions to forces that have been calculated by different threads.
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
CalcForcesAndEnergyKernel(name, platform), data(data) {
// Create a Reference platform version of this kernel.
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
for (int i = start; i < end; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
}
int numParticles;
vector<RealVec>& forceData;
CpuPlatform::PlatformData& data;
};
ReferenceKernelFactory referenceFactory;
referenceKernel = Kernel(referenceFactory.createKernelImpl(name, platform, context));
}
class CpuCalcForcesAndEnergyKernel::InitForceTask : public ThreadPool::Task {
public:
InitForceTask(int numParticles, ContextImpl& context, CpuPlatform::PlatformData& data) : numParticles(numParticles), positionsValid(true), context(context), data(data) {
}
void execute(ThreadPool& threads, int threadIndex) {
void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
}
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert positions to single precision and clear the forces.
int numParticles = context.getSystem().getNumParticles();
bool positionsValid = true;
data.threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Convert the positions to single precision and apply periodic boundary conditions
AlignedArray<float>& posq = data.posq;
......@@ -218,36 +210,9 @@ public:
fvec4 zero(0.0f);
for (int j = 0; j < numParticles; j++)
zero.store(&data.threadForce[threadIndex][j*4]);
}
int numParticles;
bool positionsValid;
ContextImpl& context;
CpuPlatform::PlatformData& data;
};
/**
 * Construct the kernel, storing a reference to the platform data and creating
 * a Reference platform kernel of the same name that is kept in referenceKernel.
 */
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
        CalcForcesAndEnergyKernel(name, platform), data(data) {
    // Build the Reference platform version of this kernel through its factory.
    ReferenceKernelFactory factory;
    referenceKernel = Kernel(factory.createKernelImpl(name, platform, context));
}
void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
}
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert positions to single precision and clear the forces.
int numParticles = context.getSystem().getNumParticles();
InitForceTask task(numParticles, context, data);
data.threads.execute(task);
});
data.threads.waitForThreads();
if (!task.positionsValid)
if (!positionsValid)
throw OpenMMException("Particle coordinate is nan");
// Determine whether we need to recompute the neighbor list.
......@@ -302,8 +267,23 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
/**
 * Finish a force/energy computation. The per-thread force buffers in
 * data.threadForce are added into the context's force array in parallel, then
 * the work is handed to the Reference kernel.
 *
 * @return the value returned by the Reference kernel's finishComputation()
 */
double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
    // Sum the forces from all the threads. The summation is dispatched exactly once;
    // dispatching it twice would add every thread's contribution to the forces twice.
    data.threads.execute([&] (ThreadPool& threads, int threadIndex) {
        // Sum the contributions to forces that have been calculated by different threads.
        // Each thread handles a contiguous range of particles.
        int numParticles = context.getSystem().getNumParticles();
        int numThreads = threads.getNumThreads();
        int start = threadIndex*numParticles/numThreads;
        int end = (threadIndex+1)*numParticles/numThreads;
        vector<RealVec>& forceData = extractForces(context);
        for (int i = start; i < end; i++) {
            fvec4 f(0.0f);
            for (int j = 0; j < numThreads; j++)
                f += fvec4(&data.threadForce[j][4*i]);
            forceData[i][0] += f[0];
            forceData[i][1] += f[1];
            forceData[i][2] += f[2];
        }
    });
    data.threads.waitForThreads();
    return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups, valid);
}
......
/* Portions copyright (c) 2006-2016 Stanford University and Simbios.
/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
......@@ -29,36 +29,6 @@
using namespace OpenMM;
using namespace std;
/**
 * Thread pool task that calls CpuLangevinDynamics::threadUpdate1() on the
 * owning object for each worker thread.
 */
class CpuLangevinDynamics::Update1Task : public ThreadPool::Task {
public:
    CpuLangevinDynamics& owner;

    Update1Task(CpuLangevinDynamics& dynamics) : owner(dynamics) {
    }

    // The pool itself is not needed; only the thread index is passed on.
    void execute(ThreadPool&, int threadIndex) {
        owner.threadUpdate1(threadIndex);
    }
};
/**
 * Thread pool task that calls CpuLangevinDynamics::threadUpdate2() on the
 * owning object for each worker thread.
 */
class CpuLangevinDynamics::Update2Task : public ThreadPool::Task {
public:
    CpuLangevinDynamics& owner;

    Update2Task(CpuLangevinDynamics& dynamics) : owner(dynamics) {
    }

    // The pool itself is not needed; only the thread index is passed on.
    void execute(ThreadPool&, int threadIndex) {
        owner.threadUpdate2(threadIndex);
    }
};
/**
 * Thread pool task that calls CpuLangevinDynamics::threadUpdate3() on the
 * owning object for each worker thread.
 */
class CpuLangevinDynamics::Update3Task : public ThreadPool::Task {
public:
    CpuLangevinDynamics& owner;

    Update3Task(CpuLangevinDynamics& dynamics) : owner(dynamics) {
    }

    // The pool itself is not needed; only the thread index is passed on.
    void execute(ThreadPool&, int threadIndex) {
        owner.threadUpdate3(threadIndex);
    }
};
// Construct the object: the atom count, step size, friction and temperature are
// passed to the ReferenceStochasticDynamics base class, and references to the
// thread pool and random number generator are stored for later use.
CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM friction, RealOpenMM temperature, ThreadPool& threads, CpuRandom& random) :
ReferenceStochasticDynamics(numberOfAtoms, deltaT, friction, temperature), threads(threads), random(random) {
}
......@@ -79,8 +49,7 @@ void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish.
Update1Task task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate1(threadIndex); });
threads.waitForThreads();
}
......@@ -97,8 +66,7 @@ void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish.
Update2Task task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate2(threadIndex); });
threads.waitForThreads();
}
......@@ -114,8 +82,7 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish.
Update3Task task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate3(threadIndex); });
threads.waitForThreads();
}
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. *
* Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -409,16 +409,6 @@ private:
vector<vector<vector<pair<float, int> > > > bins;
};
/**
 * Thread pool task that calls CpuNeighborList::threadComputeNeighborList() on
 * the owning neighbor list for each worker thread.
 */
class CpuNeighborList::ThreadTask : public ThreadPool::Task {
public:
    CpuNeighborList& owner;

    ThreadTask(CpuNeighborList& neighborList) : owner(neighborList) {
    }

    // Pass both the pool and this thread's index on to the owner.
    void execute(ThreadPool& pool, int threadIndex) {
        owner.threadComputeNeighborList(pool, threadIndex);
    }
};
// Construct a neighbor list object, recording the requested block size.
CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) {
}
......@@ -460,8 +450,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Sort the atoms based on a Hilbert curve.
atomBins.resize(numAtoms);
ThreadTask task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeNeighborList(threads, threadIndex); });
threads.waitForThreads();
sort(atomBins.begin(), atomBins.end());
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -41,16 +41,6 @@ using namespace OpenMM;
const float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048;
/**
 * Thread pool task that calls CpuNonbondedForce::threadComputeDirect() on the
 * owning force object for each worker thread.
 */
class CpuNonbondedForce::ComputeDirectTask : public ThreadPool::Task {
public:
    CpuNonbondedForce& owner;

    ComputeDirectTask(CpuNonbondedForce& force) : owner(force) {
    }

    // Pass both the pool and this thread's index on to the owner.
    void execute(ThreadPool& pool, int threadIndex) {
        owner.threadComputeDirect(pool, threadIndex);
    }
};
/**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor
......@@ -318,8 +308,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
// Signal the threads to start running and wait for them to finish.
ComputeDirectTask task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeDirect(threads, threadIndex); });
threads.waitForThreads();
// Signal the threads to subtract the exclusions.
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -35,52 +35,6 @@
using namespace OpenMM;
using namespace std;
/**
 * Thread pool task that applies every ReferenceSETTLEAlgorithm in threadSettle
 * to the positions. Worker threads claim entries one at a time through a
 * shared atomic counter until none are left.
 */
class CpuSETTLE::ApplyToPositionsTask : public ThreadPool::Task {
public:
    ApplyToPositionsTask(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& atomCoordinatesP, vector<RealOpenMM>& inverseMasses,
            RealOpenMM tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) :
            atomCoordinates(atomCoordinates), atomCoordinatesP(atomCoordinatesP), inverseMasses(inverseMasses),
            tolerance(tolerance), threadSettle(threadSettle) {
        // No entry has been claimed yet.
        gmx_atomic_set(&atomicCounter, 0);
    }

    void execute(ThreadPool& threads, int threadIndex) {
        // Keep claiming the next index until the counter runs past the end of the list.
        for (int index = gmx_atomic_fetch_add(&atomicCounter, 1); index < threadSettle.size(); index = gmx_atomic_fetch_add(&atomicCounter, 1))
            threadSettle[index]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
    }

    vector<OpenMM::RealVec>& atomCoordinates;
    vector<OpenMM::RealVec>& atomCoordinatesP;
    vector<RealOpenMM>& inverseMasses;
    RealOpenMM tolerance;
    vector<ReferenceSETTLEAlgorithm*>& threadSettle;
    gmx_atomic_t atomicCounter;
};
/**
 * Thread pool task that applies every ReferenceSETTLEAlgorithm in threadSettle
 * to the velocities. Worker threads claim entries one at a time through a
 * shared atomic counter until none are left.
 */
class CpuSETTLE::ApplyToVelocitiesTask : public ThreadPool::Task {
public:
    ApplyToVelocitiesTask(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& velocities, vector<RealOpenMM>& inverseMasses,
            RealOpenMM tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) :
            atomCoordinates(atomCoordinates), velocities(velocities), inverseMasses(inverseMasses),
            tolerance(tolerance), threadSettle(threadSettle) {
        // No entry has been claimed yet.
        gmx_atomic_set(&atomicCounter, 0);
    }

    void execute(ThreadPool& threads, int threadIndex) {
        // Keep claiming the next index until the counter runs past the end of the list.
        for (int index = gmx_atomic_fetch_add(&atomicCounter, 1); index < threadSettle.size(); index = gmx_atomic_fetch_add(&atomicCounter, 1))
            threadSettle[index]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
    }

    vector<OpenMM::RealVec>& atomCoordinates;
    vector<OpenMM::RealVec>& velocities;
    vector<RealOpenMM>& inverseMasses;
    RealOpenMM tolerance;
    vector<ReferenceSETTLEAlgorithm*>& threadSettle;
    gmx_atomic_t atomicCounter;
};
CpuSETTLE::CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settle, ThreadPool& threads) : threads(threads) {
int numBlocks = 10*threads.getNumThreads();
int numClusters = settle.getNumClusters();
......@@ -107,13 +61,29 @@ CpuSETTLE::~CpuSETTLE() {
}
/**
 * Apply the SETTLE constraints to the positions in parallel. Every entry of
 * threadSettle is applied exactly once: worker threads claim entries through a
 * shared atomic counter, and this method returns once all threads have finished.
 */
void CpuSETTLE::apply(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& atomCoordinatesP, vector<RealOpenMM>& inverseMasses, RealOpenMM tolerance) {
    gmx_atomic_t atomicCounter;
    gmx_atomic_set(&atomicCounter, 0);
    // Read the size once as an int so it compares cleanly with the int counter value.
    const int numBlocks = (int) threadSettle.size();
    threads.execute([&] (ThreadPool& threads, int threadIndex) {
        while (true) {
            // Claim the next unprocessed entry; stop when they are all taken.
            int index = gmx_atomic_fetch_add(&atomicCounter, 1);
            if (index >= numBlocks)
                break;
            threadSettle[index]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
        }
    });
    // The lambda captures locals by reference, so wait before they go out of scope.
    threads.waitForThreads();
}
/**
 * Apply the SETTLE constraints to the velocities in parallel. Every entry of
 * threadSettle is applied exactly once: worker threads claim entries through a
 * shared atomic counter, and this method returns once all threads have finished.
 */
void CpuSETTLE::applyToVelocities(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& velocities, vector<RealOpenMM>& inverseMasses, RealOpenMM tolerance) {
    gmx_atomic_t atomicCounter;
    gmx_atomic_set(&atomicCounter, 0);
    // Read the size once as an int so it compares cleanly with the int counter value.
    const int numBlocks = (int) threadSettle.size();
    threads.execute([&] (ThreadPool& threads, int threadIndex) {
        while (true) {
            // Claim the next unprocessed entry; stop when they are all taken.
            int index = gmx_atomic_fetch_add(&atomicCounter, 1);
            if (index >= numBlocks)
                break;
            threadSettle[index]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
        }
    });
    // The lambda captures locals by reference, so wait before they go out of scope.
    threads.waitForThreads();
}
......@@ -198,7 +198,6 @@ public:
*/
void loadCheckpoint(ContextImpl& context, std::istream& stream);
private:
class GetPositionsTask;
CudaContext& cu;
};
......
......@@ -147,11 +147,29 @@ void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
contexts[i]->setTime(time);
}
class CudaUpdateStateDataKernel::GetPositionsTask : public ThreadPool::Task {
public:
GetPositionsTask(CudaContext& cu, vector<Vec3>& positions, vector<float4>& posCorrection) : cu(cu), positions(positions), posCorrection(posCorrection) {
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
cu.setAsCurrent();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
vector<float4> posCorrection;
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
}
else if (cu.getUseMixedPrecision()) {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq, false);
posCorrection.resize(numParticles);
cu.getPosqCorrection().download(posCorrection);
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
}
void execute(ThreadPool& threads, int threadIndex) {
// Filling in the output array is done in parallel for speed.
cu.getPlatformData().threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Compute the position of each particle to return to the user. This is done in parallel for speed.
const vector<int>& order = cu.getAtomIndex();
......@@ -186,36 +204,7 @@ public:
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
}
CudaContext& cu;
vector<Vec3>& positions;
vector<float4>& posCorrection;
};
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
cu.setAsCurrent();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
vector<float4> posCorrection;
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
}
else if (cu.getUseMixedPrecision()) {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq, false);
posCorrection.resize(numParticles);
cu.getPosqCorrection().download(posCorrection);
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
}
// Filling in the output array is done in parallel for speed.
GetPositionsTask task(cu, positions, posCorrection);
cu.getPlatformData().threads.execute(task);
});
cu.getPlatformData().threads.waitForThreads();
}
......
......@@ -176,7 +176,6 @@ public:
*/
void loadCheckpoint(ContextImpl& context, std::istream& stream);
private:
class GetPositionsTask;
OpenCLContext& cl;
};
......
......@@ -171,11 +171,28 @@ void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
contexts[i]->setTime(time);
}
class OpenCLUpdateStateDataKernel::GetPositionsTask : public ThreadPool::Task {
public:
GetPositionsTask(OpenCLContext& cl, vector<Vec3>& positions, vector<mm_float4>& posCorrection) : cl(cl), positions(positions), posCorrection(posCorrection) {
void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
vector<mm_float4> posCorrection;
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
}
void execute(ThreadPool& threads, int threadIndex) {
else if (cl.getUseMixedPrecision()) {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
cl.getPosq().download(posq, false);
posCorrection.resize(numParticles);
cl.getPosqCorrection().download(posCorrection);
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
}
// Filling in the output array is done in parallel for speed.
cl.getPlatformData().threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Compute the position of each particle to return to the user. This is done in parallel for speed.
const vector<int>& order = cl.getAtomIndex();
......@@ -210,35 +227,7 @@ public:
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
}
OpenCLContext& cl;
vector<Vec3>& positions;
vector<mm_float4>& posCorrection;
};
void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
vector<mm_float4> posCorrection;
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
}
else if (cl.getUseMixedPrecision()) {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
cl.getPosq().download(posq, false);
posCorrection.resize(numParticles);
cl.getPosqCorrection().download(posCorrection);
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
}
// Filling in the output array is done in parallel for speed.
GetPositionsTask task(cl, positions, posCorrection);
cl.getPlatformData().threads.execute(task);
});
cl.getPlatformData().threads.waitForThreads();
}
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Peter Eastman, Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -38,42 +38,6 @@
using namespace OpenMM;
using namespace std;
// Thread pool task that extracts the columns of the inverse matrix one at a time.
// The columns are divided among the pool's threads, since this can be very slow.
class ExtractMatrixTask : public ThreadPool::Task {
public:
    ExtractMatrixTask(int numConstraints, vector<vector<pair<int, RealOpenMM> > >& transposedMatrix, const vector<RealOpenMM>& distance, RealOpenMM elementCutoff,
            const int* qRowStart, const int* qColIndex, const int* rRowStart, const int* rColIndex, const double* qValue, const double* rValue) :
            numConstraints(numConstraints), transposedMatrix(transposedMatrix), distance(distance), elementCutoff(elementCutoff),
            qRowStart(qRowStart), qColIndex(qColIndex), rRowStart(rRowStart), rColIndex(rColIndex), qValue(qValue), rValue(rValue) {
    }

    void execute(ThreadPool& pool, int threadIndex) {
        vector<double> column(numConstraints);
        // Thread t handles columns t, t+numThreads, t+2*numThreads, ...
        for (int col = threadIndex; col < numConstraints; col += pool.getNumThreads()) {
            // Start from the unit vector for this column, then solve for column col of the inverse matrix.
            column.assign(numConstraints, 0.0);
            column[col] = 1.0;
            QUERN_multiply_with_q_transpose(numConstraints, qRowStart, qColIndex, qValue, &column[0]);
            QUERN_solve_with_r(numConstraints, rRowStart, rColIndex, rValue, &column[0], &column[0]);

            // Scale each element by the distance ratio and keep only those above the cutoff.
            vector<pair<int, RealOpenMM> >& kept = transposedMatrix[col];
            for (int row = 0; row < numConstraints; row++) {
                double value = column[row]*distance[col]/distance[row];
                if (FABS((RealOpenMM) value) > elementCutoff)
                    kept.push_back(pair<int, RealOpenMM>(row, (RealOpenMM) value));
            }
        }
    }
private:
    int numConstraints;
    vector<vector<pair<int, RealOpenMM> > >& transposedMatrix;
    const vector<RealOpenMM>& distance;
    RealOpenMM elementCutoff;
    const int *qRowStart, *qColIndex, *rRowStart, *rColIndex;
    const double *qValue, *rValue;
};
ReferenceCCMAAlgorithm::ReferenceCCMAAlgorithm(int numberOfAtoms,
int numberOfConstraints,
const vector<pair<int, int> >& atomIndices,
......@@ -194,9 +158,27 @@ ReferenceCCMAAlgorithm::ReferenceCCMAAlgorithm(int numberOfAtoms,
&qRowStart, &qColIndex, &qValue, &rRowStart, &rColIndex, &rValue);
vector<vector<pair<int, RealOpenMM> > > transposedMatrix(numberOfConstraints);
_matrix.resize(numberOfConstraints);
// Extract columns from the inverse matrix one at a time. It is done in parallel,
// since this can be very slow.
ThreadPool threads;
ExtractMatrixTask task(numberOfConstraints, transposedMatrix, _distance, _elementCutoff, qRowStart, qColIndex, rRowStart, rColIndex, qValue, rValue);
threads.execute(task);
threads.execute([&] (ThreadPool& pool, int threadIndex) {
vector<double> rhs(numberOfConstraints);
for (int i = threadIndex; i < numberOfConstraints; i += pool.getNumThreads()) {
// Extract column i of the inverse matrix.
for (int j = 0; j < numberOfConstraints; j++)
rhs[j] = (i == j ? 1.0 : 0.0);
QUERN_multiply_with_q_transpose(numberOfConstraints, qRowStart, qColIndex, qValue, &rhs[0]);
QUERN_solve_with_r(numberOfConstraints, rRowStart, rColIndex, rValue, &rhs[0], &rhs[0]);
for (int j = 0; j < numberOfConstraints; j++) {
double value = rhs[j]*distance[i]/distance[j];
if (FABS((RealOpenMM) value) > elementCutoff)
transposedMatrix[i].push_back(pair<int, RealOpenMM>(j, (RealOpenMM) value));
}
}
});
threads.waitForThreads();
// For purposes of thread safety we extracted the matrix in transposed form, so we need to transpose it again.
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -334,16 +334,6 @@ static void interpolateForces(float* posq, float* force, float* grid, int gridx,
}
}
/**
 * Thread pool task that calls CpuCalcPmeReciprocalForceKernel::runWorkerThread()
 * on the owning kernel for each worker thread.
 */
class CpuCalcPmeReciprocalForceKernel::ComputeTask : public ThreadPool::Task {
public:
    CpuCalcPmeReciprocalForceKernel& owner;

    ComputeTask(CpuCalcPmeReciprocalForceKernel& kernel) : owner(kernel) {
    }

    // Pass both the pool and this thread's index on to the owner.
    void execute(ThreadPool& pool, int threadIndex) {
        owner.runWorkerThread(pool, threadIndex);
    }
};
static void* threadBody(void* args) {
CpuCalcPmeReciprocalForceKernel& owner = *reinterpret_cast<CpuCalcPmeReciprocalForceKernel*>(args);
owner.runMainThread();
......@@ -483,9 +473,8 @@ void CpuCalcPmeReciprocalForceKernel::runMainThread() {
if (isDeleted)
break;
posq = io->getPosq();
ComputeTask task(*this);
gmx_atomic_set(&atomicCounter, 0);
threads.execute(task); // Signal threads to perform charge spreading.
threads.execute([&] (ThreadPool& threads, int threadIndex) { runWorkerThread(threads, threadIndex); }); // Signal threads to perform charge spreading.
threads.waitForThreads();
threads.resumeThreads(); // Signal threads to sum the charge grids.
threads.waitForThreads();
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -102,7 +102,6 @@ public:
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class ComputeTask;
/**
* Select a size for one grid dimension that FFTW can handle efficiently.
*/
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment