"devtools/packaging/scripts/vscode:/vscode.git/clone" did not exist on "d2fd6cee75cf18a121ccf90cf7fe223638010bca"
Commit b278bb6d authored by peastman's avatar peastman
Browse files

Direct space nonbonded is multithreaded

parent 3c435fb8
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include "CpuPlatform.h" #include "CpuPlatform.h"
#include "CpuNeighborList.h" #include "CpuNeighborList.h"
#include "CpuNonbondedForce.h"
#include "openmm/kernels.h" #include "openmm/kernels.h"
#include "openmm/System.h" #include "openmm/System.h"
...@@ -87,6 +88,7 @@ private: ...@@ -87,6 +88,7 @@ private:
std::vector<float> forces; std::vector<float> forces;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
CpuNeighborList neighborList; CpuNeighborList neighborList;
CpuNonbondedForce nonbonded;
Kernel optimizedPme; Kernel optimizedPme;
}; };
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#define OPENMM_CPU_NONBONDED_FORCE_H__ #define OPENMM_CPU_NONBONDED_FORCE_H__
#include "ReferencePairIxn.h" #include "ReferencePairIxn.h"
#include <pthread.h>
#include <set> #include <set>
#include <utility> #include <utility>
#include <vector> #include <vector>
...@@ -33,25 +34,8 @@ ...@@ -33,25 +34,8 @@
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
class CpuNonbondedForce { class CpuNonbondedForce {
private:
bool cutoff;
bool useSwitch;
bool periodic;
bool ewald;
bool pme;
const std::vector<std::pair<int, int> >* neighborList;
float periodicBoxSize[3];
float cutoffDistance, switchingDistance;
float krf, crf;
float alphaEwald;
int numRx, numRy, numRz;
int meshDim[3];
__m128 boxSize, invBoxSize, half;
static float TWO_OVER_SQRT_PI;
public: public:
class ThreadData;
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -59,7 +43,7 @@ class CpuNonbondedForce { ...@@ -59,7 +43,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
CpuNonbondedForce( ); CpuNonbondedForce();
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -67,7 +51,7 @@ class CpuNonbondedForce { ...@@ -67,7 +51,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
~CpuNonbondedForce( ); ~CpuNonbondedForce();
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -79,7 +63,7 @@ class CpuNonbondedForce { ...@@ -79,7 +63,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void setUseCutoff( float distance, const std::vector<std::pair<int, int> >& neighbors, float solventDielectric ); void setUseCutoff(float distance, const std::vector<std::pair<int, int> >& neighbors, float solventDielectric);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -89,7 +73,7 @@ class CpuNonbondedForce { ...@@ -89,7 +73,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void setUseSwitchingFunction( float distance ); void setUseSwitchingFunction(float distance);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -101,7 +85,7 @@ class CpuNonbondedForce { ...@@ -101,7 +85,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void setPeriodic( float* periodicBoxSize ); void setPeriodic(float* periodicBoxSize);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -165,9 +149,40 @@ class CpuNonbondedForce { ...@@ -165,9 +149,40 @@ class CpuNonbondedForce {
void calculateDirectIxn(int numberOfAtoms, float* posq, void calculateDirectIxn(int numberOfAtoms, float* posq,
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions, const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions,
float* fixedParameters, float* forces, float* totalEnergy) const; float* fixedParameters, float* forces, float* totalEnergy);
/**
* This routine contains the code executed by each thread.
*/
void runThread(int index, std::vector<float>& threadForce, double& threadEnergy);
private: private:
bool cutoff;
bool useSwitch;
bool periodic;
bool ewald;
bool pme;
const std::vector<std::pair<int, int> >* neighborList;
float periodicBoxSize[3];
float cutoffDistance, switchingDistance;
float krf, crf;
float alphaEwald;
int numRx, numRy, numRz;
int meshDim[3];
__m128 boxSize, invBoxSize, half;
bool isDeleted;
int numThreads, waitCount;
std::vector<pthread_t> thread;
std::vector<ThreadData*> threadData;
pthread_cond_t startCondition, endCondition;
pthread_mutex_t lock;
// The following variables are used to make information accessible to the individual threads.
float* posq;
std::vector<std::pair<float, float> > atomParameters;
std::vector<std::set<int> > exclusions;
bool includeEnergy;
static float TWO_OVER_SQRT_PI;
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -175,16 +190,12 @@ private: ...@@ -175,16 +190,12 @@ private:
@param atom1 the index of the first atom @param atom1 the index of the first atom
@param atom2 the index of the second atom @param atom2 the index of the second atom
@param posq atom coordinates and charges
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param forces force array (forces added) @param forces force array (forces added)
@param totalEnergy total energy @param totalEnergy total energy
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateOneIxn( int atom1, int atom2, float* posq, void calculateOneIxn(int atom1, int atom2, float* forces, double* totalEnergy);
const std::vector<std::pair<float, float> >& atomParameters, float* forces,
double* totalEnergy ) const;
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -192,16 +203,12 @@ private: ...@@ -192,16 +203,12 @@ private:
@param atom1 the index of the first atom @param atom1 the index of the first atom
@param atom2 the index of the second atom @param atom2 the index of the second atom
@param posq atom coordinates and charges
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param forces force array (forces added) @param forces force array (forces added)
@param totalEnergy total energy @param totalEnergy total energy
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateOneEwaldIxn( int atom1, int atom2, float* posq, void calculateOneEwaldIxn(int atom1, int atom2, float* forces, double* totalEnergy);
const std::vector<std::pair<float, float> >& atomParameters, float* forces,
double* totalEnergy ) const;
void getDeltaR(const __m128& posI, const __m128& posJ, __m128& deltaR, float& r2, bool periodic) const; void getDeltaR(const __m128& posI, const __m128& posJ, __m128& deltaR, float& r2, bool periodic) const;
......
...@@ -30,7 +30,6 @@ ...@@ -30,7 +30,6 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "CpuKernels.h" #include "CpuKernels.h"
#include "CpuNonbondedForce.h"
#include "ReferenceBondForce.h" #include "ReferenceBondForce.h"
#include "ReferenceLJCoulomb14.h" #include "ReferenceLJCoulomb14.h"
#include "openmm/Context.h" #include "openmm/Context.h"
...@@ -197,7 +196,6 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -197,7 +196,6 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
RealVec boxSize = extractBoxSize(context); RealVec boxSize = extractBoxSize(context);
float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]}; float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]};
double energy = ewaldSelfEnergy; double energy = ewaldSelfEnergy;
CpuNonbondedForce clj;
bool periodic = (nonbondedMethod == CutoffPeriodic); bool periodic = (nonbondedMethod == CutoffPeriodic);
bool ewald = (nonbondedMethod == Ewald); bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME); bool pme = (nonbondedMethod == PME);
...@@ -221,23 +219,23 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -221,23 +219,23 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
forces[i] = 0.0f; forces[i] = 0.0f;
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
neighborList.computeNeighborList(numParticles, posq, exclusions, floatBoxSize, periodic || ewald || pme, nonbondedCutoff); neighborList.computeNeighborList(numParticles, posq, exclusions, floatBoxSize, periodic || ewald || pme, nonbondedCutoff);
clj.setUseCutoff(nonbondedCutoff, neighborList.getNeighbors(), rfDielectric); nonbonded.setUseCutoff(nonbondedCutoff, neighborList.getNeighbors(), rfDielectric);
} }
if (periodic || ewald || pme) { if (periodic || ewald || pme) {
double minAllowedSize = 1.999999*nonbondedCutoff; double minAllowedSize = 1.999999*nonbondedCutoff;
if (boxSize[0] < minAllowedSize || boxSize[1] < minAllowedSize || boxSize[2] < minAllowedSize) if (boxSize[0] < minAllowedSize || boxSize[1] < minAllowedSize || boxSize[2] < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff."); throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
clj.setPeriodic(floatBoxSize); nonbonded.setPeriodic(floatBoxSize);
} }
if (ewald) if (ewald)
clj.setUseEwald(ewaldAlpha, kmax[0], kmax[1], kmax[2]); nonbonded.setUseEwald(ewaldAlpha, kmax[0], kmax[1], kmax[2]);
if (pme) if (pme)
clj.setUsePME(ewaldAlpha, gridSize); nonbonded.setUsePME(ewaldAlpha, gridSize);
if (useSwitchingFunction) if (useSwitchingFunction)
clj.setUseSwitchingFunction(switchingDistance); nonbonded.setUseSwitchingFunction(switchingDistance);
float nonbondedEnergy = 0; float nonbondedEnergy = 0;
if (includeDirect) if (includeDirect)
clj.calculateDirectIxn(numParticles, &posq[0], particleParams, exclusions, 0, &forces[0], includeEnergy ? &nonbondedEnergy : NULL); nonbonded.calculateDirectIxn(numParticles, &posq[0], particleParams, exclusions, 0, &forces[0], includeEnergy ? &nonbondedEnergy : NULL);
if (includeReciprocal) { if (includeReciprocal) {
if (useOptimizedPme) { if (useOptimizedPme) {
PmeIO io(&posq[0], &forces[0], numParticles); PmeIO io(&posq[0], &forces[0], numParticles);
...@@ -246,7 +244,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -246,7 +244,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io); optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
} }
else else
clj.calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, 0, forceData, includeEnergy ? &nonbondedEnergy : NULL); nonbonded.calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, 0, forceData, includeEnergy ? &nonbondedEnergy : NULL);
} }
energy += nonbondedEnergy; energy += nonbondedEnergy;
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "CpuNonbondedForce.h" #include "CpuNonbondedForce.h"
#include "ReferenceForce.h" #include "ReferenceForce.h"
#include "ReferencePME.h" #include "ReferencePME.h"
#include "openmm/internal/hardware.h"
// In case we're using some primitive version of Visual Studio this will // In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined. // make sure that erf() and erfc() are defined.
...@@ -39,6 +40,24 @@ using namespace std; ...@@ -39,6 +40,24 @@ using namespace std;
float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M)); float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
class CpuNonbondedForce::ThreadData {
public:
ThreadData(int index, CpuNonbondedForce& owner) : index(index), owner(owner) {
}
int index;
CpuNonbondedForce& owner;
vector<float> threadForce;
double threadEnergy;
};
static void* threadBody(void* args) {
CpuNonbondedForce::ThreadData& data = *reinterpret_cast<CpuNonbondedForce::ThreadData*>(args);
data.owner.runThread(data.index, data.threadForce, data.threadEnergy);
delete &data;
return 0;
}
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor CpuNonbondedForce constructor
...@@ -46,13 +65,17 @@ float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M)); ...@@ -46,13 +65,17 @@ float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false) { CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false) {
isDeleted = false;
// --------------------------------------------------------------------------------------- numThreads = getNumProcessors();
pthread_cond_init(&startCondition, NULL);
// static const char* methodName = "\nCpuNonbondedForce::CpuNonbondedForce"; pthread_cond_init(&endCondition, NULL);
pthread_mutex_init(&lock, NULL);
// --------------------------------------------------------------------------------------- thread.resize(numThreads);
for (int i = 0; i < numThreads; i++) {
ThreadData* data = new ThreadData(i, *this);
threadData.push_back(data);
pthread_create(&thread[i], NULL, threadBody, data);
}
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -62,13 +85,15 @@ CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), period ...@@ -62,13 +85,15 @@ CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), period
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
CpuNonbondedForce::~CpuNonbondedForce(){ CpuNonbondedForce::~CpuNonbondedForce(){
isDeleted = true;
// --------------------------------------------------------------------------------------- pthread_mutex_lock(&lock);
pthread_cond_broadcast(&startCondition);
// static const char* methodName = "\nCpuNonbondedForce::~CpuNonbondedForce"; pthread_mutex_unlock(&lock);
for (int i = 0; i < (int) thread.size(); i++)
// --------------------------------------------------------------------------------------- pthread_join(thread[i], NULL);
pthread_mutex_destroy(&lock);
pthread_cond_destroy(&startCondition);
pthread_cond_destroy(&endCondition);
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -280,18 +305,36 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, v ...@@ -280,18 +305,36 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, v
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq,
const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions, const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions,
float* fixedParameters, float* forces, float* totalEnergy) const { float* fixedParameters, float* forces, float* totalEnergy) {
// Record the parameters for the threads.
double directEnergy = 0; this->posq = posq;
double* energyPtr = (totalEnergy == NULL ? NULL : &directEnergy); this->atomParameters = atomParameters;
if (ewald || pme) { this->exclusions = exclusions;
// Compute the interactions from the neighbor list. includeEnergy = (totalEnergy != NULL);
for (int i = 0; i < (int) neighborList->size(); i++) { // Signal the threads to start running and wait for them to finish.
pair<int, int> pair = (*neighborList)[i];
calculateOneEwaldIxn(pair.first, pair.second, posq, atomParameters, forces, energyPtr); pthread_mutex_lock(&lock);
waitCount = 0;
pthread_cond_broadcast(&startCondition);
while (waitCount < numThreads)
pthread_cond_wait(&endCondition, &lock);
pthread_mutex_unlock(&lock);
// Combine the results from all the threads.
double directEnergy = 0;
for (int i = 0; i < numThreads; i++)
directEnergy += threadData[i]->threadEnergy;
for (int i = 0; i < numberOfAtoms; i++) {
__m128 f = _mm_loadu_ps(forces+4*i);
for (int j = 0; j < numThreads; j++)
f = _mm_add_ps(f, _mm_loadu_ps(&threadData[j]->threadForce[4*i]));
_mm_storeu_ps(forces+4*i, f);
} }
if (ewald || pme) {
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum. // Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
for (int i = 0; i < numberOfAtoms; i++) for (int i = 0; i < numberOfAtoms; i++)
...@@ -315,36 +358,66 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, ...@@ -315,36 +358,66 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq,
__m128 result = _mm_mul_ps(deltaR, _mm_set1_ps(dEdR)); __m128 result = _mm_mul_ps(deltaR, _mm_set1_ps(dEdR));
_mm_storeu_ps(forces+4*ii, _mm_sub_ps(_mm_loadu_ps(forces+4*ii), result)); _mm_storeu_ps(forces+4*ii, _mm_sub_ps(_mm_loadu_ps(forces+4*ii), result));
_mm_storeu_ps(forces+4*jj, _mm_add_ps(_mm_loadu_ps(forces+4*jj), result)); _mm_storeu_ps(forces+4*jj, _mm_add_ps(_mm_loadu_ps(forces+4*jj), result));
if (energyPtr != NULL) if (includeEnergy)
directEnergy -= chargeProd*inverseR*erfAlphaR; directEnergy -= chargeProd*inverseR*erfAlphaR;
} }
} }
} }
} }
if (totalEnergy != NULL)
*totalEnergy += (float) directEnergy;
}
void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double& threadEnergy) {
while (true) {
// Wait for the signal to start running.
pthread_mutex_lock(&lock);
waitCount++;
pthread_cond_signal(&endCondition);
pthread_cond_wait(&startCondition, &lock);
pthread_mutex_unlock(&lock);
if (isDeleted)
break;
// Compute this thread's subset of interactions.
threadEnergy = 0;
double* energyPtr = (includeEnergy ? &threadEnergy : NULL);
int numberOfAtoms = atomParameters.size();
threadForce.resize(4*numberOfAtoms, 0.0f);
for (int i = 0; i < 4*numberOfAtoms; i++)
threadForce[i] = 0.0f;
if (ewald || pme) {
// Compute the interactions from the neighbor list.
for (int i = index; i < (int) neighborList->size(); i += numThreads) {
pair<int, int> pair = (*neighborList)[i];
calculateOneEwaldIxn(pair.first, pair.second, &threadForce[0], energyPtr);
}
}
else if (cutoff) { else if (cutoff) {
// Compute the interactions from the neighbor list. // Compute the interactions from the neighbor list.
for (int i = 0; i < (int) neighborList->size(); i++) { for (int i = index; i < (int) neighborList->size(); i += numThreads) {
pair<int, int> pair = (*neighborList)[i]; pair<int, int> pair = (*neighborList)[i];
calculateOneIxn(pair.first, pair.second, posq, atomParameters, forces, energyPtr); calculateOneIxn(pair.first, pair.second, &threadForce[0], energyPtr);
} }
} }
else { else {
// Loop over all atom pairs // Loop over all atom pairs
for (int ii = 0; ii < numberOfAtoms; ii++){ for (int i = index; i < numberOfAtoms; i += numThreads){
for (int jj = ii+1; jj < numberOfAtoms; jj++) for (int j = i+1; j < numberOfAtoms; j++)
if (exclusions[jj].find(ii) == exclusions[jj].end()) if (exclusions[j].find(i) == exclusions[j].end())
calculateOneIxn(ii, jj, posq, atomParameters, forces, energyPtr); calculateOneIxn(i, j, &threadForce[0], energyPtr);
}
} }
} }
if (totalEnergy != NULL)
*totalEnergy += (float) directEnergy;
} }
void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* posq, void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* totalEnergy) {
const vector<pair<float, float> >& atomParameters, float* forces,
double* totalEnergy) const {
// get deltaR, R2, and R between 2 atoms // get deltaR, R2, and R between 2 atoms
__m128 deltaR; __m128 deltaR;
...@@ -396,9 +469,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* posq, ...@@ -396,9 +469,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* posq,
_mm_storeu_ps(forces+4*jj, _mm_sub_ps(_mm_loadu_ps(forces+4*jj), result)); _mm_storeu_ps(forces+4*jj, _mm_sub_ps(_mm_loadu_ps(forces+4*jj), result));
} }
void CpuNonbondedForce::calculateOneEwaldIxn(int ii, int jj, float* posq, void CpuNonbondedForce::calculateOneEwaldIxn(int ii, int jj, float* forces, double* totalEnergy) {
const vector<pair<float, float> >& atomParameters, float* forces,
double* totalEnergy) const {
__m128 deltaR; __m128 deltaR;
__m128 posI = _mm_loadu_ps(posq+4*ii); __m128 posI = _mm_loadu_ps(posq+4*ii);
__m128 posJ = _mm_loadu_ps(posq+4*jj); __m128 posJ = _mm_loadu_ps(posq+4*jj);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment