"plugins/rpmd/platforms/hip/tests/TestHipRpmd.cpp" did not exist on "26a34a73b9eefc3f696750a07a3e19ead8bcc072"
Commit b278bb6d authored by peastman's avatar peastman
Browse files

Direct space nonbonded is multithreaded

parent 3c435fb8
......@@ -34,6 +34,7 @@
#include "CpuPlatform.h"
#include "CpuNeighborList.h"
#include "CpuNonbondedForce.h"
#include "openmm/kernels.h"
#include "openmm/System.h"
......@@ -87,6 +88,7 @@ private:
std::vector<float> forces;
NonbondedMethod nonbondedMethod;
CpuNeighborList neighborList;
CpuNonbondedForce nonbonded;
Kernel optimizedPme;
};
......
......@@ -26,6 +26,7 @@
#define OPENMM_CPU_NONBONDED_FORCE_H__
#include "ReferencePairIxn.h"
#include <pthread.h>
#include <set>
#include <utility>
#include <vector>
......@@ -33,25 +34,8 @@
// ---------------------------------------------------------------------------------------
class CpuNonbondedForce {
private:
bool cutoff;
bool useSwitch;
bool periodic;
bool ewald;
bool pme;
const std::vector<std::pair<int, int> >* neighborList;
float periodicBoxSize[3];
float cutoffDistance, switchingDistance;
float krf, crf;
float alphaEwald;
int numRx, numRy, numRz;
int meshDim[3];
__m128 boxSize, invBoxSize, half;
static float TWO_OVER_SQRT_PI;
public:
class ThreadData;
/**---------------------------------------------------------------------------------------
......@@ -59,7 +43,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
CpuNonbondedForce( );
CpuNonbondedForce();
/**---------------------------------------------------------------------------------------
......@@ -67,7 +51,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
~CpuNonbondedForce( );
~CpuNonbondedForce();
/**---------------------------------------------------------------------------------------
......@@ -79,7 +63,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void setUseCutoff( float distance, const std::vector<std::pair<int, int> >& neighbors, float solventDielectric );
void setUseCutoff(float distance, const std::vector<std::pair<int, int> >& neighbors, float solventDielectric);
/**---------------------------------------------------------------------------------------
......@@ -89,7 +73,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void setUseSwitchingFunction( float distance );
void setUseSwitchingFunction(float distance);
/**---------------------------------------------------------------------------------------
......@@ -101,7 +85,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void setPeriodic( float* periodicBoxSize );
void setPeriodic(float* periodicBoxSize);
/**---------------------------------------------------------------------------------------
......@@ -165,9 +149,40 @@ class CpuNonbondedForce {
void calculateDirectIxn(int numberOfAtoms, float* posq,
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions,
float* fixedParameters, float* forces, float* totalEnergy) const;
float* fixedParameters, float* forces, float* totalEnergy);
/**
* This routine contains the code executed by each thread.
*/
void runThread(int index, std::vector<float>& threadForce, double& threadEnergy);
private:
bool cutoff;
bool useSwitch;
bool periodic;
bool ewald;
bool pme;
const std::vector<std::pair<int, int> >* neighborList;
float periodicBoxSize[3];
float cutoffDistance, switchingDistance;
float krf, crf;
float alphaEwald;
int numRx, numRy, numRz;
int meshDim[3];
__m128 boxSize, invBoxSize, half;
bool isDeleted;
int numThreads, waitCount;
std::vector<pthread_t> thread;
std::vector<ThreadData*> threadData;
pthread_cond_t startCondition, endCondition;
pthread_mutex_t lock;
// The following variables are used to make information accessible to the individual threads.
float* posq;
std::vector<std::pair<float, float> > atomParameters;
std::vector<std::set<int> > exclusions;
bool includeEnergy;
static float TWO_OVER_SQRT_PI;
/**---------------------------------------------------------------------------------------
......@@ -175,16 +190,12 @@ private:
@param atom1 the index of the first atom
@param atom2 the index of the second atom
@param posq atom coordinates and charges
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void calculateOneIxn( int atom1, int atom2, float* posq,
const std::vector<std::pair<float, float> >& atomParameters, float* forces,
double* totalEnergy ) const;
void calculateOneIxn(int atom1, int atom2, float* forces, double* totalEnergy);
/**---------------------------------------------------------------------------------------
......@@ -192,16 +203,12 @@ private:
@param atom1 the index of the first atom
@param atom2 the index of the second atom
@param posq atom coordinates and charges
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void calculateOneEwaldIxn( int atom1, int atom2, float* posq,
const std::vector<std::pair<float, float> >& atomParameters, float* forces,
double* totalEnergy ) const;
void calculateOneEwaldIxn(int atom1, int atom2, float* forces, double* totalEnergy);
void getDeltaR(const __m128& posI, const __m128& posJ, __m128& deltaR, float& r2, bool periodic) const;
......
......@@ -30,7 +30,6 @@
* -------------------------------------------------------------------------- */
#include "CpuKernels.h"
#include "CpuNonbondedForce.h"
#include "ReferenceBondForce.h"
#include "ReferenceLJCoulomb14.h"
#include "openmm/Context.h"
......@@ -197,7 +196,6 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
RealVec boxSize = extractBoxSize(context);
float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]};
double energy = ewaldSelfEnergy;
CpuNonbondedForce clj;
bool periodic = (nonbondedMethod == CutoffPeriodic);
bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME);
......@@ -221,23 +219,23 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
forces[i] = 0.0f;
if (nonbondedMethod != NoCutoff) {
neighborList.computeNeighborList(numParticles, posq, exclusions, floatBoxSize, periodic || ewald || pme, nonbondedCutoff);
clj.setUseCutoff(nonbondedCutoff, neighborList.getNeighbors(), rfDielectric);
nonbonded.setUseCutoff(nonbondedCutoff, neighborList.getNeighbors(), rfDielectric);
}
if (periodic || ewald || pme) {
double minAllowedSize = 1.999999*nonbondedCutoff;
if (boxSize[0] < minAllowedSize || boxSize[1] < minAllowedSize || boxSize[2] < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
clj.setPeriodic(floatBoxSize);
nonbonded.setPeriodic(floatBoxSize);
}
if (ewald)
clj.setUseEwald(ewaldAlpha, kmax[0], kmax[1], kmax[2]);
nonbonded.setUseEwald(ewaldAlpha, kmax[0], kmax[1], kmax[2]);
if (pme)
clj.setUsePME(ewaldAlpha, gridSize);
nonbonded.setUsePME(ewaldAlpha, gridSize);
if (useSwitchingFunction)
clj.setUseSwitchingFunction(switchingDistance);
nonbonded.setUseSwitchingFunction(switchingDistance);
float nonbondedEnergy = 0;
if (includeDirect)
clj.calculateDirectIxn(numParticles, &posq[0], particleParams, exclusions, 0, &forces[0], includeEnergy ? &nonbondedEnergy : NULL);
nonbonded.calculateDirectIxn(numParticles, &posq[0], particleParams, exclusions, 0, &forces[0], includeEnergy ? &nonbondedEnergy : NULL);
if (includeReciprocal) {
if (useOptimizedPme) {
PmeIO io(&posq[0], &forces[0], numParticles);
......@@ -246,7 +244,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
}
else
clj.calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, 0, forceData, includeEnergy ? &nonbondedEnergy : NULL);
nonbonded.calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, 0, forceData, includeEnergy ? &nonbondedEnergy : NULL);
}
energy += nonbondedEnergy;
for (int i = 0; i < numParticles; i++) {
......
......@@ -30,6 +30,7 @@
#include "CpuNonbondedForce.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
#include "openmm/internal/hardware.h"
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
......@@ -39,6 +40,24 @@ using namespace std;
float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
class CpuNonbondedForce::ThreadData {
public:
ThreadData(int index, CpuNonbondedForce& owner) : index(index), owner(owner) {
}
int index;
CpuNonbondedForce& owner;
vector<float> threadForce;
double threadEnergy;
};
static void* threadBody(void* args) {
CpuNonbondedForce::ThreadData& data = *reinterpret_cast<CpuNonbondedForce::ThreadData*>(args);
data.owner.runThread(data.index, data.threadForce, data.threadEnergy);
delete &data;
return 0;
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor
......@@ -46,13 +65,17 @@ float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
--------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false) {
// ---------------------------------------------------------------------------------------
// static const char* methodName = "\nCpuNonbondedForce::CpuNonbondedForce";
// ---------------------------------------------------------------------------------------
isDeleted = false;
numThreads = getNumProcessors();
pthread_cond_init(&startCondition, NULL);
pthread_cond_init(&endCondition, NULL);
pthread_mutex_init(&lock, NULL);
thread.resize(numThreads);
for (int i = 0; i < numThreads; i++) {
ThreadData* data = new ThreadData(i, *this);
threadData.push_back(data);
pthread_create(&thread[i], NULL, threadBody, data);
}
}
/**---------------------------------------------------------------------------------------
......@@ -62,13 +85,15 @@ CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), period
--------------------------------------------------------------------------------------- */
CpuNonbondedForce::~CpuNonbondedForce(){
// ---------------------------------------------------------------------------------------
// static const char* methodName = "\nCpuNonbondedForce::~CpuNonbondedForce";
// ---------------------------------------------------------------------------------------
isDeleted = true;
pthread_mutex_lock(&lock);
pthread_cond_broadcast(&startCondition);
pthread_mutex_unlock(&lock);
for (int i = 0; i < (int) thread.size(); i++)
pthread_join(thread[i], NULL);
pthread_mutex_destroy(&lock);
pthread_cond_destroy(&startCondition);
pthread_cond_destroy(&endCondition);
}
/**---------------------------------------------------------------------------------------
......@@ -280,18 +305,36 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, v
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq,
const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions,
float* fixedParameters, float* forces, float* totalEnergy) const {
float* fixedParameters, float* forces, float* totalEnergy) {
// Record the parameters for the threads.
double directEnergy = 0;
double* energyPtr = (totalEnergy == NULL ? NULL : &directEnergy);
if (ewald || pme) {
// Compute the interactions from the neighbor list.
this->posq = posq;
this->atomParameters = atomParameters;
this->exclusions = exclusions;
includeEnergy = (totalEnergy != NULL);
for (int i = 0; i < (int) neighborList->size(); i++) {
pair<int, int> pair = (*neighborList)[i];
calculateOneEwaldIxn(pair.first, pair.second, posq, atomParameters, forces, energyPtr);
// Signal the threads to start running and wait for them to finish.
pthread_mutex_lock(&lock);
waitCount = 0;
pthread_cond_broadcast(&startCondition);
while (waitCount < numThreads)
pthread_cond_wait(&endCondition, &lock);
pthread_mutex_unlock(&lock);
// Combine the results from all the threads.
double directEnergy = 0;
for (int i = 0; i < numThreads; i++)
directEnergy += threadData[i]->threadEnergy;
for (int i = 0; i < numberOfAtoms; i++) {
__m128 f = _mm_loadu_ps(forces+4*i);
for (int j = 0; j < numThreads; j++)
f = _mm_add_ps(f, _mm_loadu_ps(&threadData[j]->threadForce[4*i]));
_mm_storeu_ps(forces+4*i, f);
}
if (ewald || pme) {
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
for (int i = 0; i < numberOfAtoms; i++)
......@@ -315,36 +358,66 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq,
__m128 result = _mm_mul_ps(deltaR, _mm_set1_ps(dEdR));
_mm_storeu_ps(forces+4*ii, _mm_sub_ps(_mm_loadu_ps(forces+4*ii), result));
_mm_storeu_ps(forces+4*jj, _mm_add_ps(_mm_loadu_ps(forces+4*jj), result));
if (energyPtr != NULL)
if (includeEnergy)
directEnergy -= chargeProd*inverseR*erfAlphaR;
}
}
}
}
if (totalEnergy != NULL)
*totalEnergy += (float) directEnergy;
}
void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double& threadEnergy) {
while (true) {
// Wait for the signal to start running.
pthread_mutex_lock(&lock);
waitCount++;
pthread_cond_signal(&endCondition);
pthread_cond_wait(&startCondition, &lock);
pthread_mutex_unlock(&lock);
if (isDeleted)
break;
// Compute this thread's subset of interactions.
threadEnergy = 0;
double* energyPtr = (includeEnergy ? &threadEnergy : NULL);
int numberOfAtoms = atomParameters.size();
threadForce.resize(4*numberOfAtoms, 0.0f);
for (int i = 0; i < 4*numberOfAtoms; i++)
threadForce[i] = 0.0f;
if (ewald || pme) {
// Compute the interactions from the neighbor list.
for (int i = index; i < (int) neighborList->size(); i += numThreads) {
pair<int, int> pair = (*neighborList)[i];
calculateOneEwaldIxn(pair.first, pair.second, &threadForce[0], energyPtr);
}
}
else if (cutoff) {
// Compute the interactions from the neighbor list.
for (int i = 0; i < (int) neighborList->size(); i++) {
for (int i = index; i < (int) neighborList->size(); i += numThreads) {
pair<int, int> pair = (*neighborList)[i];
calculateOneIxn(pair.first, pair.second, posq, atomParameters, forces, energyPtr);
calculateOneIxn(pair.first, pair.second, &threadForce[0], energyPtr);
}
}
else {
// Loop over all atom pairs
for (int ii = 0; ii < numberOfAtoms; ii++){
for (int jj = ii+1; jj < numberOfAtoms; jj++)
if (exclusions[jj].find(ii) == exclusions[jj].end())
calculateOneIxn(ii, jj, posq, atomParameters, forces, energyPtr);
for (int i = index; i < numberOfAtoms; i += numThreads){
for (int j = i+1; j < numberOfAtoms; j++)
if (exclusions[j].find(i) == exclusions[j].end())
calculateOneIxn(i, j, &threadForce[0], energyPtr);
}
}
}
if (totalEnergy != NULL)
*totalEnergy += (float) directEnergy;
}
void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* posq,
const vector<pair<float, float> >& atomParameters, float* forces,
double* totalEnergy) const {
void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* totalEnergy) {
// get deltaR, R2, and R between 2 atoms
__m128 deltaR;
......@@ -396,9 +469,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* posq,
_mm_storeu_ps(forces+4*jj, _mm_sub_ps(_mm_loadu_ps(forces+4*jj), result));
}
void CpuNonbondedForce::calculateOneEwaldIxn(int ii, int jj, float* posq,
const vector<pair<float, float> >& atomParameters, float* forces,
double* totalEnergy) const {
void CpuNonbondedForce::calculateOneEwaldIxn(int ii, int jj, float* forces, double* totalEnergy) {
__m128 deltaR;
__m128 posI = _mm_loadu_ps(posq+4*ii);
__m128 posJ = _mm_loadu_ps(posq+4*jj);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment