Commit 79011828 authored by peastman
Browse files

Assorted optimizations

parent f1533707
......@@ -176,9 +176,10 @@ private:
pthread_cond_t startCondition, endCondition;
pthread_mutex_t lock;
// The following variables are used to make information accessible to the individual threads.
int numberOfAtoms;
float* posq;
std::vector<std::pair<float, float> > atomParameters;
std::vector<std::set<int> > exclusions;
std::pair<float, float> const* atomParameters;
std::set<int> const* exclusions;
bool includeEnergy;
static const float TWO_OVER_SQRT_PI;
......
......@@ -117,13 +117,9 @@ public:
for (Voxel::const_iterator itemIter = voxel.begin(); itemIter != voxel.end(); ++itemIter) {
const int atomJ = itemIter->second;
// Ignore self hits
if (atomI >= atomJ)
continue;
// Ignore exclusions.
if (exclusions[atomI].find(atomJ) != exclusions[atomI].end())
continue;
// Avoid duplicate entries.
if (atomJ >= atomI)
break;
__m128 posJ = _mm_loadu_ps(itemIter->first);
__m128 delta = _mm_sub_ps(posJ, posI);
......@@ -134,6 +130,11 @@ public:
float dSquared = _mm_cvtss_f32(_mm_dp_ps(delta, delta, 0x71));
if (dSquared > maxDistanceSquared)
continue;
// Ignore exclusions.
if (exclusions[atomI].find(atomJ) != exclusions[atomI].end())
continue;
neighbors.push_back(make_pair(atomI, atomJ));
}
}
......
......@@ -340,9 +340,10 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
const vector<set<int> >& exclusions, float* forces, float* totalEnergy) {
// Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms;
this->posq = posq;
this->atomParameters = atomParameters;
this->exclusions = exclusions;
this->atomParameters = &atomParameters[0];
this->exclusions = &exclusions[0];
includeEnergy = (totalEnergy != NULL);
// Signal the threads to start running and wait for them to finish.
......@@ -413,7 +414,6 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double&
threadEnergy = 0;
double* energyPtr = (includeEnergy ? &threadEnergy : NULL);
int numberOfAtoms = atomParameters.size();
threadForce.resize(4*numberOfAtoms, 0.0f);
for (int i = 0; i < 4*numberOfAtoms; i++)
threadForce[i] = 0.0f;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment