"vscode:/vscode.git/clone" did not exist on "0d78f22fa250de1fd2768dbd105f6c2668f253ca"
Commit 79011828 authored by peastman
Browse files

Assorted optimizations

parent f1533707
...@@ -176,9 +176,10 @@ private: ...@@ -176,9 +176,10 @@ private:
pthread_cond_t startCondition, endCondition; pthread_cond_t startCondition, endCondition;
pthread_mutex_t lock; pthread_mutex_t lock;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
int numberOfAtoms;
float* posq; float* posq;
std::vector<std::pair<float, float> > atomParameters; std::pair<float, float> const* atomParameters;
std::vector<std::set<int> > exclusions; std::set<int> const* exclusions;
bool includeEnergy; bool includeEnergy;
static const float TWO_OVER_SQRT_PI; static const float TWO_OVER_SQRT_PI;
......
...@@ -117,13 +117,9 @@ public: ...@@ -117,13 +117,9 @@ public:
for (Voxel::const_iterator itemIter = voxel.begin(); itemIter != voxel.end(); ++itemIter) { for (Voxel::const_iterator itemIter = voxel.begin(); itemIter != voxel.end(); ++itemIter) {
const int atomJ = itemIter->second; const int atomJ = itemIter->second;
// Ignore self hits // Avoid duplicate entries.
if (atomI >= atomJ) if (atomJ >= atomI)
continue; break;
// Ignore exclusions.
if (exclusions[atomI].find(atomJ) != exclusions[atomI].end())
continue;
__m128 posJ = _mm_loadu_ps(itemIter->first); __m128 posJ = _mm_loadu_ps(itemIter->first);
__m128 delta = _mm_sub_ps(posJ, posI); __m128 delta = _mm_sub_ps(posJ, posI);
...@@ -134,6 +130,11 @@ public: ...@@ -134,6 +130,11 @@ public:
float dSquared = _mm_cvtss_f32(_mm_dp_ps(delta, delta, 0x71)); float dSquared = _mm_cvtss_f32(_mm_dp_ps(delta, delta, 0x71));
if (dSquared > maxDistanceSquared) if (dSquared > maxDistanceSquared)
continue; continue;
// Ignore exclusions.
if (exclusions[atomI].find(atomJ) != exclusions[atomI].end())
continue;
neighbors.push_back(make_pair(atomI, atomJ)); neighbors.push_back(make_pair(atomI, atomJ));
} }
} }
......
...@@ -340,9 +340,10 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const ...@@ -340,9 +340,10 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
const vector<set<int> >& exclusions, float* forces, float* totalEnergy) { const vector<set<int> >& exclusions, float* forces, float* totalEnergy) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms;
this->posq = posq; this->posq = posq;
this->atomParameters = atomParameters; this->atomParameters = &atomParameters[0];
this->exclusions = exclusions; this->exclusions = &exclusions[0];
includeEnergy = (totalEnergy != NULL); includeEnergy = (totalEnergy != NULL);
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
...@@ -413,7 +414,6 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double& ...@@ -413,7 +414,6 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double&
threadEnergy = 0; threadEnergy = 0;
double* energyPtr = (includeEnergy ? &threadEnergy : NULL); double* energyPtr = (includeEnergy ? &threadEnergy : NULL);
int numberOfAtoms = atomParameters.size();
threadForce.resize(4*numberOfAtoms, 0.0f); threadForce.resize(4*numberOfAtoms, 0.0f);
for (int i = 0; i < 4*numberOfAtoms; i++) for (int i = 0; i < 4*numberOfAtoms; i++)
threadForce[i] = 0.0f; threadForce[i] = 0.0f;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment