Commit 8f168506 authored by peastman's avatar peastman
Browse files

Fixed segfaults on some computers

parent 587ed0c0
...@@ -168,7 +168,6 @@ private: ...@@ -168,7 +168,6 @@ private:
int meshDim[3]; int meshDim[3];
std::vector<float> ewaldScaleX, ewaldScaleY, ewaldScaleDeriv; std::vector<float> ewaldScaleX, ewaldScaleY, ewaldScaleDeriv;
float ewaldDX, ewaldDXInv; float ewaldDX, ewaldDXInv;
__m128 boxSize, invBoxSize, half;
bool isDeleted; bool isDeleted;
int numThreads, waitCount; int numThreads, waitCount;
std::vector<pthread_t> thread; std::vector<pthread_t> thread;
...@@ -196,7 +195,7 @@ private: ...@@ -196,7 +195,7 @@ private:
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateOneIxn(int atom1, int atom2, float* forces, double* totalEnergy); void calculateOneIxn(int atom1, int atom2, float* forces, double* totalEnergy, const __m128& boxSize, const __m128& invBoxSize);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -209,13 +208,13 @@ private: ...@@ -209,13 +208,13 @@ private:
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateOneEwaldIxn(int atom1, int atom2, float* forces, double* totalEnergy); void calculateOneEwaldIxn(int atom1, int atom2, float* forces, double* totalEnergy, const __m128& boxSize, const __m128& invBoxSize);
/** /**
* Compute the displacement and squared distance between two points, optionally using * Compute the displacement and squared distance between two points, optionally using
* periodic boundary conditions. * periodic boundary conditions.
*/ */
void getDeltaR(const __m128& posI, const __m128& posJ, __m128& deltaR, float& r2, bool periodic) const; void getDeltaR(const __m128& posI, const __m128& posJ, __m128& deltaR, float& r2, bool periodic, const __m128& boxSize, const __m128& invBoxSize) const;
/** /**
* Compute a fast approximation to erfc(x). * Compute a fast approximation to erfc(x).
......
...@@ -50,6 +50,7 @@ public: ...@@ -50,6 +50,7 @@ public:
} }
double getSpeed() const; double getSpeed() const;
bool supportsDoublePrecision() const; bool supportsDoublePrecision() const;
static bool isProcessorSupported();
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -155,9 +155,6 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -155,9 +155,6 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
this->periodicBoxSize[0] = periodicBoxSize[0]; this->periodicBoxSize[0] = periodicBoxSize[0];
this->periodicBoxSize[1] = periodicBoxSize[1]; this->periodicBoxSize[1] = periodicBoxSize[1];
this->periodicBoxSize[2] = periodicBoxSize[2]; this->periodicBoxSize[2] = periodicBoxSize[2];
boxSize = _mm_set_ps(0, periodicBoxSize[2], periodicBoxSize[1], periodicBoxSize[0]);
invBoxSize = _mm_set_ps(0, (1/periodicBoxSize[2]), (1/periodicBoxSize[1]), (1/periodicBoxSize[0]));
half = _mm_set1_ps(0.5);
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -370,6 +367,8 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const ...@@ -370,6 +367,8 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
if (ewald || pme) { if (ewald || pme) {
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum. // Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
__m128 boxSize = _mm_set_ps(0, periodicBoxSize[2], periodicBoxSize[1], periodicBoxSize[0]);
__m128 invBoxSize = _mm_set_ps(0, (1/periodicBoxSize[2]), (1/periodicBoxSize[1]), (1/periodicBoxSize[0]));
for (int i = 0; i < numberOfAtoms; i++) for (int i = 0; i < numberOfAtoms; i++)
for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) { for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
if (*iter > i) { if (*iter > i) {
...@@ -379,7 +378,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const ...@@ -379,7 +378,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
__m128 posI = _mm_loadu_ps(posq+4*ii); __m128 posI = _mm_loadu_ps(posq+4*ii);
__m128 posJ = _mm_loadu_ps(posq+4*jj); __m128 posJ = _mm_loadu_ps(posq+4*jj);
float r2; float r2;
getDeltaR(posJ, posI, deltaR, r2, false); getDeltaR(posJ, posI, deltaR, r2, false, boxSize, invBoxSize);
float r = sqrtf(r2); float r = sqrtf(r2);
float inverseR = 1/r; float inverseR = 1/r;
float chargeProd = ONE_4PI_EPS0*posq[4*ii+3]*posq[4*jj+3]; float chargeProd = ONE_4PI_EPS0*posq[4*ii+3]*posq[4*jj+3];
...@@ -419,12 +418,14 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double& ...@@ -419,12 +418,14 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double&
threadForce.resize(4*numberOfAtoms, 0.0f); threadForce.resize(4*numberOfAtoms, 0.0f);
for (int i = 0; i < 4*numberOfAtoms; i++) for (int i = 0; i < 4*numberOfAtoms; i++)
threadForce[i] = 0.0f; threadForce[i] = 0.0f;
__m128 boxSize = _mm_set_ps(0, periodicBoxSize[2], periodicBoxSize[1], periodicBoxSize[0]);
__m128 invBoxSize = _mm_set_ps(0, (1/periodicBoxSize[2]), (1/periodicBoxSize[1]), (1/periodicBoxSize[0]));
if (ewald || pme) { if (ewald || pme) {
// Compute the interactions from the neighbor list. // Compute the interactions from the neighbor list.
for (int i = index; i < (int) neighborList->size(); i += numThreads) { for (int i = index; i < (int) neighborList->size(); i += numThreads) {
pair<int, int> pair = (*neighborList)[i]; pair<int, int> pair = (*neighborList)[i];
calculateOneEwaldIxn(pair.first, pair.second, &threadForce[0], energyPtr); calculateOneEwaldIxn(pair.first, pair.second, &threadForce[0], energyPtr, boxSize, invBoxSize);
} }
} }
else if (cutoff) { else if (cutoff) {
...@@ -432,7 +433,7 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double& ...@@ -432,7 +433,7 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double&
for (int i = index; i < (int) neighborList->size(); i += numThreads) { for (int i = index; i < (int) neighborList->size(); i += numThreads) {
pair<int, int> pair = (*neighborList)[i]; pair<int, int> pair = (*neighborList)[i];
calculateOneIxn(pair.first, pair.second, &threadForce[0], energyPtr); calculateOneIxn(pair.first, pair.second, &threadForce[0], energyPtr, boxSize, invBoxSize);
} }
} }
else { else {
...@@ -441,20 +442,20 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double& ...@@ -441,20 +442,20 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double&
for (int i = index; i < numberOfAtoms; i += numThreads){ for (int i = index; i < numberOfAtoms; i += numThreads){
for (int j = i+1; j < numberOfAtoms; j++) for (int j = i+1; j < numberOfAtoms; j++)
if (exclusions[j].find(i) == exclusions[j].end()) if (exclusions[j].find(i) == exclusions[j].end())
calculateOneIxn(i, j, &threadForce[0], energyPtr); calculateOneIxn(i, j, &threadForce[0], energyPtr, boxSize, invBoxSize);
} }
} }
} }
} }
void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* totalEnergy) { void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* totalEnergy, const __m128& boxSize, const __m128& invBoxSize) {
// get deltaR, R2, and R between 2 atoms // get deltaR, R2, and R between 2 atoms
__m128 deltaR; __m128 deltaR;
__m128 posI = _mm_loadu_ps(posq+4*ii); __m128 posI = _mm_loadu_ps(posq+4*ii);
__m128 posJ = _mm_loadu_ps(posq+4*jj); __m128 posJ = _mm_loadu_ps(posq+4*jj);
float r2; float r2;
getDeltaR(posJ, posI, deltaR, r2, periodic); getDeltaR(posJ, posI, deltaR, r2, periodic, boxSize, invBoxSize);
if (cutoff && r2 >= cutoffDistance*cutoffDistance) if (cutoff && r2 >= cutoffDistance*cutoffDistance)
return; return;
float r = sqrtf(r2); float r = sqrtf(r2);
...@@ -501,12 +502,12 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t ...@@ -501,12 +502,12 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
_mm_storeu_ps(forces+4*jj, _mm_sub_ps(_mm_loadu_ps(forces+4*jj), result)); _mm_storeu_ps(forces+4*jj, _mm_sub_ps(_mm_loadu_ps(forces+4*jj), result));
} }
void CpuNonbondedForce::calculateOneEwaldIxn(int ii, int jj, float* forces, double* totalEnergy) { void CpuNonbondedForce::calculateOneEwaldIxn(int ii, int jj, float* forces, double* totalEnergy, const __m128& boxSize, const __m128& invBoxSize) {
__m128 deltaR; __m128 deltaR;
__m128 posI = _mm_loadu_ps(posq+4*ii); __m128 posI = _mm_loadu_ps(posq+4*ii);
__m128 posJ = _mm_loadu_ps(posq+4*jj); __m128 posJ = _mm_loadu_ps(posq+4*jj);
float r2; float r2;
getDeltaR(posJ, posI, deltaR, r2, true); getDeltaR(posJ, posI, deltaR, r2, true, boxSize, invBoxSize);
if (r2 < cutoffDistance*cutoffDistance) { if (r2 < cutoffDistance*cutoffDistance) {
float r = sqrtf(r2); float r = sqrtf(r2);
float inverseR = 1/r; float inverseR = 1/r;
...@@ -546,10 +547,10 @@ void CpuNonbondedForce::calculateOneEwaldIxn(int ii, int jj, float* forces, doub ...@@ -546,10 +547,10 @@ void CpuNonbondedForce::calculateOneEwaldIxn(int ii, int jj, float* forces, doub
} }
} }
void CpuNonbondedForce::getDeltaR(const __m128& posI, const __m128& posJ, __m128& deltaR, float& r2, bool periodic) const { void CpuNonbondedForce::getDeltaR(const __m128& posI, const __m128& posJ, __m128& deltaR, float& r2, bool periodic, const __m128& boxSize, const __m128& invBoxSize) const {
deltaR = _mm_sub_ps(posJ, posI); deltaR = _mm_sub_ps(posJ, posI);
if (periodic) { if (periodic) {
__m128 base = _mm_mul_ps(_mm_floor_ps(_mm_add_ps(_mm_mul_ps(deltaR, invBoxSize), half)), boxSize); __m128 base = _mm_mul_ps(_mm_floor_ps(_mm_add_ps(_mm_mul_ps(deltaR, invBoxSize), _mm_set1_ps(0.5))), boxSize);
deltaR = _mm_sub_ps(deltaR, base); deltaR = _mm_sub_ps(deltaR, base);
} }
r2 = _mm_cvtss_f32(_mm_dp_ps(deltaR, deltaR, 0x71)); r2 = _mm_cvtss_f32(_mm_dp_ps(deltaR, deltaR, 0x71));
......
...@@ -39,13 +39,8 @@ using namespace OpenMM; ...@@ -39,13 +39,8 @@ using namespace OpenMM;
extern "C" OPENMM_EXPORT_CPU void registerPlatforms() { extern "C" OPENMM_EXPORT_CPU void registerPlatforms() {
// Only register this platform if the CPU supports SSE 4.1. // Only register this platform if the CPU supports SSE 4.1.
int cpuInfo[4]; if (CpuPlatform::isProcessorSupported())
cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) {
cpuid(cpuInfo, 1);
if ((cpuInfo[2] & ((int) 1 << 19)) != 0)
Platform::registerPlatform(new CpuPlatform()); Platform::registerPlatform(new CpuPlatform());
}
} }
CpuPlatform::CpuPlatform() { CpuPlatform::CpuPlatform() {
...@@ -60,3 +55,15 @@ double CpuPlatform::getSpeed() const { ...@@ -60,3 +55,15 @@ double CpuPlatform::getSpeed() const {
bool CpuPlatform::supportsDoublePrecision() const { bool CpuPlatform::supportsDoublePrecision() const {
return false; return false;
} }
bool CpuPlatform::isProcessorSupported() {
// Make sure the CPU supports SSE 4.1.
int cpuInfo[4];
cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) {
cpuid(cpuInfo, 1);
return ((cpuInfo[2] & ((int) 1 << 19)) != 0);
}
return false;
}
...@@ -254,6 +254,10 @@ void testErrorTolerance(NonbondedForce::NonbondedMethod method) { ...@@ -254,6 +254,10 @@ void testErrorTolerance(NonbondedForce::NonbondedMethod method) {
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
try { try {
if (!CpuPlatform::isProcessorSupported()) {
cout << "CPU is not supported. Exiting." << endl;
return 0;
}
testEwaldPME(false); testEwaldPME(false);
testEwaldPME(true); testEwaldPME(true);
// testEwald2Ions(); // testEwald2Ions();
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "openmm/internal/AssertionUtilities.h" #include "openmm/internal/AssertionUtilities.h"
#include "CpuNeighborList.h" #include "CpuNeighborList.h"
#include "CpuPlatform.h"
#include "sfmt/SFMT.h" #include "sfmt/SFMT.h"
#include <iostream> #include <iostream>
#include <set> #include <set>
...@@ -95,6 +96,10 @@ void testNeighborList(bool periodic) { ...@@ -95,6 +96,10 @@ void testNeighborList(bool periodic) {
int main() { int main() {
try { try {
if (!CpuPlatform::isProcessorSupported()) {
cout << "CPU is not supported. Exiting." << endl;
return 0;
}
testNeighborList(false); testNeighborList(false);
testNeighborList(true); testNeighborList(true);
} }
......
...@@ -625,6 +625,10 @@ void testSwitchingFunction(NonbondedForce::NonbondedMethod method) { ...@@ -625,6 +625,10 @@ void testSwitchingFunction(NonbondedForce::NonbondedMethod method) {
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
try { try {
if (!CpuPlatform::isProcessorSupported()) {
cout << "CPU is not supported. Exiting." << endl;
return 0;
}
testCoulomb(); testCoulomb();
testLJ(); testLJ();
testExclusionsAnd14(); testExclusionsAnd14();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment