Commit 2db04610 authored by peastman's avatar peastman
Browse files

Merge pull request #873 from peastman/cpuopt

Optimizations to CPU platform
parents 79b85ad7 05381ef1
...@@ -296,7 +296,9 @@ SET(OPENMM_BUILD_SHARED_LIB ON CACHE BOOL "Whether to build shared OpenMM librar ...@@ -296,7 +296,9 @@ SET(OPENMM_BUILD_SHARED_LIB ON CACHE BOOL "Whether to build shared OpenMM librar
SET(EXTRA_LINK_FLAGS ${EXTRA_COMPILE_FLAGS}) SET(EXTRA_LINK_FLAGS ${EXTRA_COMPILE_FLAGS})
IF (CMAKE_SYSTEM_NAME MATCHES "Linux") IF (CMAKE_SYSTEM_NAME MATCHES "Linux")
SET(EXTRA_LINK_FLAGS "${EXTRA_LINK_FLAGS} -Wl,--no-as-needed -lrt") IF (NOT ANDROID)
SET(EXTRA_LINK_FLAGS "${EXTRA_LINK_FLAGS} -Wl,--no-as-needed -lrt")
ENDIF (NOT ANDROID)
ENDIF (CMAKE_SYSTEM_NAME MATCHES "Linux") ENDIF (CMAKE_SYSTEM_NAME MATCHES "Linux")
IF(OPENMM_BUILD_SHARED_LIB) IF(OPENMM_BUILD_SHARED_LIB)
......
...@@ -191,6 +191,18 @@ static inline fvec8 sqrt(const fvec8& v) { ...@@ -191,6 +191,18 @@ static inline fvec8 sqrt(const fvec8& v) {
return fvec8(_mm256_sqrt_ps(v.val)); return fvec8(_mm256_sqrt_ps(v.val));
} }
static inline fvec8 rsqrt(const fvec8& v) {
// Initial estimate of rsqrt().
fvec8 y(_mm256_rsqrt_ps(v.val));
// Perform an iteration of Newton refinement.
fvec8 x2 = v*0.5f;
y *= fvec8(1.5f)-x2*y*y;
return y;
}
static inline float dot8(const fvec8& v1, const fvec8& v2) { static inline float dot8(const fvec8& v1, const fvec8& v2) {
fvec8 result = _mm256_dp_ps(v1, v2, 0xF1); fvec8 result = _mm256_dp_ps(v1, v2, 0xF1);
return _mm_cvtss_f32(result.lowerVec())+_mm_cvtss_f32(result.upperVec()); return _mm_cvtss_f32(result.lowerVec())+_mm_cvtss_f32(result.upperVec());
......
...@@ -251,11 +251,15 @@ static inline fvec4 abs(const fvec4& v) { ...@@ -251,11 +251,15 @@ static inline fvec4 abs(const fvec4& v) {
return vabsq_f32(v); return vabsq_f32(v);
} }
static inline fvec4 sqrt(const fvec4& v) { static inline fvec4 rsqrt(const fvec4& v) {
float32x4_t recipSqrt = vrsqrteq_f32(v); float32x4_t recipSqrt = vrsqrteq_f32(v);
recipSqrt = vmulq_f32(recipSqrt, vrsqrtsq_f32(vmulq_f32(recipSqrt, v), recipSqrt)); recipSqrt = vmulq_f32(recipSqrt, vrsqrtsq_f32(vmulq_f32(recipSqrt, v), recipSqrt));
recipSqrt = vmulq_f32(recipSqrt, vrsqrtsq_f32(vmulq_f32(recipSqrt, v), recipSqrt)); recipSqrt = vmulq_f32(recipSqrt, vrsqrtsq_f32(vmulq_f32(recipSqrt, v), recipSqrt));
return vmulq_f32(v, recipSqrt); return recipSqrt;
}
static inline fvec4 sqrt(const fvec4& v) {
return rsqrt(v)*v;
} }
static inline float dot3(const fvec4& v1, const fvec4& v2) { static inline float dot3(const fvec4& v1, const fvec4& v2) {
......
...@@ -330,7 +330,7 @@ static inline fvec4 ceil(const fvec4& v) { ...@@ -330,7 +330,7 @@ static inline fvec4 ceil(const fvec4& v) {
return truncated + blend(0.0f, 1.0f, truncated<v); return truncated + blend(0.0f, 1.0f, truncated<v);
} }
static inline fvec4 sqrt(const fvec4& v) { static inline fvec4 rsqrt(const fvec4& v) {
// Initial estimate of rsqrt(). // Initial estimate of rsqrt().
ivec4 i = (__m128i) v; ivec4 i = (__m128i) v;
...@@ -343,7 +343,11 @@ static inline fvec4 sqrt(const fvec4& v) { ...@@ -343,7 +343,11 @@ static inline fvec4 sqrt(const fvec4& v) {
y *= 1.5f-x2*y*y; y *= 1.5f-x2*y*y;
y *= 1.5f-x2*y*y; y *= 1.5f-x2*y*y;
y *= 1.5f-x2*y*y; y *= 1.5f-x2*y*y;
return y*v; return y;
}
static inline fvec4 sqrt(const fvec4& v) {
return rsqrt(v)*v;
} }
#endif /*OPENMM_VECTORIZE_PNACL_H_*/ #endif /*OPENMM_VECTORIZE_PNACL_H_*/
......
...@@ -241,6 +241,18 @@ static inline fvec4 sqrt(const fvec4& v) { ...@@ -241,6 +241,18 @@ static inline fvec4 sqrt(const fvec4& v) {
return fvec4(_mm_sqrt_ps(v.val)); return fvec4(_mm_sqrt_ps(v.val));
} }
static inline fvec4 rsqrt(const fvec4& v) {
// Initial estimate of rsqrt().
fvec4 y(_mm_rsqrt_ps(v.val));
// Perform an iteration of Newton refinement.
fvec4 x2 = v*0.5f;
y *= fvec4(1.5f)-x2*y*y;
return y;
}
static inline float dot3(const fvec4& v1, const fvec4& v2) { static inline float dot3(const fvec4& v1, const fvec4& v2) {
return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0x71)); return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0x71));
} }
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013 Stanford University and the Authors. * * Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -61,6 +61,7 @@ public: ...@@ -61,6 +61,7 @@ public:
private: private:
int blockSize; int blockSize;
std::vector<int> sortedAtoms; std::vector<int> sortedAtoms;
std::vector<float> sortedPositions;
std::vector<std::vector<int> > blockNeighbors; std::vector<std::vector<int> > blockNeighbors;
std::vector<std::vector<char> > blockExclusions; std::vector<std::vector<char> > blockExclusions;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
......
...@@ -56,8 +56,8 @@ protected: ...@@ -56,8 +56,8 @@ protected:
/** /**
* Templatized implementation of calculateBlockIxn. * Templatized implementation of calculateBlockIxn.
*/ */
template <bool TRICLINIC> template <int PERIODIC_TYPE>
void calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize); void calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -74,15 +74,15 @@ protected: ...@@ -74,15 +74,15 @@ protected:
/** /**
* Templatized implementation of calculateBlockEwaldIxn. * Templatized implementation of calculateBlockEwaldIxn.
*/ */
template <bool TRICLINIC> template <int PERIODIC_TYPE>
void calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize); void calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);
/** /**
* Compute the displacement and squared distance between a collection of points, optionally using * Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions. * periodic boundary conditions.
*/ */
template <bool TRICLINIC> template <int PERIODIC_TYPE>
void getDeltaR(const float* posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const; void getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
/** /**
* Compute a fast approximation to erfc(x). * Compute a fast approximation to erfc(x).
......
...@@ -55,8 +55,8 @@ protected: ...@@ -55,8 +55,8 @@ protected:
/** /**
* Templatized implementation of calculateBlockIxn. * Templatized implementation of calculateBlockIxn.
*/ */
template <bool TRICLINIC> template <int PERIODIC_TYPE>
void calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize); void calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -73,15 +73,15 @@ protected: ...@@ -73,15 +73,15 @@ protected:
/** /**
* Templatized implementation of calculateBlockEwaldIxn. * Templatized implementation of calculateBlockEwaldIxn.
*/ */
template <bool TRICLINIC> template <int PERIODIC_TYPE>
void calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize); void calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);
/** /**
* Compute the displacement and squared distance between a collection of points, optionally using * Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions. * periodic boundary conditions.
*/ */
template <bool TRICLINIC> template <int PERIODIC_TYPE>
void getDeltaR(const float* posI, const fvec8& x, const fvec8& y, const fvec8& z, fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const; void getDeltaR(const fvec4& posI, const fvec8& x, const fvec8& y, const fvec8& z, fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
/** /**
* Compute a fast approximation to erfc(x). * Compute a fast approximation to erfc(x).
......
...@@ -199,7 +199,7 @@ void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) { ...@@ -199,7 +199,7 @@ void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) {
} }
void CpuCustomManyParticleForce::setPeriodic(RealVec* periodicBoxVectors) { void CpuCustomManyParticleForce::setPeriodic(RealVec* periodicBoxVectors) {
assert(cutoff); assert(useCutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance); assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance); assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance);
assert(periodicBoxVectors[2][2] >= 2.0*cutoffDistance); assert(periodicBoxVectors[2][2] >= 2.0*cutoffDistance);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013 Stanford University and the Authors. * * Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -164,8 +164,8 @@ public: ...@@ -164,8 +164,8 @@ public:
return VoxelIndex(y, z); return VoxelIndex(y, z);
} }
void getNeighbors(vector<int>& neighbors, int blockIndex, const fvec4& blockCenter, const fvec4& blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int>& blockAtoms, const float* atomLocations, const vector<VoxelIndex>& atomVoxelIndex) const { void getNeighbors(vector<int>& neighbors, int blockIndex, const fvec4& blockCenter, const fvec4& blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int>& blockAtoms, const vector<float>& blockAtomX, const vector<float>& blockAtomY, const vector<float>& blockAtomZ, const vector<float>& sortedPositions, const vector<VoxelIndex>& atomVoxelIndex) const {
neighbors.resize(0); neighbors.resize(0);
exclusions.resize(0); exclusions.resize(0);
fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0); fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
...@@ -233,24 +233,34 @@ public: ...@@ -233,24 +233,34 @@ public:
float minx = centerPos[0]; float minx = centerPos[0];
float maxx = centerPos[0]; float maxx = centerPos[0];
fvec4 offset(-xoffset, -yoffset+voxelSizeY*y+(usePeriodic ? 0.0f : miny), voxelSizeZ*z+(usePeriodic ? 0.0f : minz), 0); float offset[3] = {-xoffset, -yoffset+voxelSizeY*y+(usePeriodic ? 0.0f : miny), voxelSizeZ*z+(usePeriodic ? 0.0f : minz)};
for (int k = 0; k < (int) blockAtoms.size(); k++) { for (int k = 0; k < (int) blockAtoms.size(); k += 4) {
const float* atomPos = &atomLocations[4*blockAtoms[k]]; fvec4 dist2 = maxDistanceSquared;
fvec4 posVec(atomPos); if (y != atomVoxelIndex[k].y) {
fvec4 delta1 = offset-posVec; fvec4 dy1 = offset[1]-fvec4(&blockAtomY[k]);
fvec4 delta2 = delta1+fvec4(0, voxelSizeY, voxelSizeZ, 0); fvec4 dy2 = dy1+voxelSizeY;
if (usePeriodic) { if (usePeriodic) {
delta1 -= round(delta1*invBoxSize)*boxSize; dy1 -= round(dy1*invBoxSize[1])*boxSize[1];
delta2 -= round(delta2*invBoxSize)*boxSize; dy2 -= round(dy2*invBoxSize[1])*boxSize[1];
}
fvec4 dy = min(abs(dy1), abs(dy2));
dist2 -= dy*dy;
}
if (z != atomVoxelIndex[k].z) {
fvec4 dz1 = offset[2]-fvec4(&blockAtomZ[k]);
fvec4 dz2 = dz1+voxelSizeZ;
if (usePeriodic) {
dz1 -= round(dz1*invBoxSize[2])*boxSize[2];
dz2 -= round(dz2*invBoxSize[2])*boxSize[2];
}
fvec4 dz = min(abs(dz1), abs(dz2));
dist2 -= dz*dz;
} }
fvec4 delta = min(abs(delta1), abs(delta2)); fvec4 dist = sqrt(dist2);
float dy = (y == atomVoxelIndex[k].y ? 0.0f : delta[1]); int numToCheck = min(4, (int) (blockAtoms.size()-k));
float dz = (z == atomVoxelIndex[k].z ? 0.0f : delta[2]); for (int m = 0; m < numToCheck; m++) {
float dist2 = maxDistanceSquared-dy*dy-dz*dz; minx = min(minx, blockAtomX[k+m]-dist[m]-xoffset);
if (dist2 > 0) { maxx = max(maxx, blockAtomX[k+m]+dist[m]-xoffset);
float dist = sqrtf(dist2);
minx = min(minx, atomPos[0]-dist-xoffset);
maxx = max(maxx, atomPos[0]+dist-xoffset);
} }
} }
if (minx == maxx) if (minx == maxx)
...@@ -290,12 +300,10 @@ public: ...@@ -290,12 +300,10 @@ public:
if (sortedIndex >= lastSortedIndex) if (sortedIndex >= lastSortedIndex)
continue; continue;
fvec4 atomPos(atomLocations+4*sortedAtoms[sortedIndex]); fvec4 atomPos(&sortedPositions[4*sortedIndex]);
fvec4 delta = atomPos-centerPos; fvec4 delta = atomPos-centerPos;
if (periodicRectangular) { if (periodicRectangular)
fvec4 base = round(delta*invBoxSize)*boxSize; delta -= round(delta*invBoxSize)*boxSize;
delta = delta-base;
}
else if (needPeriodic) { else if (needPeriodic) {
delta -= periodicBoxVec4[2]*floorf(delta[2]*recipBoxSize[2]+0.5f); delta -= periodicBoxVec4[2]*floorf(delta[2]*recipBoxSize[2]+0.5f);
delta -= periodicBoxVec4[1]*floorf(delta[1]*recipBoxSize[1]+0.5f); delta -= periodicBoxVec4[1]*floorf(delta[1]*recipBoxSize[1]+0.5f);
...@@ -310,26 +318,34 @@ public: ...@@ -310,26 +318,34 @@ public:
// The distance is large enough that there might not be any actual interactions. // The distance is large enough that there might not be any actual interactions.
// Check individual atom pairs to be sure. // Check individual atom pairs to be sure.
bool any = false; bool anyInteraction = false;
for (int k = 0; k < (int) blockAtoms.size(); k++) { for (int k = 0; k < (int) blockAtoms.size(); k += 4) {
fvec4 pos1(&atomLocations[4*blockAtoms[k]]); fvec4 dx = fvec4(&blockAtomX[k])-atomPos[0];
delta = atomPos-pos1; fvec4 dy = fvec4(&blockAtomY[k])-atomPos[1];
fvec4 dz = fvec4(&blockAtomZ[k])-atomPos[2];
if (periodicRectangular) { if (periodicRectangular) {
fvec4 base = round(delta*invBoxSize)*boxSize; dx -= round(dx*invBoxSize[0])*boxSize[0];
delta = delta-base; dy -= round(dy*invBoxSize[1])*boxSize[1];
dz -= round(dz*invBoxSize[2])*boxSize[2];
} }
else if (needPeriodic) { else if (needPeriodic) {
delta -= periodicBoxVec4[2]*floorf(delta[2]*recipBoxSize[2]+0.5f); fvec4 scale3 = floor(dz*recipBoxSize[2]+0.5f);
delta -= periodicBoxVec4[1]*floorf(delta[1]*recipBoxSize[1]+0.5f); dx -= scale3*periodicBoxVectors[2][0];
delta -= periodicBoxVec4[0]*floorf(delta[0]*recipBoxSize[0]+0.5f); dy -= scale3*periodicBoxVectors[2][1];
dz -= scale3*periodicBoxVectors[2][2];
fvec4 scale2 = floor(dy*recipBoxSize[1]+0.5f);
dx -= scale2*periodicBoxVectors[1][0];
dy -= scale2*periodicBoxVectors[1][1];
fvec4 scale1 = floor(dx*recipBoxSize[0]+0.5f);
dx -= scale1*periodicBoxVectors[0][0];
} }
float r2 = dot3(delta, delta); fvec4 r2 = dx*dx + dy*dy + dz*dz;
if (r2 < maxDistanceSquared) { if (any(r2 < maxDistanceSquared)) {
any = true; anyInteraction = true;
break; break;
} }
} }
if (!any) if (!anyInteraction)
continue; continue;
} }
...@@ -379,6 +395,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float ...@@ -379,6 +395,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
blockNeighbors.resize(numBlocks); blockNeighbors.resize(numBlocks);
blockExclusions.resize(numBlocks); blockExclusions.resize(numBlocks);
sortedAtoms.resize(numAtoms); sortedAtoms.resize(numAtoms);
sortedPositions.resize(4*numAtoms);
// Record the parameters for the threads. // Record the parameters for the threads.
...@@ -428,6 +445,8 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float ...@@ -428,6 +445,8 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
int atomIndex = atomBins[i].second; int atomIndex = atomBins[i].second;
sortedAtoms[i] = atomIndex; sortedAtoms[i] = atomIndex;
fvec4 atomPos(&atomLocations[4*atomIndex]);
atomPos.store(&sortedPositions[4*i]);
voxels.insert(i, &atomLocations[4*atomIndex]); voxels.insert(i, &atomLocations[4*atomIndex]);
} }
voxels.sortItems(); voxels.sortItems();
...@@ -489,6 +508,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI ...@@ -489,6 +508,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
int numBlocks = blockNeighbors.size(); int numBlocks = blockNeighbors.size();
vector<int> blockAtoms; vector<int> blockAtoms;
vector<float> blockAtomX(blockSize), blockAtomY(blockSize), blockAtomZ(blockSize);
vector<VoxelIndex> atomVoxelIndex; vector<VoxelIndex> atomVoxelIndex;
for (int i = threadIndex; i < numBlocks; i += numThreads) { for (int i = threadIndex; i < numBlocks; i += numThreads) {
// Find the atoms in this block and compute their bounding box. // Find the atoms in this block and compute their bounding box.
...@@ -501,14 +521,24 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI ...@@ -501,14 +521,24 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
blockAtoms[j] = sortedAtoms[firstIndex+j]; blockAtoms[j] = sortedAtoms[firstIndex+j];
atomVoxelIndex[j] = voxels->getVoxelIndex(&atomLocations[4*blockAtoms[j]]); atomVoxelIndex[j] = voxels->getVoxelIndex(&atomLocations[4*blockAtoms[j]]);
} }
fvec4 minPos(&atomLocations[4*sortedAtoms[firstIndex]]); fvec4 minPos(&sortedPositions[4*firstIndex]);
fvec4 maxPos = minPos; fvec4 maxPos = minPos;
for (int j = 1; j < atomsInBlock; j++) { for (int j = 1; j < atomsInBlock; j++) {
fvec4 pos(&atomLocations[4*sortedAtoms[firstIndex+j]]); fvec4 pos(&sortedPositions[4*(firstIndex+j)]);
minPos = min(minPos, pos); minPos = min(minPos, pos);
maxPos = max(maxPos, pos); maxPos = max(maxPos, pos);
} }
voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, atomLocations, atomVoxelIndex); for (int j = 0; j < atomsInBlock; j++) {
blockAtomX[j] = sortedPositions[4*(firstIndex+j)];
blockAtomY[j] = sortedPositions[4*(firstIndex+j)+1];
blockAtomZ[j] = sortedPositions[4*(firstIndex+j)+2];
}
for (int j = atomsInBlock; j < blockSize; j++) {
blockAtomX[j] = 1e10;
blockAtomY[j] = 1e10;
blockAtomZ[j] = 1e10;
}
voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, blockAtomX, blockAtomY, blockAtomZ, sortedPositions, atomVoxelIndex);
// Record the exclusions for this block. // Record the exclusions for this block.
......
...@@ -44,30 +44,76 @@ CpuNonbondedForce* createCpuNonbondedForceVec4() { ...@@ -44,30 +44,76 @@ CpuNonbondedForce* createCpuNonbondedForceVec4() {
CpuNonbondedForceVec4::CpuNonbondedForceVec4() { CpuNonbondedForceVec4::CpuNonbondedForceVec4() {
} }
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
void CpuNonbondedForceVec4::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec4::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
if (triclinic) // Determine whether we need to apply periodic boundary conditions.
calculateBlockIxnImpl<true>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
else PeriodicType periodicType;
calculateBlockIxnImpl<false>(blockIndex, forces, totalEnergy, boxSize, invBoxSize); fvec4 blockCenter;
if (!periodic) {
periodicType = NoPeriodic;
blockCenter = 0.0f;
}
else {
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
float minx, maxx, miny, maxy, minz, maxz;
minx = maxx = posq[4*blockAtom[0]];
miny = maxy = posq[4*blockAtom[0]+1];
minz = maxz = posq[4*blockAtom[0]+2];
for (int i = 1; i < 4; i++) {
minx = min(minx, posq[4*blockAtom[i]]);
maxx = max(maxx, posq[4*blockAtom[i]]);
miny = min(miny, posq[4*blockAtom[i]+1]);
maxy = max(maxy, posq[4*blockAtom[i]+1]);
minz = min(minz, posq[4*blockAtom[i]+2]);
maxz = max(maxz, posq[4*blockAtom[i]+2]);
}
blockCenter = fvec4(0.5f*(minx+maxx), 0.5f*(miny+maxy), 0.5f*(minz+maxz), 0.0f);
if (!(minx < cutoffDistance || miny < cutoffDistance || minz < cutoffDistance ||
maxx > boxSize[0]-cutoffDistance || maxy > boxSize[1]-cutoffDistance || maxz > boxSize[2]-cutoffDistance))
periodicType = NoPeriodic;
else if (triclinic)
periodicType = PeriodicTriclinic;
else if (0.5f*(boxSize[0]-(maxx-minx)) >= cutoffDistance &&
0.5f*(boxSize[1]-(maxy-miny)) >= cutoffDistance &&
0.5f*(boxSize[2]-(maxz-minz)) >= cutoffDistance)
periodicType = PeriodicPerAtom;
else
periodicType = PeriodicPerInteraction;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic)
calculateBlockIxnImpl<NoPeriodic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom)
calculateBlockIxnImpl<PeriodicPerAtom>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerInteraction)
calculateBlockIxnImpl<PeriodicPerInteraction>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicTriclinic)
calculateBlockIxnImpl<PeriodicTriclinic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
} }
template <bool TRICLINIC> template <int PERIODIC_TYPE>
void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block. // Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
fvec4 blockAtomPosq[4]; fvec4 blockAtomPosq[4];
fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f); fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++) {
blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]); blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
if (PERIODIC_TYPE == PeriodicPerAtom)
blockAtomPosq[i] -= floor((blockAtomPosq[i]-blockCenter)*invBoxSize+0.5f)*boxSize;
}
fvec4 blockAtomX = fvec4(blockAtomPosq[0][0], blockAtomPosq[1][0], blockAtomPosq[2][0], blockAtomPosq[3][0]); fvec4 blockAtomX = fvec4(blockAtomPosq[0][0], blockAtomPosq[1][0], blockAtomPosq[2][0], blockAtomPosq[3][0]);
fvec4 blockAtomY = fvec4(blockAtomPosq[0][1], blockAtomPosq[1][1], blockAtomPosq[2][1], blockAtomPosq[3][1]); fvec4 blockAtomY = fvec4(blockAtomPosq[0][1], blockAtomPosq[1][1], blockAtomPosq[2][1], blockAtomPosq[3][1]);
fvec4 blockAtomZ = fvec4(blockAtomPosq[0][2], blockAtomPosq[1][2], blockAtomPosq[2][2], blockAtomPosq[3][2]); fvec4 blockAtomZ = fvec4(blockAtomPosq[0][2], blockAtomPosq[1][2], blockAtomPosq[2][2], blockAtomPosq[3][2]);
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]); fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first); fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second); fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
bool needPeriodic = (periodic && (any(blockAtomX < cutoffDistance) || any(blockAtomY < cutoffDistance) || any(blockAtomZ < cutoffDistance) || const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
any(blockAtomX > boxSize[0]-cutoffDistance) || any(blockAtomY > boxSize[1]-cutoffDistance) || any(blockAtomZ > boxSize[2]-cutoffDistance)));
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance); const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
// Loop over neighbors for this block. // Loop over neighbors for this block.
...@@ -82,7 +128,10 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces, ...@@ -82,7 +128,10 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the distances to the block atoms. // Compute the distances to the block atoms.
fvec4 dx, dy, dz, r2; fvec4 dx, dy, dz, r2;
getDeltaR<TRICLINIC>(posq+4*atom, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize); fvec4 atomPos(posq+4*atom);
if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
ivec4 include; ivec4 include;
char excl = exclusions[i]; char excl = exclusions[i];
if (excl == 0) if (excl == 0)
...@@ -95,8 +144,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces, ...@@ -95,8 +144,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the interactions. // Compute the interactions.
fvec4 r = sqrt(r2); fvec4 inverseR = rsqrt(r2);
fvec4 inverseR = fvec4(1.0f)/r;
fvec4 energy, dEdR; fvec4 energy, dEdR;
float atomEpsilon = atomParameters[atom].second; float atomEpsilon = atomParameters[atom].second;
if (atomEpsilon != 0.0f) { if (atomEpsilon != 0.0f) {
...@@ -108,6 +156,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces, ...@@ -108,6 +156,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
dEdR = epsSig6*(12.0f*sig6 - 6.0f); dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f); energy = epsSig6*(sig6-1.0f);
if (useSwitch) { if (useSwitch) {
fvec4 r = r2*inverseR;
fvec4 t = blend(0.0f, (r-switchingDistance)*invSwitchingInterval, r>switchingDistance); fvec4 t = blend(0.0f, (r-switchingDistance)*invSwitchingInterval, r>switchingDistance);
fvec4 switchValue = 1+t*t*t*(-10.0f+t*(15.0f-t*6.0f)); fvec4 switchValue = 1+t*t*t*(-10.0f+t*(15.0f-t*6.0f));
fvec4 switchDeriv = t*t*(-30.0f+t*(60.0f-t*30.0f))*invSwitchingInterval; fvec4 switchDeriv = t*t*(-30.0f+t*(60.0f-t*30.0f))*invSwitchingInterval;
...@@ -162,29 +211,73 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces, ...@@ -162,29 +211,73 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
} }
void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
if (triclinic) // Determine whether we need to apply periodic boundary conditions.
calculateBlockEwaldIxnImpl<true>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
else PeriodicType periodicType;
calculateBlockEwaldIxnImpl<false>(blockIndex, forces, totalEnergy, boxSize, invBoxSize); fvec4 blockCenter;
if (!periodic) {
periodicType = NoPeriodic;
blockCenter = 0.0f;
}
else {
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
float minx, maxx, miny, maxy, minz, maxz;
minx = maxx = posq[4*blockAtom[0]];
miny = maxy = posq[4*blockAtom[0]+1];
minz = maxz = posq[4*blockAtom[0]+2];
for (int i = 1; i < 4; i++) {
minx = min(minx, posq[4*blockAtom[i]]);
maxx = max(maxx, posq[4*blockAtom[i]]);
miny = min(miny, posq[4*blockAtom[i]+1]);
maxy = max(maxy, posq[4*blockAtom[i]+1]);
minz = min(minz, posq[4*blockAtom[i]+2]);
maxz = max(maxz, posq[4*blockAtom[i]+2]);
}
blockCenter = fvec4(0.5f*(minx+maxx), 0.5f*(miny+maxy), 0.5f*(minz+maxz), 0.0f);
if (!(minx < cutoffDistance || miny < cutoffDistance || minz < cutoffDistance ||
maxx > boxSize[0]-cutoffDistance || maxy > boxSize[1]-cutoffDistance || maxz > boxSize[2]-cutoffDistance))
periodicType = NoPeriodic;
else if (triclinic)
periodicType = PeriodicTriclinic;
else if (0.5f*(boxSize[0]-(maxx-minx)) >= cutoffDistance &&
0.5f*(boxSize[1]-(maxy-miny)) >= cutoffDistance &&
0.5f*(boxSize[2]-(maxz-minz)) >= cutoffDistance)
periodicType = PeriodicPerAtom;
else
periodicType = PeriodicPerInteraction;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic)
calculateBlockEwaldIxnImpl<NoPeriodic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom)
calculateBlockEwaldIxnImpl<PeriodicPerAtom>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerInteraction)
calculateBlockEwaldIxnImpl<PeriodicPerInteraction>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicTriclinic)
calculateBlockEwaldIxnImpl<PeriodicTriclinic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
} }
template <bool TRICLINIC> template <int PERIODIC_TYPE>
void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block. // Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
fvec4 blockAtomPosq[4]; fvec4 blockAtomPosq[4];
fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f); fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++) {
blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]); blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
if (PERIODIC_TYPE == PeriodicPerAtom)
blockAtomPosq[i] -= floor((blockAtomPosq[i]-blockCenter)*invBoxSize+0.5f)*boxSize;
}
fvec4 blockAtomX = fvec4(blockAtomPosq[0][0], blockAtomPosq[1][0], blockAtomPosq[2][0], blockAtomPosq[3][0]); fvec4 blockAtomX = fvec4(blockAtomPosq[0][0], blockAtomPosq[1][0], blockAtomPosq[2][0], blockAtomPosq[3][0]);
fvec4 blockAtomY = fvec4(blockAtomPosq[0][1], blockAtomPosq[1][1], blockAtomPosq[2][1], blockAtomPosq[3][1]); fvec4 blockAtomY = fvec4(blockAtomPosq[0][1], blockAtomPosq[1][1], blockAtomPosq[2][1], blockAtomPosq[3][1]);
fvec4 blockAtomZ = fvec4(blockAtomPosq[0][2], blockAtomPosq[1][2], blockAtomPosq[2][2], blockAtomPosq[3][2]); fvec4 blockAtomZ = fvec4(blockAtomPosq[0][2], blockAtomPosq[1][2], blockAtomPosq[2][2], blockAtomPosq[3][2]);
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]); fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first); fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second); fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
bool needPeriodic = (periodic && (any(blockAtomX < cutoffDistance) || any(blockAtomY < cutoffDistance) || any(blockAtomZ < cutoffDistance) || const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
any(blockAtomX > boxSize[0]-cutoffDistance) || any(blockAtomY > boxSize[1]-cutoffDistance) || any(blockAtomZ > boxSize[2]-cutoffDistance)));
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance); const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
// Loop over neighbors for this block. // Loop over neighbors for this block.
...@@ -199,7 +292,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -199,7 +292,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the distances to the block atoms. // Compute the distances to the block atoms.
fvec4 dx, dy, dz, r2; fvec4 dx, dy, dz, r2;
getDeltaR<TRICLINIC>(posq+4*atom, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize); fvec4 atomPos(posq+4*atom);
if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
ivec4 include; ivec4 include;
char excl = exclusions[i]; char excl = exclusions[i];
if (excl == 0) if (excl == 0)
...@@ -212,8 +308,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -212,8 +308,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the interactions. // Compute the interactions.
fvec4 r = sqrt(r2); fvec4 inverseR = rsqrt(r2);
fvec4 inverseR = fvec4(1.0f)/r; fvec4 r = r2*inverseR;
fvec4 energy, dEdR; fvec4 energy, dEdR;
float atomEpsilon = atomParameters[atom].second; float atomEpsilon = atomParameters[atom].second;
if (atomEpsilon != 0.0f) { if (atomEpsilon != 0.0f) {
...@@ -272,28 +368,26 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -272,28 +368,26 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
(fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]); (fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]);
} }
template <bool TRICLINIC> template <int PERIODIC_TYPE>
void CpuNonbondedForceVec4::getDeltaR(const float* posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const { void CpuNonbondedForceVec4::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
dx = x-posI[0]; dx = x-posI[0];
dy = y-posI[1]; dy = y-posI[1];
dz = z-posI[2]; dz = z-posI[2];
if (periodic) { if (PERIODIC_TYPE == PeriodicTriclinic) {
if (TRICLINIC) { fvec4 scale3 = floor(dz*recipBoxSize[2]+0.5f);
fvec4 scale3 = floor(dz*recipBoxSize[2]+0.5f); dx -= scale3*periodicBoxVectors[2][0];
dx -= scale3*periodicBoxVectors[2][0]; dy -= scale3*periodicBoxVectors[2][1];
dy -= scale3*periodicBoxVectors[2][1]; dz -= scale3*periodicBoxVectors[2][2];
dz -= scale3*periodicBoxVectors[2][2]; fvec4 scale2 = floor(dy*recipBoxSize[1]+0.5f);
fvec4 scale2 = floor(dy*recipBoxSize[1]+0.5f); dx -= scale2*periodicBoxVectors[1][0];
dx -= scale2*periodicBoxVectors[1][0]; dy -= scale2*periodicBoxVectors[1][1];
dy -= scale2*periodicBoxVectors[1][1]; fvec4 scale1 = floor(dx*recipBoxSize[0]+0.5f);
fvec4 scale1 = floor(dx*recipBoxSize[0]+0.5f); dx -= scale1*periodicBoxVectors[0][0];
dx -= scale1*periodicBoxVectors[0][0]; }
} else if (PERIODIC_TYPE == PeriodicPerInteraction) {
else { dx -= round(dx*invBoxSize[0])*boxSize[0];
dx -= round(dx*invBoxSize[0])*boxSize[0]; dy -= round(dy*invBoxSize[1])*boxSize[1];
dy -= round(dy*invBoxSize[1])*boxSize[1]; dz -= round(dz*invBoxSize[2])*boxSize[2];
dz -= round(dz*invBoxSize[2])*boxSize[2];
}
} }
r2 = dx*dx + dy*dy + dz*dz; r2 = dx*dx + dy*dy + dz*dz;
} }
......
...@@ -50,7 +50,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() { ...@@ -50,7 +50,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
*/ */
bool isVec8Supported() { bool isVec8Supported() {
// Make sure the CPU supports AVX. // Make sure the CPU supports AVX.
int cpuInfo[4]; int cpuInfo[4];
cpuid(cpuInfo, 0); cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) { if (cpuInfo[0] >= 1) {
...@@ -76,29 +76,75 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() { ...@@ -76,29 +76,75 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
CpuNonbondedForceVec8::CpuNonbondedForceVec8() { CpuNonbondedForceVec8::CpuNonbondedForceVec8() {
} }
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
if (triclinic) // Determine whether we need to apply periodic boundary conditions.
calculateBlockIxnImpl<true>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
else PeriodicType periodicType;
calculateBlockIxnImpl<false>(blockIndex, forces, totalEnergy, boxSize, invBoxSize); fvec4 blockCenter;
if (!periodic) {
periodicType = NoPeriodic;
blockCenter = 0.0f;
}
else {
const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex];
float minx, maxx, miny, maxy, minz, maxz;
minx = maxx = posq[4*blockAtom[0]];
miny = maxy = posq[4*blockAtom[0]+1];
minz = maxz = posq[4*blockAtom[0]+2];
for (int i = 1; i < 8; i++) {
minx = min(minx, posq[4*blockAtom[i]]);
maxx = max(maxx, posq[4*blockAtom[i]]);
miny = min(miny, posq[4*blockAtom[i]+1]);
maxy = max(maxy, posq[4*blockAtom[i]+1]);
minz = min(minz, posq[4*blockAtom[i]+2]);
maxz = max(maxz, posq[4*blockAtom[i]+2]);
}
blockCenter = fvec4(0.5f*(minx+maxx), 0.5f*(miny+maxy), 0.5f*(minz+maxz), 0.0f);
if (!(minx < cutoffDistance || miny < cutoffDistance || minz < cutoffDistance ||
maxx > boxSize[0]-cutoffDistance || maxy > boxSize[1]-cutoffDistance || maxz > boxSize[2]-cutoffDistance))
periodicType = NoPeriodic;
else if (triclinic)
periodicType = PeriodicTriclinic;
else if (0.5f*(boxSize[0]-(maxx-minx)) >= cutoffDistance &&
0.5f*(boxSize[1]-(maxy-miny)) >= cutoffDistance &&
0.5f*(boxSize[2]-(maxz-minz)) >= cutoffDistance)
periodicType = PeriodicPerAtom;
else
periodicType = PeriodicPerInteraction;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic)
calculateBlockIxnImpl<NoPeriodic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom)
calculateBlockIxnImpl<PeriodicPerAtom>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerInteraction)
calculateBlockIxnImpl<PeriodicPerInteraction>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicTriclinic)
calculateBlockIxnImpl<PeriodicTriclinic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
} }
template <bool TRICLINIC> template <int PERIODIC_TYPE>
void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block. // Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex];
fvec4 blockAtomPosq[8]; fvec4 blockAtomPosq[8];
fvec8 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f); fvec8 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
fvec8 blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge; fvec8 blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge;
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++) {
blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]); blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
if (PERIODIC_TYPE == PeriodicPerAtom)
blockAtomPosq[i] -= floor((blockAtomPosq[i]-blockCenter)*invBoxSize+0.5f)*boxSize;
}
transpose(blockAtomPosq[0], blockAtomPosq[1], blockAtomPosq[2], blockAtomPosq[3], blockAtomPosq[4], blockAtomPosq[5], blockAtomPosq[6], blockAtomPosq[7], blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge); transpose(blockAtomPosq[0], blockAtomPosq[1], blockAtomPosq[2], blockAtomPosq[3], blockAtomPosq[4], blockAtomPosq[5], blockAtomPosq[6], blockAtomPosq[7], blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge);
blockAtomCharge *= ONE_4PI_EPS0; blockAtomCharge *= ONE_4PI_EPS0;
fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first); fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first);
fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second); fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second);
bool needPeriodic = (periodic && (any(blockAtomX < cutoffDistance) || any(blockAtomY < cutoffDistance) || any(blockAtomZ < cutoffDistance) || const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
any(blockAtomX > boxSize[0]-cutoffDistance) || any(blockAtomY > boxSize[1]-cutoffDistance) || any(blockAtomZ > boxSize[2]-cutoffDistance)));
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance); const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
// Loop over neighbors for this block. // Loop over neighbors for this block.
...@@ -113,7 +159,10 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces, ...@@ -113,7 +159,10 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the distances to the block atoms. // Compute the distances to the block atoms.
fvec8 dx, dy, dz, r2; fvec8 dx, dy, dz, r2;
getDeltaR<TRICLINIC>(&posq[4*atom], blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize); fvec4 atomPos(posq+4*atom);
if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
ivec8 include; ivec8 include;
char excl = exclusions[i]; char excl = exclusions[i];
if (excl == 0) if (excl == 0)
...@@ -126,8 +175,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces, ...@@ -126,8 +175,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the interactions. // Compute the interactions.
fvec8 r = sqrt(r2); fvec8 inverseR = rsqrt(r2);
fvec8 inverseR = fvec8(1.0f)/r;
fvec8 energy, dEdR; fvec8 energy, dEdR;
float atomEpsilon = atomParameters[atom].second; float atomEpsilon = atomParameters[atom].second;
if (atomEpsilon != 0.0f) { if (atomEpsilon != 0.0f) {
...@@ -139,6 +187,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces, ...@@ -139,6 +187,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
dEdR = epsSig6*(12.0f*sig6 - 6.0f); dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f); energy = epsSig6*(sig6-1.0f);
if (useSwitch) { if (useSwitch) {
fvec8 r = r2*inverseR;
fvec8 t = (r>switchingDistance) & ((r-switchingDistance)*invSwitchingInterval); fvec8 t = (r>switchingDistance) & ((r-switchingDistance)*invSwitchingInterval);
fvec8 switchValue = 1+t*t*t*(-10.0f+t*(15.0f-t*6.0f)); fvec8 switchValue = 1+t*t*t*(-10.0f+t*(15.0f-t*6.0f));
fvec8 switchDeriv = t*t*(-30.0f+t*(60.0f-t*30.0f))*invSwitchingInterval; fvec8 switchDeriv = t*t*(-30.0f+t*(60.0f-t*30.0f))*invSwitchingInterval;
...@@ -193,28 +242,72 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces, ...@@ -193,28 +242,72 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
} }
void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
if (triclinic) // Determine whether we need to apply periodic boundary conditions.
calculateBlockEwaldIxnImpl<true>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
else PeriodicType periodicType;
calculateBlockEwaldIxnImpl<false>(blockIndex, forces, totalEnergy, boxSize, invBoxSize); fvec4 blockCenter;
if (!periodic) {
periodicType = NoPeriodic;
blockCenter = 0.0f;
}
else {
const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex];
float minx, maxx, miny, maxy, minz, maxz;
minx = maxx = posq[4*blockAtom[0]];
miny = maxy = posq[4*blockAtom[0]+1];
minz = maxz = posq[4*blockAtom[0]+2];
for (int i = 1; i < 8; i++) {
minx = min(minx, posq[4*blockAtom[i]]);
maxx = max(maxx, posq[4*blockAtom[i]]);
miny = min(miny, posq[4*blockAtom[i]+1]);
maxy = max(maxy, posq[4*blockAtom[i]+1]);
minz = min(minz, posq[4*blockAtom[i]+2]);
maxz = max(maxz, posq[4*blockAtom[i]+2]);
}
blockCenter = fvec4(0.5f*(minx+maxx), 0.5f*(miny+maxy), 0.5f*(minz+maxz), 0.0f);
if (!(minx < cutoffDistance || miny < cutoffDistance || minz < cutoffDistance ||
maxx > boxSize[0]-cutoffDistance || maxy > boxSize[1]-cutoffDistance || maxz > boxSize[2]-cutoffDistance))
periodicType = NoPeriodic;
else if (triclinic)
periodicType = PeriodicTriclinic;
else if (0.5f*(boxSize[0]-(maxx-minx)) >= cutoffDistance &&
0.5f*(boxSize[1]-(maxy-miny)) >= cutoffDistance &&
0.5f*(boxSize[2]-(maxz-minz)) >= cutoffDistance)
periodicType = PeriodicPerAtom;
else
periodicType = PeriodicPerInteraction;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic)
calculateBlockEwaldIxnImpl<NoPeriodic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom)
calculateBlockEwaldIxnImpl<PeriodicPerAtom>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerInteraction)
calculateBlockEwaldIxnImpl<PeriodicPerInteraction>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicTriclinic)
calculateBlockEwaldIxnImpl<PeriodicTriclinic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
} }
template <bool TRICLINIC> template <int PERIODIC_TYPE>
void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block. // Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex];
fvec4 blockAtomPosq[8]; fvec4 blockAtomPosq[8];
fvec8 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f); fvec8 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
fvec8 blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge; fvec8 blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge;
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++) {
blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]); blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
if (PERIODIC_TYPE == PeriodicPerAtom)
blockAtomPosq[i] -= floor((blockAtomPosq[i]-blockCenter)*invBoxSize+0.5f)*boxSize;
}
transpose(blockAtomPosq[0], blockAtomPosq[1], blockAtomPosq[2], blockAtomPosq[3], blockAtomPosq[4], blockAtomPosq[5], blockAtomPosq[6], blockAtomPosq[7], blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge); transpose(blockAtomPosq[0], blockAtomPosq[1], blockAtomPosq[2], blockAtomPosq[3], blockAtomPosq[4], blockAtomPosq[5], blockAtomPosq[6], blockAtomPosq[7], blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge);
blockAtomCharge *= ONE_4PI_EPS0; blockAtomCharge *= ONE_4PI_EPS0;
fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first); fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first);
fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second); fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second);
bool needPeriodic = (periodic && (any(blockAtomX < cutoffDistance) || any(blockAtomY < cutoffDistance) || any(blockAtomZ < cutoffDistance) || const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
any(blockAtomX > boxSize[0]-cutoffDistance) || any(blockAtomY > boxSize[1]-cutoffDistance) || any(blockAtomZ > boxSize[2]-cutoffDistance)));
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance); const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
// Loop over neighbors for this block. // Loop over neighbors for this block.
...@@ -229,7 +322,10 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -229,7 +322,10 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the distances to the block atoms. // Compute the distances to the block atoms.
fvec8 dx, dy, dz, r2; fvec8 dx, dy, dz, r2;
getDeltaR<TRICLINIC>(&posq[4*atom], blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize); fvec4 atomPos(posq+4*atom);
if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
ivec8 include; ivec8 include;
char excl = exclusions[i]; char excl = exclusions[i];
if (excl == 0) if (excl == 0)
...@@ -242,8 +338,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -242,8 +338,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the interactions. // Compute the interactions.
fvec8 r = sqrt(r2); fvec8 inverseR = rsqrt(r2);
fvec8 inverseR = fvec8(1.0f)/r; fvec8 r = r2*inverseR;
fvec8 energy, dEdR; fvec8 energy, dEdR;
float atomEpsilon = atomParameters[atom].second; float atomEpsilon = atomParameters[atom].second;
if (atomEpsilon != 0.0f) { if (atomEpsilon != 0.0f) {
...@@ -268,7 +364,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -268,7 +364,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
} }
fvec8 chargeProd = blockAtomCharge*posq[4*atom+3]; fvec8 chargeProd = blockAtomCharge*posq[4*atom+3];
dEdR += chargeProd*inverseR*ewaldScaleFunction(r); dEdR += chargeProd*inverseR*ewaldScaleFunction(r);
dEdR *= inverseR*inverseR; dEdR *= inverseR*inverseR;
// Accumulate energies. // Accumulate energies.
...@@ -302,28 +398,26 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -302,28 +398,26 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
(fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]); (fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]);
} }
template <bool TRICLINIC> template <int PERIODIC_TYPE>
void CpuNonbondedForceVec8::getDeltaR(const float* posI, const fvec8& x, const fvec8& y, const fvec8& z, fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const { void CpuNonbondedForceVec8::getDeltaR(const fvec4& posI, const fvec8& x, const fvec8& y, const fvec8& z, fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
dx = x-posI[0]; dx = x-posI[0];
dy = y-posI[1]; dy = y-posI[1];
dz = z-posI[2]; dz = z-posI[2];
if (periodic) { if (PERIODIC_TYPE == PeriodicTriclinic) {
if (TRICLINIC) { fvec8 scale3 = floor(dz*recipBoxSize[2]+0.5f);
fvec8 scale3 = floor(dz*recipBoxSize[2]+0.5f); dx -= scale3*periodicBoxVectors[2][0];
dx -= scale3*periodicBoxVectors[2][0]; dy -= scale3*periodicBoxVectors[2][1];
dy -= scale3*periodicBoxVectors[2][1]; dz -= scale3*periodicBoxVectors[2][2];
dz -= scale3*periodicBoxVectors[2][2]; fvec8 scale2 = floor(dy*recipBoxSize[1]+0.5f);
fvec8 scale2 = floor(dy*recipBoxSize[1]+0.5f); dx -= scale2*periodicBoxVectors[1][0];
dx -= scale2*periodicBoxVectors[1][0]; dy -= scale2*periodicBoxVectors[1][1];
dy -= scale2*periodicBoxVectors[1][1]; fvec8 scale1 = floor(dx*recipBoxSize[0]+0.5f);
fvec8 scale1 = floor(dx*recipBoxSize[0]+0.5f); dx -= scale1*periodicBoxVectors[0][0];
dx -= scale1*periodicBoxVectors[0][0]; }
} else if (PERIODIC_TYPE == PeriodicPerInteraction) {
else { dx -= round(dx*invBoxSize[0])*boxSize[0];
dx -= round(dx*invBoxSize[0])*boxSize[0]; dy -= round(dy*invBoxSize[1])*boxSize[1];
dy -= round(dy*invBoxSize[1])*boxSize[1]; dz -= round(dz*invBoxSize[2])*boxSize[2];
dz -= round(dz*invBoxSize[2])*boxSize[2];
}
} }
r2 = dx*dx + dy*dy + dz*dz; r2 = dx*dx + dy*dy + dz*dz;
} }
......
...@@ -382,9 +382,9 @@ void ObcParameters::setPeriodic(OpenMM::RealVec* vectors) { ...@@ -382,9 +382,9 @@ void ObcParameters::setPeriodic(OpenMM::RealVec* vectors) {
assert(_cutoff); assert(_cutoff);
assert(boxSize[0][0] >= 2.0*_cutoffDistance); assert(vectors[0][0] >= 2.0*_cutoffDistance);
assert(boxSize[1][1] >= 2.0*_cutoffDistance); assert(vectors[1][1] >= 2.0*_cutoffDistance);
assert(boxSize[2][2] >= 2.0*_cutoffDistance); assert(vectors[2][2] >= 2.0*_cutoffDistance);
_periodic = true; _periodic = true;
_periodicBoxVectors[0] = vectors[0]; _periodicBoxVectors[0] = vectors[0];
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2014 Stanford University and the Authors. * * Portions copyright (c) 2014-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -148,6 +148,7 @@ void testMathFunctions() { ...@@ -148,6 +148,7 @@ void testMathFunctions() {
ASSERT_VEC4_EQUAL(min(f1, f2), 0.4, 1.2, -1.2, -5.0); ASSERT_VEC4_EQUAL(min(f1, f2), 0.4, 1.2, -1.2, -5.0);
ASSERT_VEC4_EQUAL(max(f1, f2), 1.1, 1.9, 1.3, -3.8); ASSERT_VEC4_EQUAL(max(f1, f2), 1.1, 1.9, 1.3, -3.8);
ASSERT_VEC4_EQUAL(sqrt(fvec4(1.5, 3.1, 4.0, 15.0)), sqrt(1.5), sqrt(3.1), sqrt(4.0), sqrt(15.0)); ASSERT_VEC4_EQUAL(sqrt(fvec4(1.5, 3.1, 4.0, 15.0)), sqrt(1.5), sqrt(3.1), sqrt(4.0), sqrt(15.0));
ASSERT_VEC4_EQUAL(rsqrt(fvec4(1.5, 3.1, 4.0, 15.0)), 1.0/sqrt(1.5), 1.0/sqrt(3.1), 1.0/sqrt(4.0), 1.0/sqrt(15.0));
ASSERT_EQUAL_TOL(f1[0]*f2[0]+f1[1]*f2[1]+f1[2]*f2[2], dot3(f1, f2), 1e-6); ASSERT_EQUAL_TOL(f1[0]*f2[0]+f1[1]*f2[1]+f1[2]*f2[2], dot3(f1, f2), 1e-6);
ASSERT_EQUAL_TOL(f1[0]*f2[0]+f1[1]*f2[1]+f1[2]*f2[2]+f1[3]*f2[3], dot4(f1, f2), 1e-6); ASSERT_EQUAL_TOL(f1[0]*f2[0]+f1[1]*f2[1]+f1[2]*f2[2]+f1[3]*f2[3], dot4(f1, f2), 1e-6);
ASSERT(any(f1 > 0.5)); ASSERT(any(f1 > 0.5));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment