Commit 71d33617 authored by John Chodera (MSKCC)'s avatar John Chodera (MSKCC)
Browse files

Merge remote-tracking branch 'upstream/master'

parents eb232608 9da36463
......@@ -241,6 +241,18 @@ static inline fvec4 sqrt(const fvec4& v) {
return fvec4(_mm_sqrt_ps(v.val));
}
static inline fvec4 rsqrt(const fvec4& v) {
// Initial estimate of rsqrt().
fvec4 y(_mm_rsqrt_ps(v.val));
// Perform an iteration of Newton refinement.
fvec4 x2 = v*0.5f;
y *= fvec4(1.5f)-x2*y*y;
return y;
}
static inline float dot3(const fvec4& v1, const fvec4& v2) {
return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0x71));
}
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Portions copyright (c) 2010-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -96,3 +96,6 @@ ForceImpl* CMAPTorsionForce::createImpl() const {
return new CMAPTorsionForceImpl(*this);
}
void CMAPTorsionForce::updateParametersInContext(Context& context) {
dynamic_cast<CMAPTorsionForceImpl&>(getImplInContext(context)).updateParametersInContext(getContextImpl(context));
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Portions copyright (c) 2010-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -155,3 +155,7 @@ void CMAPTorsionForceImpl::calcMapDerivatives(int size, const vector<double>& en
}
}
}
void CMAPTorsionForceImpl::updateParametersInContext(ContextImpl& context) {
kernel.getAs<CalcCMAPTorsionForceKernel>().copyParametersToContext(context, owner);
}
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -61,6 +61,7 @@ public:
private:
int blockSize;
std::vector<int> sortedAtoms;
std::vector<float> sortedPositions;
std::vector<std::vector<int> > blockNeighbors;
std::vector<std::vector<char> > blockExclusions;
// The following variables are used to make information accessible to the individual threads.
......
......@@ -56,8 +56,8 @@ protected:
/**
* Templatized implementation of calculateBlockIxn.
*/
template <bool TRICLINIC>
void calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
template <int PERIODIC_TYPE>
void calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);
/**---------------------------------------------------------------------------------------
......@@ -74,15 +74,15 @@ protected:
/**
* Templatized implementation of calculateBlockEwaldIxn.
*/
template <bool TRICLINIC>
void calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
template <int PERIODIC_TYPE>
void calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
template <bool TRICLINIC>
void getDeltaR(const float* posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
template <int PERIODIC_TYPE>
void getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
/**
* Compute a fast approximation to erfc(x).
......
......@@ -55,8 +55,8 @@ protected:
/**
* Templatized implementation of calculateBlockIxn.
*/
template <bool TRICLINIC>
void calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
template <int PERIODIC_TYPE>
void calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);
/**---------------------------------------------------------------------------------------
......@@ -73,15 +73,15 @@ protected:
/**
* Templatized implementation of calculateBlockEwaldIxn.
*/
template <bool TRICLINIC>
void calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
template <int PERIODIC_TYPE>
void calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
template <bool TRICLINIC>
void getDeltaR(const float* posI, const fvec8& x, const fvec8& y, const fvec8& z, fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
template <int PERIODIC_TYPE>
void getDeltaR(const fvec4& posI, const fvec8& x, const fvec8& y, const fvec8& z, fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
/**
* Compute a fast approximation to erfc(x).
......
......@@ -199,7 +199,7 @@ void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) {
}
void CpuCustomManyParticleForce::setPeriodic(RealVec* periodicBoxVectors) {
assert(cutoff);
assert(useCutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance);
assert(periodicBoxVectors[2][2] >= 2.0*cutoffDistance);
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -165,7 +165,7 @@ public:
return VoxelIndex(y, z);
}
void getNeighbors(vector<int>& neighbors, int blockIndex, const fvec4& blockCenter, const fvec4& blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int>& blockAtoms, const float* atomLocations, const vector<VoxelIndex>& atomVoxelIndex) const {
void getNeighbors(vector<int>& neighbors, int blockIndex, const fvec4& blockCenter, const fvec4& blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int>& blockAtoms, const vector<float>& blockAtomX, const vector<float>& blockAtomY, const vector<float>& blockAtomZ, const vector<float>& sortedPositions, const vector<VoxelIndex>& atomVoxelIndex) const {
neighbors.resize(0);
exclusions.resize(0);
fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
......@@ -233,24 +233,34 @@ public:
float minx = centerPos[0];
float maxx = centerPos[0];
fvec4 offset(-xoffset, -yoffset+voxelSizeY*y+(usePeriodic ? 0.0f : miny), voxelSizeZ*z+(usePeriodic ? 0.0f : minz), 0);
for (int k = 0; k < (int) blockAtoms.size(); k++) {
const float* atomPos = &atomLocations[4*blockAtoms[k]];
fvec4 posVec(atomPos);
fvec4 delta1 = offset-posVec;
fvec4 delta2 = delta1+fvec4(0, voxelSizeY, voxelSizeZ, 0);
float offset[3] = {-xoffset, -yoffset+voxelSizeY*y+(usePeriodic ? 0.0f : miny), voxelSizeZ*z+(usePeriodic ? 0.0f : minz)};
for (int k = 0; k < (int) blockAtoms.size(); k += 4) {
fvec4 dist2 = maxDistanceSquared;
if (y != atomVoxelIndex[k].y) {
fvec4 dy1 = offset[1]-fvec4(&blockAtomY[k]);
fvec4 dy2 = dy1+voxelSizeY;
if (usePeriodic) {
delta1 -= round(delta1*invBoxSize)*boxSize;
delta2 -= round(delta2*invBoxSize)*boxSize;
dy1 -= round(dy1*invBoxSize[1])*boxSize[1];
dy2 -= round(dy2*invBoxSize[1])*boxSize[1];
}
fvec4 delta = min(abs(delta1), abs(delta2));
float dy = (y == atomVoxelIndex[k].y ? 0.0f : delta[1]);
float dz = (z == atomVoxelIndex[k].z ? 0.0f : delta[2]);
float dist2 = maxDistanceSquared-dy*dy-dz*dz;
if (dist2 > 0) {
float dist = sqrtf(dist2);
minx = min(minx, atomPos[0]-dist-xoffset);
maxx = max(maxx, atomPos[0]+dist-xoffset);
fvec4 dy = min(abs(dy1), abs(dy2));
dist2 -= dy*dy;
}
if (z != atomVoxelIndex[k].z) {
fvec4 dz1 = offset[2]-fvec4(&blockAtomZ[k]);
fvec4 dz2 = dz1+voxelSizeZ;
if (usePeriodic) {
dz1 -= round(dz1*invBoxSize[2])*boxSize[2];
dz2 -= round(dz2*invBoxSize[2])*boxSize[2];
}
fvec4 dz = min(abs(dz1), abs(dz2));
dist2 -= dz*dz;
}
fvec4 dist = sqrt(dist2);
int numToCheck = min(4, (int) (blockAtoms.size()-k));
for (int m = 0; m < numToCheck; m++) {
minx = min(minx, blockAtomX[k+m]-dist[m]-xoffset);
maxx = max(maxx, blockAtomX[k+m]+dist[m]-xoffset);
}
}
if (minx == maxx)
......@@ -290,12 +300,10 @@ public:
if (sortedIndex >= lastSortedIndex)
continue;
fvec4 atomPos(atomLocations+4*sortedAtoms[sortedIndex]);
fvec4 atomPos(&sortedPositions[4*sortedIndex]);
fvec4 delta = atomPos-centerPos;
if (periodicRectangular) {
fvec4 base = round(delta*invBoxSize)*boxSize;
delta = delta-base;
}
if (periodicRectangular)
delta -= round(delta*invBoxSize)*boxSize;
else if (needPeriodic) {
delta -= periodicBoxVec4[2]*floorf(delta[2]*recipBoxSize[2]+0.5f);
delta -= periodicBoxVec4[1]*floorf(delta[1]*recipBoxSize[1]+0.5f);
......@@ -310,26 +318,34 @@ public:
// The distance is large enough that there might not be any actual interactions.
// Check individual atom pairs to be sure.
bool any = false;
for (int k = 0; k < (int) blockAtoms.size(); k++) {
fvec4 pos1(&atomLocations[4*blockAtoms[k]]);
delta = atomPos-pos1;
bool anyInteraction = false;
for (int k = 0; k < (int) blockAtoms.size(); k += 4) {
fvec4 dx = fvec4(&blockAtomX[k])-atomPos[0];
fvec4 dy = fvec4(&blockAtomY[k])-atomPos[1];
fvec4 dz = fvec4(&blockAtomZ[k])-atomPos[2];
if (periodicRectangular) {
fvec4 base = round(delta*invBoxSize)*boxSize;
delta = delta-base;
dx -= round(dx*invBoxSize[0])*boxSize[0];
dy -= round(dy*invBoxSize[1])*boxSize[1];
dz -= round(dz*invBoxSize[2])*boxSize[2];
}
else if (needPeriodic) {
delta -= periodicBoxVec4[2]*floorf(delta[2]*recipBoxSize[2]+0.5f);
delta -= periodicBoxVec4[1]*floorf(delta[1]*recipBoxSize[1]+0.5f);
delta -= periodicBoxVec4[0]*floorf(delta[0]*recipBoxSize[0]+0.5f);
}
float r2 = dot3(delta, delta);
if (r2 < maxDistanceSquared) {
any = true;
fvec4 scale3 = floor(dz*recipBoxSize[2]+0.5f);
dx -= scale3*periodicBoxVectors[2][0];
dy -= scale3*periodicBoxVectors[2][1];
dz -= scale3*periodicBoxVectors[2][2];
fvec4 scale2 = floor(dy*recipBoxSize[1]+0.5f);
dx -= scale2*periodicBoxVectors[1][0];
dy -= scale2*periodicBoxVectors[1][1];
fvec4 scale1 = floor(dx*recipBoxSize[0]+0.5f);
dx -= scale1*periodicBoxVectors[0][0];
}
fvec4 r2 = dx*dx + dy*dy + dz*dz;
if (any(r2 < maxDistanceSquared)) {
anyInteraction = true;
break;
}
}
if (!any)
if (!anyInteraction)
continue;
}
......@@ -379,6 +395,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
blockNeighbors.resize(numBlocks);
blockExclusions.resize(numBlocks);
sortedAtoms.resize(numAtoms);
sortedPositions.resize(4*numAtoms);
// Record the parameters for the threads.
......@@ -428,6 +445,8 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
for (int i = 0; i < numAtoms; i++) {
int atomIndex = atomBins[i].second;
sortedAtoms[i] = atomIndex;
fvec4 atomPos(&atomLocations[4*atomIndex]);
atomPos.store(&sortedPositions[4*i]);
voxels.insert(i, &atomLocations[4*atomIndex]);
}
voxels.sortItems();
......@@ -489,6 +508,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
int numBlocks = blockNeighbors.size();
vector<int> blockAtoms;
vector<float> blockAtomX(blockSize), blockAtomY(blockSize), blockAtomZ(blockSize);
vector<VoxelIndex> atomVoxelIndex;
for (int i = threadIndex; i < numBlocks; i += numThreads) {
// Find the atoms in this block and compute their bounding box.
......@@ -501,14 +521,24 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
blockAtoms[j] = sortedAtoms[firstIndex+j];
atomVoxelIndex[j] = voxels->getVoxelIndex(&atomLocations[4*blockAtoms[j]]);
}
fvec4 minPos(&atomLocations[4*sortedAtoms[firstIndex]]);
fvec4 minPos(&sortedPositions[4*firstIndex]);
fvec4 maxPos = minPos;
for (int j = 1; j < atomsInBlock; j++) {
fvec4 pos(&atomLocations[4*sortedAtoms[firstIndex+j]]);
fvec4 pos(&sortedPositions[4*(firstIndex+j)]);
minPos = min(minPos, pos);
maxPos = max(maxPos, pos);
}
voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, atomLocations, atomVoxelIndex);
for (int j = 0; j < atomsInBlock; j++) {
blockAtomX[j] = sortedPositions[4*(firstIndex+j)];
blockAtomY[j] = sortedPositions[4*(firstIndex+j)+1];
blockAtomZ[j] = sortedPositions[4*(firstIndex+j)+2];
}
for (int j = atomsInBlock; j < blockSize; j++) {
blockAtomX[j] = 1e10;
blockAtomY[j] = 1e10;
blockAtomZ[j] = 1e10;
}
voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, blockAtomX, blockAtomY, blockAtomZ, sortedPositions, atomVoxelIndex);
// Record the exclusions for this block.
......
......@@ -44,30 +44,76 @@ CpuNonbondedForce* createCpuNonbondedForceVec4() {
CpuNonbondedForceVec4::CpuNonbondedForceVec4() {
}
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
void CpuNonbondedForceVec4::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
if (triclinic)
calculateBlockIxnImpl<true>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
// Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType;
fvec4 blockCenter;
if (!periodic) {
periodicType = NoPeriodic;
blockCenter = 0.0f;
}
else {
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
float minx, maxx, miny, maxy, minz, maxz;
minx = maxx = posq[4*blockAtom[0]];
miny = maxy = posq[4*blockAtom[0]+1];
minz = maxz = posq[4*blockAtom[0]+2];
for (int i = 1; i < 4; i++) {
minx = min(minx, posq[4*blockAtom[i]]);
maxx = max(maxx, posq[4*blockAtom[i]]);
miny = min(miny, posq[4*blockAtom[i]+1]);
maxy = max(maxy, posq[4*blockAtom[i]+1]);
minz = min(minz, posq[4*blockAtom[i]+2]);
maxz = max(maxz, posq[4*blockAtom[i]+2]);
}
blockCenter = fvec4(0.5f*(minx+maxx), 0.5f*(miny+maxy), 0.5f*(minz+maxz), 0.0f);
if (!(minx < cutoffDistance || miny < cutoffDistance || minz < cutoffDistance ||
maxx > boxSize[0]-cutoffDistance || maxy > boxSize[1]-cutoffDistance || maxz > boxSize[2]-cutoffDistance))
periodicType = NoPeriodic;
else if (triclinic)
periodicType = PeriodicTriclinic;
else if (0.5f*(boxSize[0]-(maxx-minx)) >= cutoffDistance &&
0.5f*(boxSize[1]-(maxy-miny)) >= cutoffDistance &&
0.5f*(boxSize[2]-(maxz-minz)) >= cutoffDistance)
periodicType = PeriodicPerAtom;
else
calculateBlockIxnImpl<false>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
periodicType = PeriodicPerInteraction;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic)
calculateBlockIxnImpl<NoPeriodic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom)
calculateBlockIxnImpl<PeriodicPerAtom>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerInteraction)
calculateBlockIxnImpl<PeriodicPerInteraction>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicTriclinic)
calculateBlockIxnImpl<PeriodicTriclinic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
}
template <bool TRICLINIC>
void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
template <int PERIODIC_TYPE>
void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
fvec4 blockAtomPosq[4];
fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
for (int i = 0; i < 4; i++)
for (int i = 0; i < 4; i++) {
blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
if (PERIODIC_TYPE == PeriodicPerAtom)
blockAtomPosq[i] -= floor((blockAtomPosq[i]-blockCenter)*invBoxSize+0.5f)*boxSize;
}
fvec4 blockAtomX = fvec4(blockAtomPosq[0][0], blockAtomPosq[1][0], blockAtomPosq[2][0], blockAtomPosq[3][0]);
fvec4 blockAtomY = fvec4(blockAtomPosq[0][1], blockAtomPosq[1][1], blockAtomPosq[2][1], blockAtomPosq[3][1]);
fvec4 blockAtomZ = fvec4(blockAtomPosq[0][2], blockAtomPosq[1][2], blockAtomPosq[2][2], blockAtomPosq[3][2]);
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
bool needPeriodic = (periodic && (any(blockAtomX < cutoffDistance) || any(blockAtomY < cutoffDistance) || any(blockAtomZ < cutoffDistance) ||
any(blockAtomX > boxSize[0]-cutoffDistance) || any(blockAtomY > boxSize[1]-cutoffDistance) || any(blockAtomZ > boxSize[2]-cutoffDistance)));
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
// Loop over neighbors for this block.
......@@ -82,7 +128,10 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the distances to the block atoms.
fvec4 dx, dy, dz, r2;
getDeltaR<TRICLINIC>(posq+4*atom, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
fvec4 atomPos(posq+4*atom);
if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
ivec4 include;
char excl = exclusions[i];
if (excl == 0)
......@@ -95,8 +144,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the interactions.
fvec4 r = sqrt(r2);
fvec4 inverseR = fvec4(1.0f)/r;
fvec4 inverseR = rsqrt(r2);
fvec4 energy, dEdR;
float atomEpsilon = atomParameters[atom].second;
if (atomEpsilon != 0.0f) {
......@@ -108,6 +156,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f);
if (useSwitch) {
fvec4 r = r2*inverseR;
fvec4 t = blend(0.0f, (r-switchingDistance)*invSwitchingInterval, r>switchingDistance);
fvec4 switchValue = 1+t*t*t*(-10.0f+t*(15.0f-t*6.0f));
fvec4 switchDeriv = t*t*(-30.0f+t*(60.0f-t*30.0f))*invSwitchingInterval;
......@@ -162,29 +211,73 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
}
void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
if (triclinic)
calculateBlockEwaldIxnImpl<true>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
// Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType;
fvec4 blockCenter;
if (!periodic) {
periodicType = NoPeriodic;
blockCenter = 0.0f;
}
else {
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
float minx, maxx, miny, maxy, minz, maxz;
minx = maxx = posq[4*blockAtom[0]];
miny = maxy = posq[4*blockAtom[0]+1];
minz = maxz = posq[4*blockAtom[0]+2];
for (int i = 1; i < 4; i++) {
minx = min(minx, posq[4*blockAtom[i]]);
maxx = max(maxx, posq[4*blockAtom[i]]);
miny = min(miny, posq[4*blockAtom[i]+1]);
maxy = max(maxy, posq[4*blockAtom[i]+1]);
minz = min(minz, posq[4*blockAtom[i]+2]);
maxz = max(maxz, posq[4*blockAtom[i]+2]);
}
blockCenter = fvec4(0.5f*(minx+maxx), 0.5f*(miny+maxy), 0.5f*(minz+maxz), 0.0f);
if (!(minx < cutoffDistance || miny < cutoffDistance || minz < cutoffDistance ||
maxx > boxSize[0]-cutoffDistance || maxy > boxSize[1]-cutoffDistance || maxz > boxSize[2]-cutoffDistance))
periodicType = NoPeriodic;
else if (triclinic)
periodicType = PeriodicTriclinic;
else if (0.5f*(boxSize[0]-(maxx-minx)) >= cutoffDistance &&
0.5f*(boxSize[1]-(maxy-miny)) >= cutoffDistance &&
0.5f*(boxSize[2]-(maxz-minz)) >= cutoffDistance)
periodicType = PeriodicPerAtom;
else
calculateBlockEwaldIxnImpl<false>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
periodicType = PeriodicPerInteraction;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic)
calculateBlockEwaldIxnImpl<NoPeriodic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom)
calculateBlockEwaldIxnImpl<PeriodicPerAtom>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerInteraction)
calculateBlockEwaldIxnImpl<PeriodicPerInteraction>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicTriclinic)
calculateBlockEwaldIxnImpl<PeriodicTriclinic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
}
template <bool TRICLINIC>
void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
template <int PERIODIC_TYPE>
void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
fvec4 blockAtomPosq[4];
fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
for (int i = 0; i < 4; i++)
for (int i = 0; i < 4; i++) {
blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
if (PERIODIC_TYPE == PeriodicPerAtom)
blockAtomPosq[i] -= floor((blockAtomPosq[i]-blockCenter)*invBoxSize+0.5f)*boxSize;
}
fvec4 blockAtomX = fvec4(blockAtomPosq[0][0], blockAtomPosq[1][0], blockAtomPosq[2][0], blockAtomPosq[3][0]);
fvec4 blockAtomY = fvec4(blockAtomPosq[0][1], blockAtomPosq[1][1], blockAtomPosq[2][1], blockAtomPosq[3][1]);
fvec4 blockAtomZ = fvec4(blockAtomPosq[0][2], blockAtomPosq[1][2], blockAtomPosq[2][2], blockAtomPosq[3][2]);
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
bool needPeriodic = (periodic && (any(blockAtomX < cutoffDistance) || any(blockAtomY < cutoffDistance) || any(blockAtomZ < cutoffDistance) ||
any(blockAtomX > boxSize[0]-cutoffDistance) || any(blockAtomY > boxSize[1]-cutoffDistance) || any(blockAtomZ > boxSize[2]-cutoffDistance)));
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
// Loop over neighbors for this block.
......@@ -199,7 +292,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the distances to the block atoms.
fvec4 dx, dy, dz, r2;
getDeltaR<TRICLINIC>(posq+4*atom, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
fvec4 atomPos(posq+4*atom);
if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
ivec4 include;
char excl = exclusions[i];
if (excl == 0)
......@@ -212,8 +308,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the interactions.
fvec4 r = sqrt(r2);
fvec4 inverseR = fvec4(1.0f)/r;
fvec4 inverseR = rsqrt(r2);
fvec4 r = r2*inverseR;
fvec4 energy, dEdR;
float atomEpsilon = atomParameters[atom].second;
if (atomEpsilon != 0.0f) {
......@@ -272,13 +368,12 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
(fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]);
}
template <bool TRICLINIC>
void CpuNonbondedForceVec4::getDeltaR(const float* posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
template <int PERIODIC_TYPE>
void CpuNonbondedForceVec4::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
dx = x-posI[0];
dy = y-posI[1];
dz = z-posI[2];
if (periodic) {
if (TRICLINIC) {
if (PERIODIC_TYPE == PeriodicTriclinic) {
fvec4 scale3 = floor(dz*recipBoxSize[2]+0.5f);
dx -= scale3*periodicBoxVectors[2][0];
dy -= scale3*periodicBoxVectors[2][1];
......@@ -289,12 +384,11 @@ void CpuNonbondedForceVec4::getDeltaR(const float* posI, const fvec4& x, const f
fvec4 scale1 = floor(dx*recipBoxSize[0]+0.5f);
dx -= scale1*periodicBoxVectors[0][0];
}
else {
else if (PERIODIC_TYPE == PeriodicPerInteraction) {
dx -= round(dx*invBoxSize[0])*boxSize[0];
dy -= round(dy*invBoxSize[1])*boxSize[1];
dz -= round(dz*invBoxSize[2])*boxSize[2];
}
}
r2 = dx*dx + dy*dy + dz*dz;
}
......
......@@ -76,29 +76,75 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
CpuNonbondedForceVec8::CpuNonbondedForceVec8() {
}
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
if (triclinic)
calculateBlockIxnImpl<true>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
// Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType;
fvec4 blockCenter;
if (!periodic) {
periodicType = NoPeriodic;
blockCenter = 0.0f;
}
else {
const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex];
float minx, maxx, miny, maxy, minz, maxz;
minx = maxx = posq[4*blockAtom[0]];
miny = maxy = posq[4*blockAtom[0]+1];
minz = maxz = posq[4*blockAtom[0]+2];
for (int i = 1; i < 8; i++) {
minx = min(minx, posq[4*blockAtom[i]]);
maxx = max(maxx, posq[4*blockAtom[i]]);
miny = min(miny, posq[4*blockAtom[i]+1]);
maxy = max(maxy, posq[4*blockAtom[i]+1]);
minz = min(minz, posq[4*blockAtom[i]+2]);
maxz = max(maxz, posq[4*blockAtom[i]+2]);
}
blockCenter = fvec4(0.5f*(minx+maxx), 0.5f*(miny+maxy), 0.5f*(minz+maxz), 0.0f);
if (!(minx < cutoffDistance || miny < cutoffDistance || minz < cutoffDistance ||
maxx > boxSize[0]-cutoffDistance || maxy > boxSize[1]-cutoffDistance || maxz > boxSize[2]-cutoffDistance))
periodicType = NoPeriodic;
else if (triclinic)
periodicType = PeriodicTriclinic;
else if (0.5f*(boxSize[0]-(maxx-minx)) >= cutoffDistance &&
0.5f*(boxSize[1]-(maxy-miny)) >= cutoffDistance &&
0.5f*(boxSize[2]-(maxz-minz)) >= cutoffDistance)
periodicType = PeriodicPerAtom;
else
calculateBlockIxnImpl<false>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
periodicType = PeriodicPerInteraction;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic)
calculateBlockIxnImpl<NoPeriodic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom)
calculateBlockIxnImpl<PeriodicPerAtom>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerInteraction)
calculateBlockIxnImpl<PeriodicPerInteraction>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicTriclinic)
calculateBlockIxnImpl<PeriodicTriclinic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
}
template <bool TRICLINIC>
void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
template <int PERIODIC_TYPE>
void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex];
fvec4 blockAtomPosq[8];
fvec8 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
fvec8 blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge;
for (int i = 0; i < 8; i++)
for (int i = 0; i < 8; i++) {
blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
if (PERIODIC_TYPE == PeriodicPerAtom)
blockAtomPosq[i] -= floor((blockAtomPosq[i]-blockCenter)*invBoxSize+0.5f)*boxSize;
}
transpose(blockAtomPosq[0], blockAtomPosq[1], blockAtomPosq[2], blockAtomPosq[3], blockAtomPosq[4], blockAtomPosq[5], blockAtomPosq[6], blockAtomPosq[7], blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge);
blockAtomCharge *= ONE_4PI_EPS0;
fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first);
fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second);
bool needPeriodic = (periodic && (any(blockAtomX < cutoffDistance) || any(blockAtomY < cutoffDistance) || any(blockAtomZ < cutoffDistance) ||
any(blockAtomX > boxSize[0]-cutoffDistance) || any(blockAtomY > boxSize[1]-cutoffDistance) || any(blockAtomZ > boxSize[2]-cutoffDistance)));
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
// Loop over neighbors for this block.
......@@ -113,7 +159,10 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the distances to the block atoms.
fvec8 dx, dy, dz, r2;
getDeltaR<TRICLINIC>(&posq[4*atom], blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
fvec4 atomPos(posq+4*atom);
if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
ivec8 include;
char excl = exclusions[i];
if (excl == 0)
......@@ -126,8 +175,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the interactions.
fvec8 r = sqrt(r2);
fvec8 inverseR = fvec8(1.0f)/r;
fvec8 inverseR = rsqrt(r2);
fvec8 energy, dEdR;
float atomEpsilon = atomParameters[atom].second;
if (atomEpsilon != 0.0f) {
......@@ -139,6 +187,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f);
if (useSwitch) {
fvec8 r = r2*inverseR;
fvec8 t = (r>switchingDistance) & ((r-switchingDistance)*invSwitchingInterval);
fvec8 switchValue = 1+t*t*t*(-10.0f+t*(15.0f-t*6.0f));
fvec8 switchDeriv = t*t*(-30.0f+t*(60.0f-t*30.0f))*invSwitchingInterval;
......@@ -193,28 +242,72 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
}
void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
if (triclinic)
calculateBlockEwaldIxnImpl<true>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
// Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType;
fvec4 blockCenter;
if (!periodic) {
periodicType = NoPeriodic;
blockCenter = 0.0f;
}
else {
const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex];
float minx, maxx, miny, maxy, minz, maxz;
minx = maxx = posq[4*blockAtom[0]];
miny = maxy = posq[4*blockAtom[0]+1];
minz = maxz = posq[4*blockAtom[0]+2];
for (int i = 1; i < 8; i++) {
minx = min(minx, posq[4*blockAtom[i]]);
maxx = max(maxx, posq[4*blockAtom[i]]);
miny = min(miny, posq[4*blockAtom[i]+1]);
maxy = max(maxy, posq[4*blockAtom[i]+1]);
minz = min(minz, posq[4*blockAtom[i]+2]);
maxz = max(maxz, posq[4*blockAtom[i]+2]);
}
blockCenter = fvec4(0.5f*(minx+maxx), 0.5f*(miny+maxy), 0.5f*(minz+maxz), 0.0f);
if (!(minx < cutoffDistance || miny < cutoffDistance || minz < cutoffDistance ||
maxx > boxSize[0]-cutoffDistance || maxy > boxSize[1]-cutoffDistance || maxz > boxSize[2]-cutoffDistance))
periodicType = NoPeriodic;
else if (triclinic)
periodicType = PeriodicTriclinic;
else if (0.5f*(boxSize[0]-(maxx-minx)) >= cutoffDistance &&
0.5f*(boxSize[1]-(maxy-miny)) >= cutoffDistance &&
0.5f*(boxSize[2]-(maxz-minz)) >= cutoffDistance)
periodicType = PeriodicPerAtom;
else
calculateBlockEwaldIxnImpl<false>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
periodicType = PeriodicPerInteraction;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic)
calculateBlockEwaldIxnImpl<NoPeriodic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom)
calculateBlockEwaldIxnImpl<PeriodicPerAtom>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerInteraction)
calculateBlockEwaldIxnImpl<PeriodicPerInteraction>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicTriclinic)
calculateBlockEwaldIxnImpl<PeriodicTriclinic>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
}
template <bool TRICLINIC>
void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
template <int PERIODIC_TYPE>
void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex];
fvec4 blockAtomPosq[8];
fvec8 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
fvec8 blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge;
for (int i = 0; i < 8; i++)
for (int i = 0; i < 8; i++) {
blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
if (PERIODIC_TYPE == PeriodicPerAtom)
blockAtomPosq[i] -= floor((blockAtomPosq[i]-blockCenter)*invBoxSize+0.5f)*boxSize;
}
transpose(blockAtomPosq[0], blockAtomPosq[1], blockAtomPosq[2], blockAtomPosq[3], blockAtomPosq[4], blockAtomPosq[5], blockAtomPosq[6], blockAtomPosq[7], blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge);
blockAtomCharge *= ONE_4PI_EPS0;
fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first);
fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second);
bool needPeriodic = (periodic && (any(blockAtomX < cutoffDistance) || any(blockAtomY < cutoffDistance) || any(blockAtomZ < cutoffDistance) ||
any(blockAtomX > boxSize[0]-cutoffDistance) || any(blockAtomY > boxSize[1]-cutoffDistance) || any(blockAtomZ > boxSize[2]-cutoffDistance)));
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
// Loop over neighbors for this block.
......@@ -229,7 +322,10 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the distances to the block atoms.
fvec8 dx, dy, dz, r2;
getDeltaR<TRICLINIC>(&posq[4*atom], blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
fvec4 atomPos(posq+4*atom);
if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
ivec8 include;
char excl = exclusions[i];
if (excl == 0)
......@@ -242,8 +338,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the interactions.
fvec8 r = sqrt(r2);
fvec8 inverseR = fvec8(1.0f)/r;
fvec8 inverseR = rsqrt(r2);
fvec8 r = r2*inverseR;
fvec8 energy, dEdR;
float atomEpsilon = atomParameters[atom].second;
if (atomEpsilon != 0.0f) {
......@@ -302,13 +398,12 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
(fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]);
}
template <bool TRICLINIC>
void CpuNonbondedForceVec8::getDeltaR(const float* posI, const fvec8& x, const fvec8& y, const fvec8& z, fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
template <int PERIODIC_TYPE>
void CpuNonbondedForceVec8::getDeltaR(const fvec4& posI, const fvec8& x, const fvec8& y, const fvec8& z, fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
dx = x-posI[0];
dy = y-posI[1];
dz = z-posI[2];
if (periodic) {
if (TRICLINIC) {
if (PERIODIC_TYPE == PeriodicTriclinic) {
fvec8 scale3 = floor(dz*recipBoxSize[2]+0.5f);
dx -= scale3*periodicBoxVectors[2][0];
dy -= scale3*periodicBoxVectors[2][1];
......@@ -319,12 +414,11 @@ void CpuNonbondedForceVec8::getDeltaR(const float* posI, const fvec8& x, const f
fvec8 scale1 = floor(dx*recipBoxSize[0]+0.5f);
dx -= scale1*periodicBoxVectors[0][0];
}
else {
else if (PERIODIC_TYPE == PeriodicPerInteraction) {
dx -= round(dx*invBoxSize[0])*boxSize[0];
dy -= round(dy*invBoxSize[1])*boxSize[1];
dz -= round(dz*invBoxSize[2])*boxSize[2];
}
}
r2 = dx*dx + dy*dy + dz*dz;
}
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -520,11 +520,19 @@ public:
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CMAPTorsionForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force);
private:
int numTorsions;
bool hasInitializedKernel;
CudaContext& cu;
const System& system;
std::vector<int2> mapPositionsVec;
CudaArray* coefficients;
CudaArray* mapPositions;
CudaArray* torsionMaps;
......@@ -1274,7 +1282,8 @@ private:
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
void recordChangedParameters(ContextImpl& context);
CudaContext& cu;
double prevStepSize;
double prevStepSize, energy;
float energyFloat;
int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters, keNeedsForce;
mutable bool localValuesAreCurrent;
......@@ -1295,7 +1304,7 @@ private:
std::vector<std::vector<CUfunction> > kernels;
std::vector<std::vector<std::vector<void*> > > kernelArgs;
std::vector<void*> kineticEnergyArgs;
CUfunction sumPotentialEnergyKernel, randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
CUfunction randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
std::vector<CustomIntegrator::ComputationType> stepType;
std::vector<bool> needsForces;
std::vector<bool> needsEnergy;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -345,6 +345,13 @@ public:
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CMAPTorsionForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force);
private:
class Task;
CudaPlatform::PlatformData& data;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2014 Stanford University and the Authors. *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -32,6 +32,7 @@
#include "openmm/VirtualSite.h"
#include "quern.h"
#include "CudaExpressionUtilities.h"
#include "ReferenceCCMAAlgorithm.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
......@@ -304,157 +305,54 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
int numCCMA = (int) ccmaConstraints.size();
if (numCCMA > 0) {
vector<vector<int> > atomConstraints(context.getNumAtoms());
for (int i = 0; i < numCCMA; i++) {
atomConstraints[atom1[ccmaConstraints[i]]].push_back(i);
atomConstraints[atom2[ccmaConstraints[i]]].push_back(i);
}
vector<vector<int> > linkedConstraints(numCCMA);
for (unsigned atom = 0; atom < atomConstraints.size(); atom++) {
for (unsigned i = 0; i < atomConstraints[atom].size(); i++)
for (unsigned j = 0; j < i; j++) {
int c1 = atomConstraints[atom][i];
int c2 = atomConstraints[atom][j];
linkedConstraints[c1].push_back(c2);
linkedConstraints[c2].push_back(c1);
}
}
int maxLinks = 0;
for (unsigned i = 0; i < linkedConstraints.size(); i++)
maxLinks = max(maxLinks, (int) linkedConstraints[i].size());
int maxAtomConstraints = 0;
for (unsigned i = 0; i < atomConstraints.size(); i++)
maxAtomConstraints = max(maxAtomConstraints, (int) atomConstraints[i].size());
// Record information needed by ReferenceCCMAAlgorithm.
// Compute the constraint coupling matrix
vector<vector<int> > atomAngles(numAtoms);
HarmonicAngleForce const* angleForce = NULL;
for (int i = 0; i < system.getNumForces() && angleForce == NULL; i++)
angleForce = dynamic_cast<HarmonicAngleForce const*>(&system.getForce(i));
if (angleForce != NULL)
for (int i = 0; i < angleForce->getNumAngles(); i++) {
int particle1, particle2, particle3;
vector<pair<int, int> > refIndices(numCCMA);
vector<RealOpenMM> refDistance(numCCMA);
for (int i = 0; i < numCCMA; i++) {
int index = ccmaConstraints[i];
refIndices[i] = make_pair(atom1[index], atom2[index]);
refDistance[i] = distance[index];
}
vector<RealOpenMM> refMasses(numAtoms);
for (int i = 0; i < numAtoms; ++i)
refMasses[i] = (RealOpenMM) system.getParticleMass(i);
// Look up angles for CCMA.
vector<ReferenceCCMAAlgorithm::AngleInfo> angles;
for (int i = 0; i < system.getNumForces(); i++) {
const HarmonicAngleForce* force = dynamic_cast<const HarmonicAngleForce*>(&system.getForce(i));
if (force != NULL) {
for (int j = 0; j < force->getNumAngles(); j++) {
int atom1, atom2, atom3;
double angle, k;
angleForce->getAngleParameters(i, particle1, particle2, particle3, angle, k);
atomAngles[particle2].push_back(i);
}
vector<vector<pair<int, double> > > matrix(numCCMA);
for (int j = 0; j < numCCMA; j++) {
for (int k = 0; k < numCCMA; k++) {
if (j == k) {
matrix[j].push_back(pair<int, double>(j, 1.0));
continue;
}
double scale;
int cj = ccmaConstraints[j];
int ck = ccmaConstraints[k];
int atomj0 = atom1[cj];
int atomj1 = atom2[cj];
int atomk0 = atom1[ck];
int atomk1 = atom2[ck];
int atoma, atomb, atomc;
double imj0 = 1.0/system.getParticleMass(atomj0);
double imj1 = 1.0/system.getParticleMass(atomj1);
if (atomj0 == atomk0) {
atoma = atomj1;
atomb = atomj0;
atomc = atomk1;
scale = imj0/(imj0+imj1);
}
else if (atomj1 == atomk1) {
atoma = atomj0;
atomb = atomj1;
atomc = atomk0;
scale = imj1/(imj0+imj1);
}
else if (atomj0 == atomk1) {
atoma = atomj1;
atomb = atomj0;
atomc = atomk0;
scale = imj0/(imj0+imj1);
}
else if (atomj1 == atomk0) {
atoma = atomj0;
atomb = atomj1;
atomc = atomk1;
scale = imj1/(imj0+imj1);
}
else
continue; // These constraints are not connected.
// Look for a third constraint forming a triangle with these two.
bool foundConstraint = false;
for (int m = 0; m < numCCMA; m++) {
int other = ccmaConstraints[m];
if ((atom1[other] == atoma && atom2[other] == atomc) || (atom1[other] == atomc && atom2[other] == atoma)) {
double d1 = distance[cj];
double d2 = distance[ck];
double d3 = distance[other];
matrix[j].push_back(pair<int, double>(k, scale*(d1*d1+d2*d2-d3*d3)/(2.0*d1*d2)));
foundConstraint = true;
break;
}
}
if (!foundConstraint && angleForce != NULL) {
// We didn't find one, so look for an angle force field term.
const vector<int>& angleCandidates = atomAngles[atomb];
for (vector<int>::const_iterator iter = angleCandidates.begin(); iter != angleCandidates.end(); iter++) {
int particle1, particle2, particle3;
double angle, ka;
angleForce->getAngleParameters(*iter, particle1, particle2, particle3, angle, ka);
if ((particle1 == atoma && particle3 == atomc) || (particle3 == atoma && particle1 == atomc)) {
matrix[j].push_back(pair<int, double>(k, scale*cos(angle)));
break;
}
force->getAngleParameters(j, atom1, atom2, atom3, angle, k);
angles.push_back(ReferenceCCMAAlgorithm::AngleInfo(atom1, atom2, atom3, (RealOpenMM) angle));
}
}
}
}
// Invert it using QR.
vector<int> matrixRowStart;
vector<int> matrixColIndex;
vector<double> matrixValue;
for (int i = 0; i < numCCMA; i++) {
matrixRowStart.push_back(matrixValue.size());
for (int j = 0; j < (int) matrix[i].size(); j++) {
pair<int, double> element = matrix[i][j];
matrixColIndex.push_back(element.first);
matrixValue.push_back(element.second);
}
}
matrixRowStart.push_back(matrixValue.size());
int *qRowStart, *qColIndex, *rRowStart, *rColIndex;
double *qValue, *rValue;
int result = QUERN_compute_qr(numCCMA, numCCMA, &matrixRowStart[0], &matrixColIndex[0], &matrixValue[0], NULL,
&qRowStart, &qColIndex, &qValue, &rRowStart, &rColIndex, &rValue);
vector<double> rhs(numCCMA);
matrix.clear();
matrix.resize(numCCMA);
for (int i = 0; i < numCCMA; i++) {
// Extract column i of the inverse matrix.
// Create a ReferenceCCMAAlgorithm. It will build and invert the constraint matrix for us.
for (int j = 0; j < numCCMA; j++)
rhs[j] = (i == j ? 1.0 : 0.0);
result = QUERN_multiply_with_q_transpose(numCCMA, qRowStart, qColIndex, qValue, &rhs[0]);
result = QUERN_solve_with_r(numCCMA, rRowStart, rColIndex, rValue, &rhs[0], &rhs[0]);
for (int j = 0; j < numCCMA; j++) {
double value = rhs[j]*distance[ccmaConstraints[i]]/distance[ccmaConstraints[j]];
if (abs(value) > 0.1)
matrix[j].push_back(pair<int, double>(i, value));
}
}
QUERN_free_result(qRowStart, qColIndex, qValue);
QUERN_free_result(rRowStart, rColIndex, rValue);
ReferenceCCMAAlgorithm ccma(numAtoms, numCCMA, refIndices, refDistance, refMasses, angles, 0.1);
vector<vector<pair<int, double> > > matrix = ccma.getMatrix();
int maxRowElements = 0;
for (unsigned i = 0; i < matrix.size(); i++)
maxRowElements = max(maxRowElements, (int) matrix[i].size());
maxRowElements++;
// Build the list of constraints for each atom.
vector<vector<int> > atomConstraints(context.getNumAtoms());
for (int i = 0; i < numCCMA; i++) {
atomConstraints[atom1[ccmaConstraints[i]]].push_back(i);
atomConstraints[atom2[ccmaConstraints[i]]].push_back(i);
}
int maxAtomConstraints = 0;
for (unsigned i = 0; i < atomConstraints.size(); i++)
maxAtomConstraints = max(maxAtomConstraints, (int) atomConstraints[i].size());
// Sort the constraints.
vector<int> constraintOrder(numCCMA);
......
......@@ -1127,7 +1127,7 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP
return;
int numMaps = force.getNumMaps();
vector<float4> coeffVec;
vector<int2> mapPositionsVec(numMaps);
mapPositionsVec.resize(numMaps);
vector<double> energy;
vector<vector<double> > c;
int currentPosition = 0;
......@@ -1166,6 +1166,49 @@ double CudaCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includ
return 0.0;
}
void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force) {
int numMaps = force.getNumMaps();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (mapPositions->getSize() != numMaps)
throw OpenMMException("updateParametersInContext: The number of maps has changed");
if (torsionMaps->getSize() != numTorsions)
throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");
// Update the maps.
vector<float4> coeffVec;
vector<double> energy;
vector<vector<double> > c;
int currentPosition = 0;
for (int i = 0; i < numMaps; i++) {
int size;
force.getMapParameters(i, size, energy);
if (size != mapPositionsVec[i].y)
throw OpenMMException("updateParametersInContext: The size of a map has changed");
CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
currentPosition += 4*size*size;
for (int j = 0; j < size*size; j++) {
coeffVec.push_back(make_float4((float) c[j][0], (float) c[j][1], (float) c[j][2], (float) c[j][3]));
coeffVec.push_back(make_float4((float) c[j][4], (float) c[j][5], (float) c[j][6], (float) c[j][7]));
coeffVec.push_back(make_float4((float) c[j][8], (float) c[j][9], (float) c[j][10], (float) c[j][11]));
coeffVec.push_back(make_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
coefficients->upload(coeffVec);
// Update the indices.
vector<int> torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int index[8];
force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
}
torsionMaps->upload(torsionMapsVec);
}
class CudaCustomTorsionForceInfo : public CudaForceInfo {
public:
CudaCustomTorsionForceInfo(const CustomTorsionForce& force) : force(force) {
......@@ -5688,7 +5731,7 @@ string CudaIntegrateCustomStepKernel::createGlobalComputation(const string& vari
variables["dt"] = "dt[0].y";
variables["uniform"] = "uniform";
variables["gaussian"] = "gaussian";
variables[energyName] = "energy[0]";
variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
for (int i = 0; i < (int) parameterNames.size(); i++)
......@@ -5724,7 +5767,7 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
variables["m"] = "mass";
variables["dt"] = "stepSize";
if (energyName != "")
variables[energyName] = "energy[0]";
variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
......@@ -5957,7 +6000,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1.push_back(NULL);
args1.push_back(NULL);
args1.push_back(NULL);
args1.push_back(&potentialEnergy->getDevicePointer());
if (cu.getUseDoublePrecision())
args1.push_back(&energy);
else
args1.push_back(&energyFloat);
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
args1.push_back(&perDofValues->getBuffers()[i].getMemory());
kernelArgs[step].push_back(args1);
......@@ -6006,7 +6052,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args.push_back(&contextParameterValues->getDevicePointer());
args.push_back(NULL);
args.push_back(NULL);
args.push_back(&potentialEnergy->getDevicePointer());
if (cu.getUseDoublePrecision())
args.push_back(&energy);
else
args.push_back(&energyFloat);
kernelArgs[step].push_back(args);
}
else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
......@@ -6046,13 +6095,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
CUmodule randomProgram = cu.createModule(CudaKernelSources::customIntegrator, defines);
randomKernel = cu.getKernel(randomProgram, "generateRandomNumbers");
// Create the kernel for summing the potential energy.
defines["SUM_OUTPUT_INDEX"] = "0";
defines["SUM_BUFFER_SIZE"] = cu.intToString(cu.getEnergyBuffer().getSize());
CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
sumPotentialEnergyKernel = cu.getKernel(module, cu.getUseDoublePrecision() ? "computeDoubleSum" : "computeFloatSum");
// Create the kernel for computing kinetic energy.
stringstream computeKE;
......@@ -6074,10 +6116,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args << ", " << buffer.getType() << "* __restrict__ " << valueName;
}
replacements["PARAMETER_ARGUMENTS"] = args.str();
defines["SUM_OUTPUT_INDEX"] = "0";
defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
defines.erase("LOAD_POS_AS_DELTA");
module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorPerDof, replacements), defines);
CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorPerDof, replacements), defines);
kineticEnergyKernel = cu.getKernel(module, "computePerDof");
kineticEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
kineticEnergyArgs.push_back(NULL);
......@@ -6091,7 +6134,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs.push_back(NULL);
kineticEnergyArgs.push_back(NULL);
kineticEnergyArgs.push_back(&uniformRandoms->getDevicePointer());
kineticEnergyArgs.push_back(&potentialEnergy->getDevicePointer());
if (cu.getUseDoublePrecision())
kineticEnergyArgs.push_back(&energy);
else
kineticEnergyArgs.push_back(&energyFloat);
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyArgs.push_back(&perDofValues->getBuffers()[i].getMemory());
keNeedsForce = usesVariable(keExpression, "f");
......@@ -6197,11 +6243,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
}
else {
recordChangedParameters(context);
context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
if (computeEnergy) {
void* args[] = {&cu.getEnergyBuffer().getDevicePointer(), &potentialEnergy->getDevicePointer()};
cu.executeKernel(sumPotentialEnergyKernel, &args[0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
}
energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
energyFloat = (float) energy;
}
forcesAreValid = true;
}
......@@ -6272,11 +6315,8 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
bool willNeedEnergy = false;
for (int i = 0; i < integrator.getNumComputations(); i++)
willNeedEnergy |= needsEnergy[i];
context.calcForcesAndEnergy(true, willNeedEnergy, -1);
if (willNeedEnergy) {
void* args[] = {&cu.getEnergyBuffer().getDevicePointer(), &potentialEnergy->getDevicePointer()};
cu.executeKernel(sumPotentialEnergyKernel, &args[0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
}
energy = context.calcForcesAndEnergy(true, willNeedEnergy, -1);
energyFloat = (float) energy;
forcesAreValid = true;
}
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
......
......@@ -536,6 +536,11 @@ double CudaParallelCalcCMAPTorsionForceKernel::execute(ContextImpl& context, boo
return 0.0;
}
void CudaParallelCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
class CudaParallelCalcCustomTorsionForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcCustomTorsionForceKernel& kernel, bool includeForce,
......
extern "C" __global__ void computeGlobal(mixed2* __restrict__ dt, mixed* __restrict__ globals, mixed* __restrict__ params,
float uniform, float gaussian, const real* __restrict__ energy) {
float uniform, float gaussian, const real energy) {
COMPUTE_STEP
}
......@@ -34,7 +34,7 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta,
mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals,
const mixed* __restrict__ params, mixed* __restrict__ sum, const float4* __restrict__ gaussianValues,
unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues, const real* __restrict__ energy
unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues, const real energy
PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010-2012 Stanford University and the Authors. *
* Portions copyright (c) 2010-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -105,11 +105,68 @@ void testCMAPTorsions() {
}
}
void testChangingParameters() {
// Create a system with two maps and one torsion.
const int mapSize = 8;
System system;
for (int i = 0; i < 5; i++)
system.addParticle(1.0);
CMAPTorsionForce* cmap = new CMAPTorsionForce();
vector<double> mapEnergy1(mapSize*mapSize);
vector<double> mapEnergy2(mapSize*mapSize);
for (int i = 0; i < mapSize; i++) {
double angle1 = i*2*M_PI/mapSize;
double energy1 = cos(angle1);
for (int j = 0; j < mapSize; j++) {
double angle2 = j*2*M_PI/mapSize;
double energy2 = 10*sin(angle2);
mapEnergy1[i+j*mapSize] = energy1+energy2;
mapEnergy2[i+j*mapSize] = energy1-energy2;
}
}
cmap->addMap(mapSize, mapEnergy1);
cmap->addMap(mapSize, mapEnergy2);
cmap->addTorsion(0, 0, 1, 2, 3, 1, 2, 3, 4);
system.addForce(cmap);
// Set particle positions so angle1=0 and angle2=PI/4.
vector<Vec3> positions(5);
positions[0] = Vec3(0, 0, 1);
positions[1] = Vec3(0, 0, 0);
positions[2] = Vec3(1, 0, 0);
positions[3] = Vec3(1, 0, 1);
positions[4] = Vec3(0.5, -0.5, 1);
VerletIntegrator integrator(0.01);
Context context(system, integrator, platform);
context.setPositions(positions);
// Check that the energy is correct.
double energy = context.getState(State::Energy).getPotentialEnergy();
ASSERT_EQUAL_TOL(1+10*sin(M_PI/4), energy, 1e-5);
// Modify the parameters.
cmap->setTorsionParameters(0, 1, 0, 1, 2, 3, 1, 2, 3, 4);
for (int i = 0; i < mapSize*mapSize; i++)
mapEnergy2[i] *= 2.0;
cmap->setMapParameters(1, mapSize, mapEnergy2);
cmap->updateParametersInContext(context);
// See if the results are correct.
energy = context.getState(State::Energy).getPotentialEnergy();
ASSERT_EQUAL_TOL(2-20*sin(M_PI/4), energy, 1e-5);
}
int main(int argc, char* argv[]) {
try {
if (argc > 1)
platform.setPropertyDefaultValue("CudaPrecision", string(argv[1]));
testCMAPTorsions();
testChangingParameters();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -40,7 +40,7 @@ namespace OpenMM {
* 15, 207–228 (2000).
* <p>
* This class places certain restrictions on the allowed dimensions of the grid. First,
* the size of each dimension may have no prime factors other than 2, 3, and 5. You
* the size of each dimension may have no prime factors other than 2, 3, 5, and 7. You
* can call findLegalDimension() to determine the smallest size that satisfies this
* requirement and is greater than or equal to a specified minimum size. Second, the size
* of each dimension must be small enough to compute each 1D transform entirely in local
......@@ -61,12 +61,17 @@ public:
* @param xsize the first dimension of the data sets on which FFTs will be performed
* @param ysize the second dimension of the data sets on which FFTs will be performed
* @param zsize the third dimension of the data sets on which FFTs will be performed
* @param realToComplex if true, a real-to-complex transform will be done. Otherwise, it is complex-to-complex.
*/
OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize);
OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize, bool realToComplex=false);
/**
* Perform a Fourier transform. The transform cannot be done in-place: the input and output
* arrays must be different. Also, the input array is used as workspace, so its contents
* are destroyed.
* are destroyed. This also means that both arrays must be large enough to hold complex values,
* even when performing a real-to-complex transform.
* <p>
* When performing a real-to-complex transform, the output data is of size xsize*ysize*(zsize/2+1)
* and contains only the non-redundant elements.
*
* @param in the data to transform, ordered such that in[x*ysize*zsize + y*zsize + z] contains element (x, y, z)
* @param out on exit, this contains the transformed data
......@@ -75,17 +80,20 @@ public:
void execFFT(OpenCLArray& in, OpenCLArray& out, bool forward = true);
/**
* Get the smallest legal size for a dimension of the grid (that is, a size with no prime
* factors other than 2, 3, and 5).
* factors other than 2, 3, 5, and 7).
*
* @param minimum the minimum size the return value must be greater than or equal to
*/
static int findLegalDimension(int minimum);
private:
cl::Kernel createKernel(int xsize, int ysize, int zsize, int& threads);
cl::Kernel createKernel(int xsize, int ysize, int zsize, int& threads, int axis, bool forward, bool inputIsReal);
int xsize, ysize, zsize;
int xthreads, ythreads, zthreads;
bool packRealAsComplex;
OpenCLContext& context;
cl::Kernel xkernel, ykernel, zkernel;
cl::Kernel invxkernel, invykernel, invzkernel;
cl::Kernel packForwardKernel, unpackForwardKernel, packBackwardKernel, unpackBackwardKernel;
};
} // namespace OpenMM
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -498,11 +498,19 @@ public:
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CMAPTorsionForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force);
private:
int numTorsions;
bool hasInitializedKernel;
OpenCLContext& cl;
const System& system;
std::vector<mm_int2> mapPositionsVec;
OpenCLArray* coefficients;
OpenCLArray* mapPositions;
OpenCLArray* torsionMaps;
......@@ -631,6 +639,7 @@ private:
cl::Kernel pmeSpreadChargeKernel;
cl::Kernel pmeFinishSpreadChargeKernel;
cl::Kernel pmeConvolutionKernel;
cl::Kernel pmeEvalEnergyKernel;
cl::Kernel pmeInterpolateForceKernel;
std::map<std::string, std::string> pmeDefines;
std::vector<std::pair<int, int> > exceptionAtoms;
......@@ -1264,7 +1273,7 @@ private:
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
void recordChangedParameters(ContextImpl& context);
OpenCLContext& cl;
double prevStepSize;
double prevStepSize, energy;
int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters, keNeedsForce;
mutable bool localValuesAreCurrent;
......@@ -1284,7 +1293,7 @@ private:
std::vector<double> contextValuesDouble;
std::vector<float> contextValues;
std::vector<std::vector<cl::Kernel> > kernels;
cl::Kernel sumPotentialEnergyKernel, randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
cl::Kernel randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
std::vector<CustomIntegrator::ComputationType> stepType;
std::vector<bool> needsForces;
std::vector<bool> needsEnergy;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment