Commit 74c3def4 authored by Peter Eastman's avatar Peter Eastman
Browse files

Implemented CCMA for OpenCL platform

parent cd8bf0f2
...@@ -27,6 +27,9 @@ ...@@ -27,6 +27,9 @@
#include "OpenCLIntegrationUtilities.h" #include "OpenCLIntegrationUtilities.h"
#include "OpenCLArray.h" #include "OpenCLArray.h"
#include "OpenCLKernelSources.h" #include "OpenCLKernelSources.h"
#include "openmm/HarmonicAngleForce.h"
#include "quern.h"
#include "OpenCLExpressionUtilities.h"
#include <cmath> #include <cmath>
#include <cstdlib> #include <cstdlib>
#include <map> #include <map>
...@@ -67,9 +70,24 @@ struct OpenCLIntegrationUtilities::ShakeCluster { ...@@ -67,9 +70,24 @@ struct OpenCLIntegrationUtilities::ShakeCluster {
} }
}; };
struct OpenCLIntegrationUtilities::ConstraintOrderer : public binary_function<int, int, bool> {
const vector<int>& atom1;
const vector<int>& atom2;
ConstraintOrderer(const vector<int>& atom1, const vector<int>& atom2) : atom1(atom1), atom2(atom2) {
}
bool operator()(int x, int y) {
if (atom1[x] != atom1[y])
return atom1[x] < atom1[y];
return atom2[x] < atom2[y];
}
};
OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, const System& system) : context(context), OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, const System& system) : context(context),
posDelta(NULL), settleAtoms(NULL), settleParams(NULL), shakeAtoms(NULL), shakeParams(NULL), posDelta(NULL), settleAtoms(NULL), settleParams(NULL), shakeAtoms(NULL), shakeParams(NULL),
random(NULL), randomSeed(NULL), randomPos(NULL), stepSize(NULL) { random(NULL), randomSeed(NULL), randomPos(NULL), stepSize(NULL), ccmaAtoms(NULL), ccmaDistance(NULL),
ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL),
ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL),
hasInitializedConstraintKernels(false) {
// Create workspace arrays. // Create workspace arrays.
posDelta = new OpenCLArray<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta"); posDelta = new OpenCLArray<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta");
...@@ -101,7 +119,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -101,7 +119,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// atom that might be part of such a cluster, make a list of the two other atoms it is // atom that might be part of such a cluster, make a list of the two other atoms it is
// connected to. // connected to.
vector<map<int, float> > settleConstraints(system.getNumParticles()); int numAtoms = system.getNumParticles();
vector<map<int, float> > settleConstraints(numAtoms);
for (int i = 0; i < (int)atom1.size(); i++) { for (int i = 0; i < (int)atom1.size(); i++) {
if (constraintCount[atom1[i]] == 2 && constraintCount[atom2[i]] == 2) { if (constraintCount[atom1[i]] == 2 && constraintCount[atom2[i]] == 2) {
settleConstraints[atom1[i]][atom2[i]] = (float) distance[i]; settleConstraints[atom1[i]][atom2[i]] = (float) distance[i];
...@@ -128,7 +147,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -128,7 +147,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Record the SETTLE clusters. // Record the SETTLE clusters.
vector<bool> isShakeAtom(system.getNumParticles(), false); vector<bool> isShakeAtom(numAtoms, false);
if (settleClusters.size() > 0) { if (settleClusters.size() > 0) {
vector<mm_int4> atoms; vector<mm_int4> atoms;
vector<mm_float2> params; vector<mm_float2> params;
...@@ -169,7 +188,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -169,7 +188,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Find clusters consisting of a central atom with up to three peripheral atoms. // Find clusters consisting of a central atom with up to three peripheral atoms.
map<int, ShakeCluster> clusters; map<int, ShakeCluster> clusters;
vector<bool> invalidForShake(system.getNumParticles(), false); vector<bool> invalidForShake(numAtoms, false);
for (int i = 0; i < (int) atom1.size(); i++) { for (int i = 0; i < (int) atom1.size(); i++) {
if (isShakeAtom[atom1[i]]) if (isShakeAtom[atom1[i]])
continue; // This is being taken care of with SETTLE. continue; // This is being taken care of with SETTLE.
...@@ -247,6 +266,239 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -247,6 +266,239 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
shakeAtoms->upload(atoms); shakeAtoms->upload(atoms);
shakeParams->upload(params); shakeParams->upload(params);
} }
// Find connected constraints for CCMA.
vector<int> ccmaConstraints;
for (unsigned i = 0; i < atom1.size(); i++)
if (!isShakeAtom[atom1[i]])
ccmaConstraints.push_back(i);
// Record the connections between constraints.
int numCCMA = (int) ccmaConstraints.size();
if (numCCMA > 0) {
vector<vector<int> > atomConstraints(context.getNumAtoms());
for (int i = 0; i < numCCMA; i++) {
atomConstraints[atom1[ccmaConstraints[i]]].push_back(i);
atomConstraints[atom2[ccmaConstraints[i]]].push_back(i);
}
vector<vector<int> > linkedConstraints(numCCMA);
for (unsigned atom = 0; atom < atomConstraints.size(); atom++) {
for (unsigned i = 0; i < atomConstraints[atom].size(); i++)
for (unsigned j = 0; j < i; j++) {
int c1 = atomConstraints[atom][i];
int c2 = atomConstraints[atom][j];
linkedConstraints[c1].push_back(c2);
linkedConstraints[c2].push_back(c1);
}
}
int maxLinks = 0;
for (unsigned i = 0; i < linkedConstraints.size(); i++)
maxLinks = max(maxLinks, (int) linkedConstraints[i].size());
int maxAtomConstraints = 0;
for (unsigned i = 0; i < atomConstraints.size(); i++)
maxAtomConstraints = max(maxAtomConstraints, (int) atomConstraints[i].size());
// Compute the constraint coupling matrix
vector<vector<int> > atomAngles(numAtoms);
HarmonicAngleForce const* angleForce = NULL;
for (int i = 0; i < system.getNumForces() && angleForce == NULL; i++)
angleForce = dynamic_cast<HarmonicAngleForce const*>(&system.getForce(i));
if (angleForce != NULL)
for (int i = 0; i < angleForce->getNumAngles(); i++) {
int particle1, particle2, particle3;
double angle, k;
angleForce->getAngleParameters(i, particle1, particle2, particle3, angle, k);
atomAngles[particle2].push_back(i);
}
vector<vector<pair<int, double> > > matrix(numCCMA);
for (int j = 0; j < numCCMA; j++) {
for (int k = 0; k < numCCMA; k++) {
if (j == k) {
matrix[j].push_back(pair<int, double>(j, 1.0));
continue;
}
double scale;
int cj = ccmaConstraints[j];
int ck = ccmaConstraints[k];
int atomj0 = atom1[cj];
int atomj1 = atom2[cj];
int atomk0 = atom1[ck];
int atomk1 = atom2[ck];
int atoma, atomb, atomc;
double imj0 = 1.0/system.getParticleMass(atomj0);
double imj1 = 1.0/system.getParticleMass(atomj1);
if (atomj0 == atomk0) {
atoma = atomj1;
atomb = atomj0;
atomc = atomk1;
scale = imj0/(imj0+imj1);
}
else if (atomj1 == atomk1) {
atoma = atomj0;
atomb = atomj1;
atomc = atomk0;
scale = imj1/(imj0+imj1);
}
else if (atomj0 == atomk1) {
atoma = atomj1;
atomb = atomj0;
atomc = atomk0;
scale = imj0/(imj0+imj1);
}
else if (atomj1 == atomk0) {
atoma = atomj0;
atomb = atomj1;
atomc = atomk1;
scale = imj1/(imj0+imj1);
}
else
continue; // These constraints are not connected.
// Look for a third constraint forming a triangle with these two.
bool foundConstraint = false;
for (int other = 0; other < numCCMA; other++) {
if ((atom1[other] == atoma && atom2[other] == atomc) || (atom1[other] == atomc && atom2[other] == atoma)) {
double d1 = distance[cj];
double d2 = distance[ck];
double d3 = distance[other];
matrix[j].push_back(pair<int, double>(k, scale*(d1*d1+d2*d2-d3*d3)/(2.0*d1*d2)));
foundConstraint = true;
break;
}
}
if (!foundConstraint && angleForce != NULL) {
// We didn't find one, so look for an angle force field term.
const vector<int>& angleCandidates = atomAngles[atomb];
for (vector<int>::const_iterator iter = angleCandidates.begin(); iter != angleCandidates.end(); iter++) {
int particle1, particle2, particle3;
double angle, ka;
angleForce->getAngleParameters(*iter, particle1, particle2, particle3, angle, ka);
if ((particle1 == atoma && particle3 == atomc) || (particle3 == atoma && particle1 == atomc)) {
matrix[j].push_back(pair<int, double>(k, scale*cos(angle)));
break;
}
}
}
}
}
// Invert it using QR.
vector<int> matrixRowStart;
vector<int> matrixColIndex;
vector<double> matrixValue;
for (int i = 0; i < numCCMA; i++) {
matrixRowStart.push_back(matrixValue.size());
for (int j = 0; j < (int) matrix[i].size(); j++) {
pair<int, double> element = matrix[i][j];
matrixColIndex.push_back(element.first);
matrixValue.push_back(element.second);
}
}
matrixRowStart.push_back(matrixValue.size());
int *qRowStart, *qColIndex, *rRowStart, *rColIndex;
double *qValue, *rValue;
int result = QUERN_compute_qr(numCCMA, numCCMA, &matrixRowStart[0], &matrixColIndex[0], &matrixValue[0], NULL,
&qRowStart, &qColIndex, &qValue, &rRowStart, &rColIndex, &rValue);
vector<double> rhs(numCCMA);
matrix.clear();
matrix.resize(numCCMA);
for (int i = 0; i < numCCMA; i++) {
// Extract column i of the inverse matrix.
for (int j = 0; j < numCCMA; j++)
rhs[j] = (i == j ? 1.0 : 0.0);
result = QUERN_multiply_with_q_transpose(numCCMA, qRowStart, qColIndex, qValue, &rhs[0]);
result = QUERN_solve_with_r(numCCMA, rRowStart, rColIndex, rValue, &rhs[0], &rhs[0]);
for (int j = 0; j < numCCMA; j++) {
double value = rhs[j]*distance[ccmaConstraints[i]]/distance[ccmaConstraints[j]];
if (abs(value) > 0.1)
matrix[j].push_back(pair<int, double>(i, value));
}
}
QUERN_free_result(qRowStart, qColIndex, qValue);
QUERN_free_result(rRowStart, rColIndex, rValue);
int maxRowElements = 0;
for (unsigned i = 0; i < matrix.size(); i++)
maxRowElements = max(maxRowElements, (int) matrix[i].size());
maxRowElements++;
// Sort the constraints.
vector<int> constraintOrder(numCCMA);
for (int i = 0; i < numCCMA; ++i)
constraintOrder[i] = i;
sort(constraintOrder.begin(), constraintOrder.end(), ConstraintOrderer(atom1, atom2));
vector<int> inverseOrder(numCCMA);
for (int i = 0; i < numCCMA; ++i)
inverseOrder[constraintOrder[i]] = i;
for (int i = 0; i < (int)matrix.size(); ++i)
for (int j = 0; j < (int)matrix[i].size(); ++j)
matrix[i][j].first = inverseOrder[matrix[i][j].first];
// Record the CCMA data structures.
ccmaAtoms = new OpenCLArray<mm_int2>(context, numCCMA, "CcmaAtoms");
ccmaDistance = new OpenCLArray<mm_float4>(context, numCCMA, "CcmaDistance");
ccmaAtomConstraints = new OpenCLArray<cl_int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
ccmaNumAtomConstraints = new OpenCLArray<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaDelta1 = new OpenCLArray<cl_float>(context, numCCMA, "CcmaDelta1");
ccmaDelta2 = new OpenCLArray<cl_float>(context, numCCMA, "CcmaDelta2");
ccmaConverged = new OpenCLArray<cl_int>(context, context.getNumThreadBlocks(), "CcmaConverged");
ccmaReducedMass = new OpenCLArray<cl_float>(context, numCCMA, "CcmaReducedMass");
ccmaConstraintMatrixColumn = new OpenCLArray<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConstraintMatrixValue = new OpenCLArray<cl_float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
vector<mm_int2> atomsVec(ccmaAtoms->getSize());
vector<mm_float4> distanceVec(ccmaDistance->getSize());
vector<cl_int> atomConstraintsVec(ccmaAtomConstraints->getSize());
vector<cl_int> numAtomConstraintsVec(ccmaNumAtomConstraints->getSize());
vector<cl_float> reducedMassVec(ccmaReducedMass->getSize());
vector<cl_int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn->getSize());
vector<cl_float> constraintMatrixValueVec(ccmaConstraintMatrixValue->getSize());
for (int i = 0; i < numCCMA; i++) {
int index = constraintOrder[i];
int c = ccmaConstraints[index];
atomsVec[i].x = atom1[c];
atomsVec[i].y = atom2[c];
distanceVec[i].w = distance[c];
reducedMassVec[i] = (float) (0.5/(1.0/system.getParticleMass(atom1[c])+1.0/system.getParticleMass(atom2[c])));
for (unsigned int j = 0; j < matrix[index].size(); j++) {
constraintMatrixColumnVec[i+j*numCCMA] = matrix[index][j].first;
constraintMatrixValueVec[i+j*numCCMA] = (float) matrix[index][j].second;
}
constraintMatrixColumnVec[i+matrix[index].size()*numCCMA] = numCCMA;
}
for (unsigned int i = 0; i < atomConstraints.size(); i++) {
numAtomConstraintsVec[i] = atomConstraints[i].size();
for (unsigned int j = 0; j < atomConstraints[i].size(); j++) {
bool forward = (atom1[ccmaConstraints[atomConstraints[i][j]]] == i);
atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1);
}
}
ccmaAtoms->upload(atomsVec);
ccmaDistance->upload(distanceVec);
ccmaAtomConstraints->upload(atomConstraintsVec);
ccmaNumAtomConstraints->upload(numAtomConstraintsVec);
ccmaReducedMass->upload(reducedMassVec);
ccmaConstraintMatrixColumn->upload(constraintMatrixColumnVec);
ccmaConstraintMatrixValue->upload(constraintMatrixValueVec);
// Create the CCMA kernels.
map<string, string> defines;
defines["NUM_CONSTRAINTS"] = OpenCLExpressionUtilities::intToString(numCCMA);
defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(numAtoms);
cl::Program ccmaProgram = context.createProgram(OpenCLKernelSources::ccma, defines);
ccmaDirectionsKernel = cl::Kernel(ccmaProgram, "computeConstraintDirections");
ccmaForceKernel = cl::Kernel(ccmaProgram, "computeConstraintForce");
ccmaMultiplyKernel = cl::Kernel(ccmaProgram, "multiplyByConstraintMatrix");
ccmaUpdateKernel = cl::Kernel(ccmaProgram, "updateAtomPositions");
}
} }
OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() { OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
...@@ -266,30 +518,103 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() { ...@@ -266,30 +518,103 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
delete randomSeed; delete randomSeed;
if (stepSize != NULL) if (stepSize != NULL)
delete stepSize; delete stepSize;
if (ccmaAtoms != NULL)
delete ccmaAtoms;
if (ccmaDistance != NULL)
delete ccmaDistance;
if (ccmaReducedMass != NULL)
delete ccmaReducedMass;
if (ccmaAtomConstraints != NULL)
delete ccmaAtomConstraints;
if (ccmaNumAtomConstraints != NULL)
delete ccmaNumAtomConstraints;
if (ccmaConstraintMatrixColumn != NULL)
delete ccmaConstraintMatrixColumn;
if (ccmaConstraintMatrixValue != NULL)
delete ccmaConstraintMatrixValue;
if (ccmaDelta1 != NULL)
delete ccmaDelta1;
if (ccmaDelta2 != NULL)
delete ccmaDelta2;
if (ccmaConverged != NULL)
delete ccmaConverged;
} }
void OpenCLIntegrationUtilities::applyConstraints(double tol) { void OpenCLIntegrationUtilities::applyConstraints(double tol) {
if (settleAtoms != NULL) { if (settleAtoms != NULL) {
settleKernel.setArg<cl_int>(0, settleAtoms->getSize()); if (!hasInitializedConstraintKernels) {
settleKernel.setArg<cl_int>(0, settleAtoms->getSize());
settleKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(3, posDelta->getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(4, posDelta->getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(5, context.getVelm().getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(6, settleAtoms->getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(7, settleParams->getDeviceBuffer());
}
settleKernel.setArg<cl_float>(1, (cl_float) tol); settleKernel.setArg<cl_float>(1, (cl_float) tol);
settleKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(3, posDelta->getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(4, posDelta->getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(5, context.getVelm().getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(6, settleAtoms->getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(7, settleParams->getDeviceBuffer());
context.executeKernel(settleKernel, settleAtoms->getSize()); context.executeKernel(settleKernel, settleAtoms->getSize());
} }
if (shakeAtoms != NULL) { if (shakeAtoms != NULL) {
shakeKernel.setArg<cl_int>(0, shakeAtoms->getSize()); if (!hasInitializedConstraintKernels) {
shakeKernel.setArg<cl_int>(0, shakeAtoms->getSize());
shakeKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
shakeKernel.setArg<cl::Buffer>(3, posDelta->getDeviceBuffer());
shakeKernel.setArg<cl::Buffer>(4, posDelta->getDeviceBuffer());
shakeKernel.setArg<cl::Buffer>(5, shakeAtoms->getDeviceBuffer());
shakeKernel.setArg<cl::Buffer>(6, shakeParams->getDeviceBuffer());
}
shakeKernel.setArg<cl_float>(1, (cl_float) tol); shakeKernel.setArg<cl_float>(1, (cl_float) tol);
shakeKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
shakeKernel.setArg<cl::Buffer>(3, posDelta->getDeviceBuffer());
shakeKernel.setArg<cl::Buffer>(4, posDelta->getDeviceBuffer());
shakeKernel.setArg<cl::Buffer>(5, shakeAtoms->getDeviceBuffer());
shakeKernel.setArg<cl::Buffer>(6, shakeParams->getDeviceBuffer());
context.executeKernel(shakeKernel, shakeAtoms->getSize()); context.executeKernel(shakeKernel, shakeAtoms->getSize());
} }
if (ccmaAtoms != NULL) {
if (!hasInitializedConstraintKernels) {
ccmaDirectionsKernel.setArg<cl::Buffer>(0, ccmaAtoms->getDeviceBuffer());
ccmaDirectionsKernel.setArg<cl::Buffer>(1, ccmaDistance->getDeviceBuffer());
ccmaDirectionsKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
ccmaDirectionsKernel.setArg<cl::Buffer>(3, ccmaConverged->getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(0, ccmaAtoms->getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(1, ccmaDistance->getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(2, posDelta->getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(3, ccmaReducedMass->getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(4, ccmaDelta1->getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(5, ccmaConverged->getDeviceBuffer());
ccmaForceKernel.setArg(6, sizeof(cl_int)*OpenCLContext::ThreadBlockSize, NULL);
ccmaMultiplyKernel.setArg<cl::Buffer>(0, ccmaDelta1->getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(1, ccmaDelta2->getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(2, ccmaConstraintMatrixColumn->getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(3, ccmaConstraintMatrixValue->getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(4, ccmaConverged->getDeviceBuffer());
ccmaMultiplyKernel.setArg(5, sizeof(cl_int)*OpenCLContext::ThreadBlockSize, NULL);
ccmaUpdateKernel.setArg<cl::Buffer>(0, ccmaNumAtomConstraints->getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(1, ccmaAtomConstraints->getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(2, ccmaDistance->getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(3, posDelta->getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(4, context.getVelm().getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(5, ccmaDelta1->getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(6, ccmaDelta2->getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(7, ccmaConverged->getDeviceBuffer());
}
ccmaForceKernel.setArg<cl_float>(7, (cl_float) tol);
context.executeKernel(ccmaDirectionsKernel, ccmaAtoms->getSize());
vector<cl_int> converged;
for (int i = 0; i < 75; i++) {
// To reduce the overhead of checking for convergence, perform two iterations
// each time through the loop.
context.executeKernel(ccmaForceKernel, ccmaAtoms->getSize());
context.executeKernel(ccmaMultiplyKernel, ccmaAtoms->getSize());
ccmaUpdateKernel.setArg<cl_int>(8, i);
context.executeKernel(ccmaUpdateKernel, context.getNumAtoms());
context.executeKernel(ccmaForceKernel, ccmaAtoms->getSize());
context.executeKernel(ccmaMultiplyKernel, ccmaAtoms->getSize());
ccmaConverged->download(converged);
if (converged[0])
break;
ccmaUpdateKernel.setArg<cl_int>(8, i);
context.executeKernel(ccmaUpdateKernel, context.getNumAtoms());
}
}
hasInitializedConstraintKernels = true;
} }
void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSeed) { void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSeed) {
......
...@@ -80,6 +80,10 @@ private: ...@@ -80,6 +80,10 @@ private:
OpenCLContext& context; OpenCLContext& context;
cl::Kernel settleKernel; cl::Kernel settleKernel;
cl::Kernel shakeKernel; cl::Kernel shakeKernel;
cl::Kernel ccmaDirectionsKernel;
cl::Kernel ccmaForceKernel;
cl::Kernel ccmaMultiplyKernel;
cl::Kernel ccmaUpdateKernel;
cl::Kernel randomKernel; cl::Kernel randomKernel;
OpenCLArray<mm_float4>* posDelta; OpenCLArray<mm_float4>* posDelta;
OpenCLArray<mm_int4>* settleAtoms; OpenCLArray<mm_int4>* settleAtoms;
...@@ -89,9 +93,21 @@ private: ...@@ -89,9 +93,21 @@ private:
OpenCLArray<mm_float4>* random; OpenCLArray<mm_float4>* random;
OpenCLArray<mm_int4>* randomSeed; OpenCLArray<mm_int4>* randomSeed;
OpenCLArray<mm_float2>* stepSize; OpenCLArray<mm_float2>* stepSize;
OpenCLArray<mm_int2>* ccmaAtoms;
OpenCLArray<mm_float4>* ccmaDistance;
OpenCLArray<cl_float>* ccmaReducedMass;
OpenCLArray<cl_int>* ccmaAtomConstraints;
OpenCLArray<cl_int>* ccmaNumAtomConstraints;
OpenCLArray<cl_int>* ccmaConstraintMatrixColumn;
OpenCLArray<cl_float>* ccmaConstraintMatrixValue;
OpenCLArray<cl_float>* ccmaDelta1;
OpenCLArray<cl_float>* ccmaDelta2;
OpenCLArray<cl_int>* ccmaConverged;
int randomPos; int randomPos;
int lastSeed; int lastSeed;
bool hasInitializedConstraintKernels;
struct ShakeCluster; struct ShakeCluster;
struct ConstraintOrderer;
}; };
} // namespace OpenMM } // namespace OpenMM
......
/**
* Compute the direction each constraint is pointing in. This is called once at the beginning of constraint evaluation.
*/
__kernel void computeConstraintDirections(__global int2* constraintAtoms, __global float4* constraintDistance, __global float4* atomPositions,
__global int* converged) {
for (int index = get_global_id(0); index < NUM_CONSTRAINTS; index += get_global_size(0)) {
// Compute the direction for this constraint.
int2 atoms = constraintAtoms[index];
float4 dir = constraintDistance[index];
float4 oldPos1 = atomPositions[atoms.x];
float4 oldPos2 = atomPositions[atoms.y];
dir.x = oldPos1.x-oldPos2.x;
dir.y = oldPos1.y-oldPos2.y;
dir.z = oldPos1.z-oldPos2.z;
constraintDistance[index] = dir;
}
// Mark that no work groups have converged yet.
for (int index = get_global_id(0); index < get_num_groups(0); index += get_global_size(0))
converged[index] = false;
}
/**
* Compute the force applied by each constraint.
*/
__kernel void computeConstraintForce(__global int2* constraintAtoms, __global float4* constraintDistance, __global float4* atomPositions,
__global float* reducedMass, __global float* delta1, __global int* converged, __local int* convergedBuffer, float tol) {
if (converged[get_group_id(0)])
return; // The constraint iteration has already converged.
float lowerTol = 1.0f-2.0f*tol+tol*tol;
float upperTol = 1.0f+2.0f*tol+tol*tol;
int threadConverged = 1;
for (int index = get_global_id(0); index < NUM_CONSTRAINTS; index += get_global_size(0)) {
// Compute the force due to this constraint.
int2 atoms = constraintAtoms[index];
float4 dir = constraintDistance[index];
float4 rp_ij = atomPositions[atoms.x]-atomPositions[atoms.y];
rp_ij.xyz += dir.xyz;
float rp2 = rp_ij.x*rp_ij.x + rp_ij.y*rp_ij.y + rp_ij.z*rp_ij.z;
float dist2 = dir.w*dir.w;
float diff = dist2 - rp2;
float rrpr = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z;
float d_ij2 = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z;
delta1[index] = (rrpr > d_ij2*1e-6f ? reducedMass[index]*diff/rrpr : 0.0f);
// See whether it has converged.
threadConverged &= (rp2 >= lowerTol*dist2 && rp2 <= upperTol*dist2);
}
// Perform a parallel reduction to see if all constraints handled by this work group have converged.
convergedBuffer[get_local_id(0)] = threadConverged;
barrier(CLK_LOCAL_MEM_FENCE);
for (int step = 1; step < get_local_size(0); step *= 2) {
if (get_local_id(0)%(2*step) == 0)
convergedBuffer[get_local_id(0)] &= convergedBuffer[get_local_id(0)+step];
barrier(CLK_LOCAL_MEM_FENCE);
}
if (get_local_id(0) == 0)
converged[get_group_id(0)] = convergedBuffer[0];
}
/**
* Multiply the vector of constraint forces by the constraint matrix.
*/
__kernel void multiplyByConstraintMatrix(__global float* delta1, __global float* delta2, __global int* constraintMatrixColumn,
__global float* constraintMatrixValue, __global int* converged, __local int* convergedBuffer) {
// First see whether all work groups have converged.
convergedBuffer[get_local_id(0)] = true;
for (int index = get_local_id(0); index < get_num_groups(0); index += get_local_size(0))
convergedBuffer[get_local_id(0)] &= converged[index];
barrier(CLK_LOCAL_MEM_FENCE);
for (int step = 1; step < get_local_size(0); step *= 2) {
if (get_local_id(0)%(2*step) == 0)
convergedBuffer[get_local_id(0)] &= convergedBuffer[get_local_id(0)+step];
barrier(CLK_LOCAL_MEM_FENCE);
}
if (get_local_id(0) == 0)
converged[get_group_id(0)] = convergedBuffer[0];
if (converged[0])
return; // The constraint iteration has already converged.
// Multiply by the inverse constraint matrix.
for (int index = get_global_id(0); index < NUM_CONSTRAINTS; index += get_global_size(0)) {
float sum = 0.0f;
for (int i = 0; ; i++) {
int element = index+i*NUM_CONSTRAINTS;
int column = constraintMatrixColumn[element];
if (column >= NUM_CONSTRAINTS)
break;
sum += delta1[column]*constraintMatrixValue[element];
}
delta2[index] = sum;
}
}
/**
* Update the atom positions based on constraint forces.
*/
__kernel void updateAtomPositions(__global int* numAtomConstraints, __global int* atomConstraints, __global float4* constraintDistance,
__global float4* atomPositions, __global float4* velm, __global float* delta1, __global float* delta2, __global int* converged, int iteration) {
if (converged[get_group_id(0)])
return; // The constraint iteration has already converged.
float damping = (iteration < 2 ? 0.5f : 1.0f);
for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
// Compute the new position of this atom.
float4 atomPos = atomPositions[index];
float invMass = velm[index].w;
int num = numAtomConstraints[index];
for (int i = 0; i < num; i++) {
int constraint = atomConstraints[index+i*NUM_ATOMS];
bool forward = (constraint > 0);
constraint = (forward ? constraint-1 : -constraint-1);
float constraintForce = damping*invMass*delta2[constraint];
constraintForce = (forward ? constraintForce : -constraintForce);
float4 dir = constraintDistance[constraint];
atomPos.x += constraintForce*dir.x;
atomPos.y += constraintForce*dir.y;
atomPos.z += constraintForce*dir.z;
}
atomPositions[index] = atomPos;
}
}
...@@ -234,7 +234,7 @@ int main() { ...@@ -234,7 +234,7 @@ int main() {
try { try {
testSingleBond(); testSingleBond();
testTemperature(); testTemperature();
// testConstraints(); testConstraints();
testRandomSeed(); testRandomSeed();
} }
catch(const exception& e) { catch(const exception& e) {
......
...@@ -239,7 +239,7 @@ int main() { ...@@ -239,7 +239,7 @@ int main() {
try { try {
testSingleBond(); testSingleBond();
testTemperature(); testTemperature();
// testConstraints(); testConstraints();
testRandomSeed(); testRandomSeed();
} }
catch(const exception& e) { catch(const exception& e) {
......
...@@ -232,7 +232,7 @@ int main() { ...@@ -232,7 +232,7 @@ int main() {
try { try {
testSingleBond(); testSingleBond();
testTemperature(); testTemperature();
// testConstraints(); testConstraints();
testRandomSeed(); testRandomSeed();
} }
catch(const exception& e) { catch(const exception& e) {
......
...@@ -212,8 +212,8 @@ void testConstrainedClusters() { ...@@ -212,8 +212,8 @@ void testConstrainedClusters() {
int main() { int main() {
try { try {
testSingleBond(); testSingleBond();
// testConstraints(); testConstraints();
// testConstrainedClusters(); testConstrainedClusters();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -203,8 +203,8 @@ void testConstrainedClusters() { ...@@ -203,8 +203,8 @@ void testConstrainedClusters() {
int main() { int main() {
try { try {
testSingleBond(); testSingleBond();
// testConstraints(); testConstraints();
// testConstrainedClusters(); testConstrainedClusters();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment