Unverified Commit b3d98469 authored by peastman's avatar peastman Committed by GitHub
Browse files

CCMA with a small number of constraints uses a single kernel (#2818)

* CCMA with a small number of constraints uses a single kernel

* Fixed compilation errors in kernel

* Fixed compilation errors in kernel

* Further optimizations to CCMA with few constraints
parent 6d20ff07
...@@ -136,7 +136,7 @@ protected: ...@@ -136,7 +136,7 @@ protected:
ComputeKernel settlePosKernel, settleVelKernel; ComputeKernel settlePosKernel, settleVelKernel;
ComputeKernel shakePosKernel, shakeVelKernel; ComputeKernel shakePosKernel, shakeVelKernel;
ComputeKernel ccmaDirectionsKernel, ccmaPosForceKernel, ccmaVelForceKernel; ComputeKernel ccmaDirectionsKernel, ccmaPosForceKernel, ccmaVelForceKernel;
ComputeKernel ccmaMultiplyKernel, ccmaUpdateKernel; ComputeKernel ccmaMultiplyKernel, ccmaUpdateKernel, ccmaFullKernel;
ComputeKernel vsitePositionKernel, vsiteForceKernel, vsiteSaveForcesKernel; ComputeKernel vsitePositionKernel, vsiteForceKernel, vsiteSaveForcesKernel;
ComputeKernel randomKernel, timeShiftKernel; ComputeKernel randomKernel, timeShiftKernel;
ComputeArray posDelta; ComputeArray posDelta;
...@@ -148,6 +148,7 @@ protected: ...@@ -148,6 +148,7 @@ protected:
ComputeArray randomSeed; ComputeArray randomSeed;
ComputeArray stepSize; ComputeArray stepSize;
ComputeArray ccmaAtoms; ComputeArray ccmaAtoms;
ComputeArray ccmaConstraintAtoms;
ComputeArray ccmaDistance; ComputeArray ccmaDistance;
ComputeArray ccmaReducedMass; ComputeArray ccmaReducedMass;
ComputeArray ccmaAtomConstraints; ComputeArray ccmaAtomConstraints;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2020 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <cmath> #include <cmath>
#include <cstdlib> #include <cstdlib>
#include <map> #include <map>
#include <set>
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -292,6 +293,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -292,6 +293,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
// Record the connections between constraints. // Record the connections between constraints.
int numCCMA = (int) ccmaConstraints.size(); int numCCMA = (int) ccmaConstraints.size();
int numCCMAAtoms = 0;
if (numCCMA > 0) { if (numCCMA > 0) {
// Record information needed by ReferenceCCMAAlgorithm. // Record information needed by ReferenceCCMAAlgorithm.
...@@ -354,14 +356,26 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -354,14 +356,26 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
for (int j = 0; j < (int)matrix[i].size(); ++j) for (int j = 0; j < (int)matrix[i].size(); ++j)
matrix[i][j].first = inverseOrder[matrix[i][j].first]; matrix[i][j].first = inverseOrder[matrix[i][j].first];
// Make a list of all atoms that involve a CCMA constraint.
set<int> ccmaAtomsSet;
for (int i = 0; i < numCCMA; i++) {
ccmaAtomsSet.insert(atom1[ccmaConstraints[i]]);
ccmaAtomsSet.insert(atom2[ccmaConstraints[i]]);
}
vector<int> ccmaAtomsVec(ccmaAtomsSet.begin(), ccmaAtomsSet.end());
sort(ccmaAtomsVec.begin(), ccmaAtomsVec.end());
numCCMAAtoms = ccmaAtomsVec.size();
// Record the CCMA data structures. // Record the CCMA data structures.
ccmaAtoms.initialize<mm_int2>(context, numCCMA, "CcmaAtoms"); ccmaAtoms.initialize<int>(context, numCCMAAtoms, "ccmaAtoms");
ccmaConstraintAtoms.initialize<mm_int2>(context, numCCMA, "ccmaConstraintAtoms");
ccmaAtomConstraints.initialize<int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints"); ccmaAtomConstraints.initialize<int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
ccmaNumAtomConstraints.initialize<int>(context, numAtoms, "CcmaAtomConstraintsIndex"); ccmaNumAtomConstraints.initialize<int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaConstraintMatrixColumn.initialize<int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn"); ccmaConstraintMatrixColumn.initialize<int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConverged.initialize<int>(context, 2, "ccmaConverged"); ccmaConverged.initialize<int>(context, 2, "ccmaConverged");
vector<mm_int2> atomsVec(ccmaAtoms.getSize()); vector<mm_int2> atomsVec(ccmaConstraintAtoms.getSize());
vector<int> atomConstraintsVec(ccmaAtomConstraints.getSize()); vector<int> atomConstraintsVec(ccmaAtomConstraints.getSize());
vector<int> numAtomConstraintsVec(ccmaNumAtomConstraints.getSize()); vector<int> numAtomConstraintsVec(ccmaNumAtomConstraints.getSize());
vector<int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn.getSize()); vector<int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn.getSize());
...@@ -397,7 +411,8 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -397,7 +411,8 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1); atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1);
} }
} }
ccmaAtoms.upload(atomsVec); ccmaAtoms.upload(ccmaAtomsVec);
ccmaConstraintAtoms.upload(atomsVec);
ccmaAtomConstraints.upload(atomConstraintsVec); ccmaAtomConstraints.upload(atomConstraintsVec);
ccmaNumAtomConstraints.upload(numAtomConstraintsVec); ccmaNumAtomConstraints.upload(numAtomConstraintsVec);
ccmaConstraintMatrixColumn.upload(constraintMatrixColumnVec); ccmaConstraintMatrixColumn.upload(constraintMatrixColumnVec);
...@@ -518,6 +533,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -518,6 +533,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
// Create the kernels used by this class. // Create the kernels used by this class.
map<string, string> defines; map<string, string> defines;
defines["NUM_CCMA_ATOMS"] = context.intToString(numCCMAAtoms);
defines["NUM_CCMA_CONSTRAINTS"] = context.intToString(numCCMA); defines["NUM_CCMA_CONSTRAINTS"] = context.intToString(numCCMA);
defines["NUM_ATOMS"] = context.intToString(numAtoms); defines["NUM_ATOMS"] = context.intToString(numAtoms);
defines["NUM_2_AVERAGE"] = context.intToString(num2Avg); defines["NUM_2_AVERAGE"] = context.intToString(num2Avg);
...@@ -532,11 +548,12 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -532,11 +548,12 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
settleVelKernel = program->createKernel("applySettleToVelocities"); settleVelKernel = program->createKernel("applySettleToVelocities");
shakePosKernel = program->createKernel("applyShakeToPositions"); shakePosKernel = program->createKernel("applyShakeToPositions");
shakeVelKernel = program->createKernel("applyShakeToVelocities"); shakeVelKernel = program->createKernel("applyShakeToVelocities");
ccmaDirectionsKernel = program->createKernel("computeCCMAConstraintDirections"); ccmaDirectionsKernel = program->createKernel("computeCCMAConstraintDirectionsKernel");
ccmaPosForceKernel = program->createKernel("computeCCMAPositionConstraintForce"); ccmaPosForceKernel = program->createKernel("computeCCMAPositionConstraintForceKernel");
ccmaVelForceKernel = program->createKernel("computeCCMAVelocityConstraintForce"); ccmaVelForceKernel = program->createKernel("computeCCMAVelocityConstraintForceKernel");
ccmaMultiplyKernel = program->createKernel("multiplyByCCMAConstraintMatrix"); ccmaMultiplyKernel = program->createKernel("multiplyByCCMAConstraintMatrixKernel");
ccmaUpdateKernel = program->createKernel("updateCCMAAtomPositions"); ccmaUpdateKernel = program->createKernel("updateCCMAAtomPositionsKernel");
ccmaFullKernel = program->createKernel("runCCMA");
vsitePositionKernel = program->createKernel("computeVirtualSites"); vsitePositionKernel = program->createKernel("computeVirtualSites");
vsiteForceKernel = program->createKernel("distributeVirtualSiteForces"); vsiteForceKernel = program->createKernel("distributeVirtualSiteForces");
vsiteSaveForcesKernel = program->createKernel("saveDistributedForces"); vsiteSaveForcesKernel = program->createKernel("saveDistributedForces");
...@@ -621,14 +638,14 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -621,14 +638,14 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
if (context.getUseMixedPrecision()) if (context.getUseMixedPrecision())
shakeVelKernel->addArg(context.getPosqCorrection()); shakeVelKernel->addArg(context.getPosqCorrection());
} }
if (ccmaAtoms.isInitialized()) { if (ccmaConstraintAtoms.isInitialized()) {
ccmaDirectionsKernel->addArg(ccmaAtoms); ccmaDirectionsKernel->addArg(ccmaConstraintAtoms);
ccmaDirectionsKernel->addArg(ccmaDistance); ccmaDirectionsKernel->addArg(ccmaDistance);
ccmaDirectionsKernel->addArg(context.getPosq()); ccmaDirectionsKernel->addArg(context.getPosq());
ccmaDirectionsKernel->addArg(ccmaConverged); ccmaDirectionsKernel->addArg(ccmaConverged);
if (context.getUseMixedPrecision()) if (context.getUseMixedPrecision())
ccmaDirectionsKernel->addArg(context.getPosqCorrection()); ccmaDirectionsKernel->addArg(context.getPosqCorrection());
ccmaPosForceKernel->addArg(ccmaAtoms); ccmaPosForceKernel->addArg(ccmaConstraintAtoms);
ccmaPosForceKernel->addArg(ccmaDistance); ccmaPosForceKernel->addArg(ccmaDistance);
ccmaPosForceKernel->addArg(posDelta); ccmaPosForceKernel->addArg(posDelta);
ccmaPosForceKernel->addArg(ccmaReducedMass); ccmaPosForceKernel->addArg(ccmaReducedMass);
...@@ -637,7 +654,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -637,7 +654,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
ccmaPosForceKernel->addArg(); ccmaPosForceKernel->addArg();
ccmaPosForceKernel->addArg(); ccmaPosForceKernel->addArg();
ccmaPosForceKernel->addArg(); ccmaPosForceKernel->addArg();
ccmaVelForceKernel->addArg(ccmaAtoms); ccmaVelForceKernel->addArg(ccmaConstraintAtoms);
ccmaVelForceKernel->addArg(ccmaDistance); ccmaVelForceKernel->addArg(ccmaDistance);
ccmaVelForceKernel->addArg(context.getVelm()); ccmaVelForceKernel->addArg(context.getVelm());
ccmaVelForceKernel->addArg(ccmaReducedMass); ccmaVelForceKernel->addArg(ccmaReducedMass);
...@@ -652,6 +669,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -652,6 +669,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
ccmaMultiplyKernel->addArg(ccmaConstraintMatrixValue); ccmaMultiplyKernel->addArg(ccmaConstraintMatrixValue);
ccmaMultiplyKernel->addArg(ccmaConverged); ccmaMultiplyKernel->addArg(ccmaConverged);
ccmaMultiplyKernel->addArg(); ccmaMultiplyKernel->addArg();
ccmaUpdateKernel->addArg(ccmaAtoms);
ccmaUpdateKernel->addArg(ccmaNumAtomConstraints); ccmaUpdateKernel->addArg(ccmaNumAtomConstraints);
ccmaUpdateKernel->addArg(ccmaAtomConstraints); ccmaUpdateKernel->addArg(ccmaAtomConstraints);
ccmaUpdateKernel->addArg(ccmaDistance); ccmaUpdateKernel->addArg(ccmaDistance);
...@@ -661,6 +679,23 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -661,6 +679,23 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
ccmaUpdateKernel->addArg(ccmaDelta2); ccmaUpdateKernel->addArg(ccmaDelta2);
ccmaUpdateKernel->addArg(ccmaConverged); ccmaUpdateKernel->addArg(ccmaConverged);
ccmaUpdateKernel->addArg(); ccmaUpdateKernel->addArg();
ccmaFullKernel->addArg();
ccmaFullKernel->addArg(ccmaAtoms);
ccmaFullKernel->addArg(ccmaNumAtomConstraints);
ccmaFullKernel->addArg(ccmaAtomConstraints);
ccmaFullKernel->addArg(ccmaConstraintAtoms);
ccmaFullKernel->addArg(ccmaDistance);
ccmaFullKernel->addArg(context.getPosq());
ccmaFullKernel->addArg(context.getVelm());
ccmaFullKernel->addArg(posDelta);
ccmaFullKernel->addArg(ccmaReducedMass);
ccmaFullKernel->addArg(ccmaDelta1);
ccmaFullKernel->addArg(ccmaDelta2);
ccmaFullKernel->addArg(ccmaConstraintMatrixColumn);
ccmaFullKernel->addArg(ccmaConstraintMatrixValue);
ccmaFullKernel->addArg();
if (context.getUseMixedPrecision())
ccmaFullKernel->addArg(context.getPosqCorrection());
} }
// Arguments for time shift kernel will be set later. // Arguments for time shift kernel will be set later.
......
...@@ -556,8 +556,8 @@ KERNEL void applySettleToVelocities(int numClusters, mixed tol, GLOBAL const rea ...@@ -556,8 +556,8 @@ KERNEL void applySettleToVelocities(int numClusters, mixed tol, GLOBAL const rea
/** /**
* Compute the direction each CCMA constraint is pointing in. This is called once at the beginning of constraint evaluation. * Compute the direction each CCMA constraint is pointing in. This is called once at the beginning of constraint evaluation.
*/ */
KERNEL void computeCCMAConstraintDirections(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL mixed4* RESTRICT constraintDistance, DEVICE void computeCCMAConstraintDirections(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL mixed4* RESTRICT constraintDistance,
GLOBAL const real4* RESTRICT atomPositions, GLOBAL int* RESTRICT converged GLOBAL const real4* RESTRICT atomPositions
#ifdef USE_MIXED_PRECISION #ifdef USE_MIXED_PRECISION
, GLOBAL const real4* RESTRICT posqCorrection , GLOBAL const real4* RESTRICT posqCorrection
#endif #endif
...@@ -577,6 +577,19 @@ KERNEL void computeCCMAConstraintDirections(GLOBAL const int2* RESTRICT constrai ...@@ -577,6 +577,19 @@ KERNEL void computeCCMAConstraintDirections(GLOBAL const int2* RESTRICT constrai
dir.z = oldPos1.z-oldPos2.z; dir.z = oldPos1.z-oldPos2.z;
constraintDistance[index] = dir; constraintDistance[index] = dir;
} }
}
KERNEL void computeCCMAConstraintDirectionsKernel(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL mixed4* RESTRICT constraintDistance,
GLOBAL const real4* RESTRICT atomPositions, GLOBAL int* RESTRICT converged
#ifdef USE_MIXED_PRECISION
, GLOBAL const real4* RESTRICT posqCorrection
#endif
) {
#ifdef USE_MIXED_PRECISION
computeCCMAConstraintDirections(constraintAtoms, constraintDistance, atomPositions, posqCorrection);
#else
computeCCMAConstraintDirections(constraintAtoms, constraintDistance, atomPositions);
#endif
if (GLOBAL_ID == 0) { if (GLOBAL_ID == 0) {
converged[0] = 1; converged[0] = 1;
converged[1] = 0; converged[1] = 0;
...@@ -586,19 +599,11 @@ KERNEL void computeCCMAConstraintDirections(GLOBAL const int2* RESTRICT constrai ...@@ -586,19 +599,11 @@ KERNEL void computeCCMAConstraintDirections(GLOBAL const int2* RESTRICT constrai
/** /**
* Compute the force applied by each CCMA position constraint. * Compute the force applied by each CCMA position constraint.
*/ */
KERNEL void computeCCMAPositionConstraintForce(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL const mixed4* RESTRICT constraintDistance, DEVICE void computeCCMAPositionConstraintForce(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL const mixed4* RESTRICT constraintDistance,
GLOBAL const mixed4* RESTRICT atomPositions, GLOBAL const mixed* RESTRICT reducedMass, GLOBAL mixed* RESTRICT delta1, GLOBAL const mixed4* RESTRICT atomPositions, GLOBAL const mixed* RESTRICT reducedMass, GLOBAL mixed* RESTRICT delta1,
GLOBAL int* RESTRICT converged, GLOBAL int* RESTRICT hostConvergedFlag, mixed tol, int iteration) { mixed tol, int iteration, LOCAL_ARG int* groupConverged) {
LOCAL int groupConverged;
if (converged[1-iteration%2]) {
if (GLOBAL_ID == 0) {
converged[iteration%2] = 1;
hostConvergedFlag[0] = 1;
}
return; // The constraint iteration has already converged.
}
if (LOCAL_ID == 0) if (LOCAL_ID == 0)
groupConverged = 1; *groupConverged = 1;
SYNC_THREADS; SYNC_THREADS;
mixed lowerTol = 1-2*tol+tol*tol; mixed lowerTol = 1-2*tol+tol*tol;
mixed upperTol = 1+2*tol+tol*tol; mixed upperTol = 1+2*tol+tol*tol;
...@@ -620,30 +625,38 @@ KERNEL void computeCCMAPositionConstraintForce(GLOBAL const int2* RESTRICT const ...@@ -620,30 +625,38 @@ KERNEL void computeCCMAPositionConstraintForce(GLOBAL const int2* RESTRICT const
delta1[index] = (rrpr > d_ij2*1e-6f ? reducedMass[index]*diff/rrpr : 0.0f); delta1[index] = (rrpr > d_ij2*1e-6f ? reducedMass[index]*diff/rrpr : 0.0f);
threadConverged &= (rp2 > lowerTol*dist2 && rp2 < upperTol*dist2); threadConverged &= (rp2 > lowerTol*dist2 && rp2 < upperTol*dist2);
} }
if (groupConverged && !threadConverged) if (*groupConverged && !threadConverged)
groupConverged = 0; *groupConverged = 0;
SYNC_THREADS;
if (LOCAL_ID == 0 && !groupConverged)
converged[iteration%2] = 0;
} }
/** KERNEL void computeCCMAPositionConstraintForceKernel(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL const mixed4* RESTRICT constraintDistance,
* Compute the force applied by each CCMA velocity constraint.
*/
KERNEL void computeCCMAVelocityConstraintForce(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL const mixed4* RESTRICT constraintDistance,
GLOBAL const mixed4* RESTRICT atomPositions, GLOBAL const mixed* RESTRICT reducedMass, GLOBAL mixed* RESTRICT delta1, GLOBAL const mixed4* RESTRICT atomPositions, GLOBAL const mixed* RESTRICT reducedMass, GLOBAL mixed* RESTRICT delta1,
GLOBAL int* RESTRICT converged, GLOBAL int* RESTRICT hostConvergedFlag, mixed tol, int iteration) { GLOBAL int* RESTRICT converged, GLOBAL int* RESTRICT hostConvergedFlag, mixed tol, int iteration) {
LOCAL int groupConverged; LOCAL int groupConverged;
if (converged[1-iteration%2]) { if (converged[1-iteration%2]) {
if (GROUP_ID == 0 && LOCAL_ID == 0) { if (GLOBAL_ID == 0) {
converged[iteration%2] = 1; converged[iteration%2] = 1;
hostConvergedFlag[0] = 1; hostConvergedFlag[0] = 1;
} }
return; // The constraint iteration has already converged. return; // The constraint iteration has already converged.
} }
computeCCMAPositionConstraintForce(constraintAtoms, constraintDistance, atomPositions, reducedMass,
delta1, tol, iteration, &groupConverged);
SYNC_THREADS;
if (LOCAL_ID == 0 && !groupConverged)
converged[iteration%2] = 0;
}
/**
* Compute the force applied by each CCMA velocity constraint.
*/
DEVICE void computeCCMAVelocityConstraintForce(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL const mixed4* RESTRICT constraintDistance,
GLOBAL const mixed4* RESTRICT atomPositions, GLOBAL const mixed* RESTRICT reducedMass, GLOBAL mixed* RESTRICT delta1,
mixed tol, int iteration, LOCAL_ARG int* groupConverged) {
if (LOCAL_ID == 0) if (LOCAL_ID == 0)
groupConverged = 1; *groupConverged = 1;
SYNC_THREADS; SYNC_THREADS;
bool threadConverged = true;
for (int index = GLOBAL_ID; index < NUM_CCMA_CONSTRAINTS; index += GLOBAL_SIZE) { for (int index = GLOBAL_ID; index < NUM_CCMA_CONSTRAINTS; index += GLOBAL_SIZE) {
// Compute the force due to this constraint. // Compute the force due to this constraint.
...@@ -653,24 +666,34 @@ KERNEL void computeCCMAVelocityConstraintForce(GLOBAL const int2* RESTRICT const ...@@ -653,24 +666,34 @@ KERNEL void computeCCMAVelocityConstraintForce(GLOBAL const int2* RESTRICT const
mixed rrpr = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z; mixed rrpr = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z;
mixed d_ij2 = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z; mixed d_ij2 = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z;
delta1[index] = -2*reducedMass[index]*rrpr/d_ij2; delta1[index] = -2*reducedMass[index]*rrpr/d_ij2;
threadConverged &= (fabs(delta1[index]) <= tol);
}
if (*groupConverged && !threadConverged)
*groupConverged = 0;
}
// See whether it has converged. KERNEL void computeCCMAVelocityConstraintForceKernel(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL const mixed4* RESTRICT constraintDistance,
GLOBAL const mixed4* RESTRICT atomPositions, GLOBAL const mixed* RESTRICT reducedMass, GLOBAL mixed* RESTRICT delta1,
if (groupConverged && fabs(delta1[index]) > tol) { GLOBAL int* RESTRICT converged, GLOBAL int* RESTRICT hostConvergedFlag, mixed tol, int iteration) {
groupConverged = 0; LOCAL int groupConverged;
converged[iteration%2] = 0; if (converged[1-iteration%2]) {
if (GROUP_ID == 0 && LOCAL_ID == 0) {
converged[iteration%2] = 1;
hostConvergedFlag[0] = 1;
} }
return; // The constraint iteration has already converged.
} }
computeCCMAVelocityConstraintForce(constraintAtoms, constraintDistance, atomPositions, reducedMass,
delta1, tol, iteration, &groupConverged);
if (LOCAL_ID == 0 && !groupConverged)
converged[iteration%2] = 0;
} }
/** /**
* Multiply the vector of CCMA constraint forces by the constraint matrix. * Multiply the vector of CCMA constraint forces by the constraint matrix.
*/ */
KERNEL void multiplyByCCMAConstraintMatrix(GLOBAL const mixed* RESTRICT delta1, GLOBAL mixed* RESTRICT delta2, GLOBAL const int* RESTRICT constraintMatrixColumn, DEVICE void multiplyByCCMAConstraintMatrix(GLOBAL const mixed* RESTRICT delta1, GLOBAL mixed* RESTRICT delta2, GLOBAL const int* RESTRICT constraintMatrixColumn,
GLOBAL const mixed* RESTRICT constraintMatrixValue, GLOBAL const int* RESTRICT converged, int iteration) { GLOBAL const mixed* RESTRICT constraintMatrixValue, int iteration) {
if (converged[iteration%2])
return; // The constraint iteration has already converged.
// Multiply by the inverse constraint matrix. // Multiply by the inverse constraint matrix.
for (int index = GLOBAL_ID; index < NUM_CCMA_CONSTRAINTS; index += GLOBAL_SIZE) { for (int index = GLOBAL_ID; index < NUM_CCMA_CONSTRAINTS; index += GLOBAL_SIZE) {
...@@ -686,20 +709,24 @@ KERNEL void multiplyByCCMAConstraintMatrix(GLOBAL const mixed* RESTRICT delta1, ...@@ -686,20 +709,24 @@ KERNEL void multiplyByCCMAConstraintMatrix(GLOBAL const mixed* RESTRICT delta1,
} }
} }
KERNEL void multiplyByCCMAConstraintMatrixKernel(GLOBAL const mixed* RESTRICT delta1, GLOBAL mixed* RESTRICT delta2, GLOBAL const int* RESTRICT constraintMatrixColumn,
GLOBAL const mixed* RESTRICT constraintMatrixValue, GLOBAL const int* RESTRICT converged, int iteration) {
if (converged[iteration%2])
return; // The constraint iteration has already converged.
multiplyByCCMAConstraintMatrix(delta1, delta2, constraintMatrixColumn, constraintMatrixValue, iteration);
}
/** /**
* Update the atom positions based on CCMA constraint forces. * Update the atom positions based on CCMA constraint forces.
*/ */
KERNEL void updateCCMAAtomPositions(GLOBAL const int* RESTRICT numAtomConstraints, GLOBAL const int* RESTRICT atomConstraints, DEVICE void updateCCMAAtomPositions(GLOBAL const int* RESTRICT atoms, GLOBAL const int* RESTRICT numAtomConstraints, GLOBAL const int* RESTRICT atomConstraints,
GLOBAL const mixed4* RESTRICT constraintDistance, GLOBAL mixed4* RESTRICT atomPositions, GLOBAL const mixed4* RESTRICT velm, GLOBAL const mixed4* RESTRICT constraintDistance, GLOBAL mixed4* RESTRICT atomPositions, GLOBAL const mixed4* RESTRICT velm,
GLOBAL const mixed* RESTRICT delta1, GLOBAL const mixed* RESTRICT delta2, GLOBAL int* RESTRICT converged, int iteration) { GLOBAL const mixed* RESTRICT delta1, GLOBAL const mixed* RESTRICT delta2, int iteration) {
if (GROUP_ID == 0 && LOCAL_ID == 0)
converged[1-iteration%2] = 1;
if (converged[iteration%2])
return; // The constraint iteration has already converged.
mixed damping = (iteration < 2 ? 0.5f : 1.0f); mixed damping = (iteration < 2 ? 0.5f : 1.0f);
for (int index = GLOBAL_ID; index < NUM_ATOMS; index += GLOBAL_SIZE) { for (int i = GLOBAL_ID; i < NUM_CCMA_ATOMS; i += GLOBAL_SIZE) {
// Compute the new position of this atom. // Compute the new position of this atom.
int index = atoms[i];
mixed4 atomPos = atomPositions[index]; mixed4 atomPos = atomPositions[index];
mixed invMass = velm[index].w; mixed invMass = velm[index].w;
int num = numAtomConstraints[index]; int num = numAtomConstraints[index];
...@@ -718,6 +745,60 @@ KERNEL void updateCCMAAtomPositions(GLOBAL const int* RESTRICT numAtomConstraint ...@@ -718,6 +745,60 @@ KERNEL void updateCCMAAtomPositions(GLOBAL const int* RESTRICT numAtomConstraint
} }
} }
KERNEL void updateCCMAAtomPositionsKernel(GLOBAL const int* RESTRICT atoms, GLOBAL const int* RESTRICT numAtomConstraints, GLOBAL const int* RESTRICT atomConstraints,
GLOBAL const mixed4* RESTRICT constraintDistance, GLOBAL mixed4* RESTRICT atomPositions, GLOBAL const mixed4* RESTRICT velm,
GLOBAL const mixed* RESTRICT delta1, GLOBAL const mixed* RESTRICT delta2, GLOBAL int* RESTRICT converged, int iteration) {
if (GROUP_ID == 0 && LOCAL_ID == 0)
converged[1-iteration%2] = 1;
if (converged[iteration%2])
return; // The constraint iteration has already converged.
updateCCMAAtomPositions(atoms, numAtomConstraints, atomConstraints, constraintDistance, atomPositions, velm,
delta1, delta2, iteration);
}
/**
* Run the entire CCMA iteration within a single kernel. This has far less overhead than
* using multiple kernels, but requires the calculation to use only a single workgroup.
* That makes it faster for small numbers of constraints, but slower for large numbers.
*/
KERNEL void runCCMA(int constrainVelocities, GLOBAL const int* RESTRICT atoms, GLOBAL const int* RESTRICT numAtomConstraints, GLOBAL const int* RESTRICT atomConstraints,
GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL mixed4* RESTRICT constraintDistance, GLOBAL const real4* RESTRICT atomPositions,
GLOBAL mixed4* RESTRICT velm, GLOBAL mixed4* RESTRICT posDelta, GLOBAL const mixed* RESTRICT reducedMass,
GLOBAL mixed* RESTRICT delta1, GLOBAL mixed* RESTRICT delta2, GLOBAL const int* RESTRICT constraintMatrixColumn,
GLOBAL const mixed* RESTRICT constraintMatrixValue, mixed tol
#ifdef USE_MIXED_PRECISION
, GLOBAL const real4* RESTRICT posqCorrection
#endif
) {
LOCAL int groupConverged;
#ifdef USE_MIXED_PRECISION
computeCCMAConstraintDirections(constraintAtoms, constraintDistance, atomPositions, posqCorrection);
#else
computeCCMAConstraintDirections(constraintAtoms, constraintDistance, atomPositions);
#endif
for (int iteration = 0; iteration < 150; iteration++) {
SYNC_THREADS
if (constrainVelocities)
computeCCMAVelocityConstraintForce(constraintAtoms, constraintDistance, velm, reducedMass,
delta1, tol, iteration, &groupConverged);
else
computeCCMAPositionConstraintForce(constraintAtoms, constraintDistance, posDelta, reducedMass,
delta1, tol, iteration, &groupConverged);
SYNC_THREADS
multiplyByCCMAConstraintMatrix(delta1, delta2, constraintMatrixColumn, constraintMatrixValue, iteration);
SYNC_THREADS
if (constrainVelocities)
updateCCMAAtomPositions(atoms, numAtomConstraints, atomConstraints, constraintDistance, velm, velm,
delta1, delta2, iteration);
else
updateCCMAAtomPositions(atoms, numAtomConstraints, atomConstraints, constraintDistance, posDelta, velm,
delta1, delta2, iteration);
SYNC_THREADS
if (groupConverged)
return;
}
}
/** /**
* Compute the positions of virtual sites * Compute the positions of virtual sites
*/ */
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2020 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -91,29 +91,40 @@ void CudaIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, do ...@@ -91,29 +91,40 @@ void CudaIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, do
shakeKernel->setArg(1, (float) tol); shakeKernel->setArg(1, (float) tol);
shakeKernel->execute(shakeAtoms.getSize()); shakeKernel->execute(shakeAtoms.getSize());
} }
if (ccmaAtoms.isInitialized()) { if (ccmaConstraintAtoms.isInitialized()) {
ccmaForceKernel->setArg(6, ccmaConvergedDeviceMemory); if (ccmaConstraintAtoms.getSize() <= 1024) {
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) // Use the version of CCMA that runs in a single kernel with one workgroup.
ccmaForceKernel->setArg(7, tol); ccmaFullKernel->setArg(0, (int) constrainVelocities);
else if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
ccmaForceKernel->setArg(7, (float) tol); ccmaFullKernel->setArg(14, tol);
ccmaDirectionsKernel->execute(ccmaAtoms.getSize()); else
const int checkInterval = 4; ccmaFullKernel->setArg(14, (float) tol);
ccmaConvergedMemory[0] = 0; ccmaFullKernel->execute(128, 128);
ccmaUpdateKernel->setArg(3, constrainVelocities ? context.getVelm() : posDelta); }
for (int i = 0; i < 150; i++) { else {
ccmaForceKernel->setArg(8, i); ccmaForceKernel->setArg(6, ccmaConvergedDeviceMemory);
ccmaForceKernel->execute(ccmaAtoms.getSize()); if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
if ((i+1)%checkInterval == 0) ccmaForceKernel->setArg(7, tol);
CHECK_RESULT2(cuEventRecord(ccmaEvent, 0), "Error recording event for CCMA"); else
ccmaMultiplyKernel->setArg(5, i); ccmaForceKernel->setArg(7, (float) tol);
ccmaMultiplyKernel->execute(ccmaAtoms.getSize()); ccmaDirectionsKernel->execute(ccmaConstraintAtoms.getSize());
ccmaUpdateKernel->setArg(8, i); const int checkInterval = 4;
ccmaUpdateKernel->execute(context.getNumAtoms()); ccmaConvergedMemory[0] = 0;
if ((i+1)%checkInterval == 0) { ccmaUpdateKernel->setArg(4, constrainVelocities ? context.getVelm() : posDelta);
CHECK_RESULT2(cuEventSynchronize(ccmaEvent), "Error synchronizing on event for CCMA"); for (int i = 0; i < 150; i++) {
if (ccmaConvergedMemory[0]) ccmaForceKernel->setArg(8, i);
break; ccmaForceKernel->execute(ccmaConstraintAtoms.getSize());
if ((i+1)%checkInterval == 0)
CHECK_RESULT2(cuEventRecord(ccmaEvent, 0), "Error recording event for CCMA");
ccmaMultiplyKernel->setArg(5, i);
ccmaMultiplyKernel->execute(ccmaConstraintAtoms.getSize());
ccmaUpdateKernel->setArg(9, i);
ccmaUpdateKernel->execute(context.getNumAtoms());
if ((i+1)%checkInterval == 0) {
CHECK_RESULT2(cuEventSynchronize(ccmaEvent), "Error synchronizing on event for CCMA");
if (ccmaConvergedMemory[0])
break;
}
} }
} }
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2020 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -76,44 +76,56 @@ void OpenCLIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, ...@@ -76,44 +76,56 @@ void OpenCLIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities,
shakeKernel->setArg(1, (float) tol); shakeKernel->setArg(1, (float) tol);
shakeKernel->execute(shakeAtoms.getSize()); shakeKernel->execute(shakeAtoms.getSize());
} }
if (ccmaAtoms.isInitialized()) { if (ccmaConstraintAtoms.isInitialized()) {
ccmaForceKernel->setArg(6, ccmaConvergedHostBuffer); if (ccmaConstraintAtoms.getSize() <= 1024) {
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) // Use the version of CCMA that runs in a single kernel with one workgroup.
ccmaForceKernel->setArg(7, tol); ccmaFullKernel->setArg(0, (int) constrainVelocities);
else if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
ccmaForceKernel->setArg(7, (float) tol); ccmaFullKernel->setArg(14, tol);
ccmaDirectionsKernel->execute(ccmaAtoms.getSize()); else
const int checkInterval = 4; ccmaFullKernel->setArg(14, (float) tol);
OpenCLContext& cl = dynamic_cast<OpenCLContext&>(context); ccmaFullKernel->execute(128, 128);
cl::CommandQueue queue = cl.getQueue(); }
int* converged = (int*) context.getPinnedBuffer(); else {
int* ccmaConvergedHostMemory = (int*) queue.enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_int)); // Use the version of CCMA that uses multiple kernels.
ccmaConvergedHostMemory[0] = 0; ccmaForceKernel->setArg(6, ccmaConvergedHostBuffer);
queue.enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory); if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
ccmaUpdateKernel->setArg(3, constrainVelocities ? context.getVelm() : posDelta); ccmaForceKernel->setArg(7, tol);
for (int i = 0; i < 150; i++) { else
ccmaForceKernel->setArg(8, i); ccmaForceKernel->setArg(7, (float) tol);
ccmaForceKernel->execute(ccmaAtoms.getSize()); ccmaDirectionsKernel->execute(ccmaConstraintAtoms.getSize());
cl::Event event; const int checkInterval = 4;
if ((i+1)%checkInterval == 0 && !ccmaUseDirectBuffer) OpenCLContext& cl = dynamic_cast<OpenCLContext&>(context);
queue.enqueueReadBuffer(cl.unwrap(ccmaConverged).getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(int), converged, NULL, &event); cl::CommandQueue queue = cl.getQueue();
ccmaMultiplyKernel->setArg(5, i); int* converged = (int*) context.getPinnedBuffer();
ccmaMultiplyKernel->execute(ccmaAtoms.getSize()); int* ccmaConvergedHostMemory = (int*) queue.enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_int));
ccmaUpdateKernel->setArg(8, i); ccmaConvergedHostMemory[0] = 0;
ccmaUpdateKernel->execute(context.getNumAtoms()); queue.enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
if ((i+1)%checkInterval == 0) { ccmaUpdateKernel->setArg(4, constrainVelocities ? context.getVelm() : posDelta);
if (ccmaUseDirectBuffer) { for (int i = 0; i < 150; i++) {
ccmaConvergedHostMemory = (int*) queue.enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_FALSE, CL_MAP_READ, 0, sizeof(cl_int), NULL, &event); ccmaForceKernel->setArg(8, i);
queue.flush(); ccmaForceKernel->execute(ccmaConstraintAtoms.getSize());
while (event.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() != CL_COMPLETE) cl::Event event;
; if ((i+1)%checkInterval == 0 && !ccmaUseDirectBuffer)
converged[i%2] = ccmaConvergedHostMemory[0]; queue.enqueueReadBuffer(cl.unwrap(ccmaConverged).getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(int), converged, NULL, &event);
queue.enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory); ccmaMultiplyKernel->setArg(5, i);
ccmaMultiplyKernel->execute(ccmaConstraintAtoms.getSize());
ccmaUpdateKernel->setArg(9, i);
ccmaUpdateKernel->execute(context.getNumAtoms());
if ((i+1)%checkInterval == 0) {
if (ccmaUseDirectBuffer) {
ccmaConvergedHostMemory = (int*) queue.enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_FALSE, CL_MAP_READ, 0, sizeof(cl_int), NULL, &event);
queue.flush();
while (event.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() != CL_COMPLETE)
;
converged[i%2] = ccmaConvergedHostMemory[0];
queue.enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
}
else
event.wait();
if (converged[i%2])
break;
} }
else
event.wait();
if (converged[i%2])
break;
} }
} }
} }
......
...@@ -227,6 +227,52 @@ void testConstrainedMasslessParticles() { ...@@ -227,6 +227,52 @@ void testConstrainedMasslessParticles() {
ASSERT_EQUAL(0.0, state.getVelocities()[0][0]); ASSERT_EQUAL(0.0, state.getVelocities()[0][0]);
} }
void testConstrainedChain(int numParticles) {
// Create a linear chain of particles with all distances constrained.
System system;
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
positions[i] = Vec3(i, 0, 0);
if (i > 0) {
system.addConstraint(i-1, i, 1.0);
Vec3 delta(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
delta /= sqrt(delta.dot(delta));
positions[i] = positions[i-1]+delta;
}
}
VerletIntegrator integrator(0.001);
integrator.setConstraintTolerance(1e-5);
Context context(system, integrator, platform);
context.setPositions(positions);
context.setVelocitiesToTemperature(300.0);
// Simulate it and see whether the constraints remain satisfied.
double initialEnergy = 0.0;
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions | State::Energy | State::Velocities | State::Forces);
for (int j = 0; j < system.getNumConstraints(); ++j) {
int particle1, particle2;
double distance;
system.getConstraintParameters(j, particle1, particle2, distance);
Vec3 p1 = state.getPositions()[particle1];
Vec3 p2 = state.getPositions()[particle2];
double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
ASSERT_EQUAL_TOL(distance, dist, 2e-5);
}
double energy = state.getPotentialEnergy()+state.getKineticEnergy();
if (i == 1)
initialEnergy = energy;
else if (i > 1)
ASSERT_EQUAL_TOL(initialEnergy, energy, 0.01);
integrator.step(1);
}
}
void testInitialTemperature() { void testInitialTemperature() {
// Check temperature initialization for a collection of randomly placed particles // Check temperature initialization for a collection of randomly placed particles
const int numParticles = 50000; const int numParticles = 50000;
...@@ -289,6 +335,8 @@ int main(int argc, char* argv[]) { ...@@ -289,6 +335,8 @@ int main(int argc, char* argv[]) {
testConstraints(); testConstraints();
testConstrainedClusters(); testConstrainedClusters();
testConstrainedMasslessParticles(); testConstrainedMasslessParticles();
testConstrainedChain(10);
testConstrainedChain(1500);
testInitialTemperature(); testInitialTemperature();
testForceGroups(); testForceGroups();
runPlatformTests(); runPlatformTests();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment