Unverified Commit b3d98469 authored by peastman's avatar peastman Committed by GitHub
Browse files

CCMA with a small number of constraints uses a single kernel (#2818)

* CCMA with a small number of constraints uses a single kernel

* Fixed compilation errors in kernel

* Fixed compilation errors in kernel

* Further optimizations to CCMA with few constraints
parent 6d20ff07
......@@ -136,7 +136,7 @@ protected:
ComputeKernel settlePosKernel, settleVelKernel;
ComputeKernel shakePosKernel, shakeVelKernel;
ComputeKernel ccmaDirectionsKernel, ccmaPosForceKernel, ccmaVelForceKernel;
ComputeKernel ccmaMultiplyKernel, ccmaUpdateKernel;
ComputeKernel ccmaMultiplyKernel, ccmaUpdateKernel, ccmaFullKernel;
ComputeKernel vsitePositionKernel, vsiteForceKernel, vsiteSaveForcesKernel;
ComputeKernel randomKernel, timeShiftKernel;
ComputeArray posDelta;
......@@ -148,6 +148,7 @@ protected:
ComputeArray randomSeed;
ComputeArray stepSize;
ComputeArray ccmaAtoms;
ComputeArray ccmaConstraintAtoms;
ComputeArray ccmaDistance;
ComputeArray ccmaReducedMass;
ComputeArray ccmaAtomConstraints;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2009-2020 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -36,6 +36,7 @@
#include <cmath>
#include <cstdlib>
#include <map>
#include <set>
using namespace OpenMM;
using namespace std;
......@@ -292,6 +293,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
// Record the connections between constraints.
int numCCMA = (int) ccmaConstraints.size();
int numCCMAAtoms = 0;
if (numCCMA > 0) {
// Record information needed by ReferenceCCMAAlgorithm.
......@@ -354,14 +356,26 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
for (int j = 0; j < (int)matrix[i].size(); ++j)
matrix[i][j].first = inverseOrder[matrix[i][j].first];
// Make a list of all atoms that involve a CCMA constraint.
set<int> ccmaAtomsSet;
for (int i = 0; i < numCCMA; i++) {
ccmaAtomsSet.insert(atom1[ccmaConstraints[i]]);
ccmaAtomsSet.insert(atom2[ccmaConstraints[i]]);
}
vector<int> ccmaAtomsVec(ccmaAtomsSet.begin(), ccmaAtomsSet.end());
sort(ccmaAtomsVec.begin(), ccmaAtomsVec.end());
numCCMAAtoms = ccmaAtomsVec.size();
// Record the CCMA data structures.
ccmaAtoms.initialize<mm_int2>(context, numCCMA, "CcmaAtoms");
ccmaAtoms.initialize<int>(context, numCCMAAtoms, "ccmaAtoms");
ccmaConstraintAtoms.initialize<mm_int2>(context, numCCMA, "ccmaConstraintAtoms");
ccmaAtomConstraints.initialize<int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
ccmaNumAtomConstraints.initialize<int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaConstraintMatrixColumn.initialize<int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConverged.initialize<int>(context, 2, "ccmaConverged");
vector<mm_int2> atomsVec(ccmaAtoms.getSize());
vector<mm_int2> atomsVec(ccmaConstraintAtoms.getSize());
vector<int> atomConstraintsVec(ccmaAtomConstraints.getSize());
vector<int> numAtomConstraintsVec(ccmaNumAtomConstraints.getSize());
vector<int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn.getSize());
......@@ -397,7 +411,8 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1);
}
}
ccmaAtoms.upload(atomsVec);
ccmaAtoms.upload(ccmaAtomsVec);
ccmaConstraintAtoms.upload(atomsVec);
ccmaAtomConstraints.upload(atomConstraintsVec);
ccmaNumAtomConstraints.upload(numAtomConstraintsVec);
ccmaConstraintMatrixColumn.upload(constraintMatrixColumnVec);
......@@ -518,6 +533,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
// Create the kernels used by this class.
map<string, string> defines;
defines["NUM_CCMA_ATOMS"] = context.intToString(numCCMAAtoms);
defines["NUM_CCMA_CONSTRAINTS"] = context.intToString(numCCMA);
defines["NUM_ATOMS"] = context.intToString(numAtoms);
defines["NUM_2_AVERAGE"] = context.intToString(num2Avg);
......@@ -532,11 +548,12 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
settleVelKernel = program->createKernel("applySettleToVelocities");
shakePosKernel = program->createKernel("applyShakeToPositions");
shakeVelKernel = program->createKernel("applyShakeToVelocities");
ccmaDirectionsKernel = program->createKernel("computeCCMAConstraintDirections");
ccmaPosForceKernel = program->createKernel("computeCCMAPositionConstraintForce");
ccmaVelForceKernel = program->createKernel("computeCCMAVelocityConstraintForce");
ccmaMultiplyKernel = program->createKernel("multiplyByCCMAConstraintMatrix");
ccmaUpdateKernel = program->createKernel("updateCCMAAtomPositions");
ccmaDirectionsKernel = program->createKernel("computeCCMAConstraintDirectionsKernel");
ccmaPosForceKernel = program->createKernel("computeCCMAPositionConstraintForceKernel");
ccmaVelForceKernel = program->createKernel("computeCCMAVelocityConstraintForceKernel");
ccmaMultiplyKernel = program->createKernel("multiplyByCCMAConstraintMatrixKernel");
ccmaUpdateKernel = program->createKernel("updateCCMAAtomPositionsKernel");
ccmaFullKernel = program->createKernel("runCCMA");
vsitePositionKernel = program->createKernel("computeVirtualSites");
vsiteForceKernel = program->createKernel("distributeVirtualSiteForces");
vsiteSaveForcesKernel = program->createKernel("saveDistributedForces");
......@@ -621,14 +638,14 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
if (context.getUseMixedPrecision())
shakeVelKernel->addArg(context.getPosqCorrection());
}
if (ccmaAtoms.isInitialized()) {
ccmaDirectionsKernel->addArg(ccmaAtoms);
if (ccmaConstraintAtoms.isInitialized()) {
ccmaDirectionsKernel->addArg(ccmaConstraintAtoms);
ccmaDirectionsKernel->addArg(ccmaDistance);
ccmaDirectionsKernel->addArg(context.getPosq());
ccmaDirectionsKernel->addArg(ccmaConverged);
if (context.getUseMixedPrecision())
ccmaDirectionsKernel->addArg(context.getPosqCorrection());
ccmaPosForceKernel->addArg(ccmaAtoms);
ccmaPosForceKernel->addArg(ccmaConstraintAtoms);
ccmaPosForceKernel->addArg(ccmaDistance);
ccmaPosForceKernel->addArg(posDelta);
ccmaPosForceKernel->addArg(ccmaReducedMass);
......@@ -637,7 +654,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
ccmaPosForceKernel->addArg();
ccmaPosForceKernel->addArg();
ccmaPosForceKernel->addArg();
ccmaVelForceKernel->addArg(ccmaAtoms);
ccmaVelForceKernel->addArg(ccmaConstraintAtoms);
ccmaVelForceKernel->addArg(ccmaDistance);
ccmaVelForceKernel->addArg(context.getVelm());
ccmaVelForceKernel->addArg(ccmaReducedMass);
......@@ -652,6 +669,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
ccmaMultiplyKernel->addArg(ccmaConstraintMatrixValue);
ccmaMultiplyKernel->addArg(ccmaConverged);
ccmaMultiplyKernel->addArg();
ccmaUpdateKernel->addArg(ccmaAtoms);
ccmaUpdateKernel->addArg(ccmaNumAtomConstraints);
ccmaUpdateKernel->addArg(ccmaAtomConstraints);
ccmaUpdateKernel->addArg(ccmaDistance);
......@@ -661,6 +679,23 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
ccmaUpdateKernel->addArg(ccmaDelta2);
ccmaUpdateKernel->addArg(ccmaConverged);
ccmaUpdateKernel->addArg();
ccmaFullKernel->addArg();
ccmaFullKernel->addArg(ccmaAtoms);
ccmaFullKernel->addArg(ccmaNumAtomConstraints);
ccmaFullKernel->addArg(ccmaAtomConstraints);
ccmaFullKernel->addArg(ccmaConstraintAtoms);
ccmaFullKernel->addArg(ccmaDistance);
ccmaFullKernel->addArg(context.getPosq());
ccmaFullKernel->addArg(context.getVelm());
ccmaFullKernel->addArg(posDelta);
ccmaFullKernel->addArg(ccmaReducedMass);
ccmaFullKernel->addArg(ccmaDelta1);
ccmaFullKernel->addArg(ccmaDelta2);
ccmaFullKernel->addArg(ccmaConstraintMatrixColumn);
ccmaFullKernel->addArg(ccmaConstraintMatrixValue);
ccmaFullKernel->addArg();
if (context.getUseMixedPrecision())
ccmaFullKernel->addArg(context.getPosqCorrection());
}
// Arguments for time shift kernel will be set later.
......
......@@ -556,8 +556,8 @@ KERNEL void applySettleToVelocities(int numClusters, mixed tol, GLOBAL const rea
/**
* Compute the direction each CCMA constraint is pointing in. This is called once at the beginning of constraint evaluation.
*/
KERNEL void computeCCMAConstraintDirections(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL mixed4* RESTRICT constraintDistance,
GLOBAL const real4* RESTRICT atomPositions, GLOBAL int* RESTRICT converged
DEVICE void computeCCMAConstraintDirections(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL mixed4* RESTRICT constraintDistance,
GLOBAL const real4* RESTRICT atomPositions
#ifdef USE_MIXED_PRECISION
, GLOBAL const real4* RESTRICT posqCorrection
#endif
......@@ -577,6 +577,19 @@ KERNEL void computeCCMAConstraintDirections(GLOBAL const int2* RESTRICT constrai
dir.z = oldPos1.z-oldPos2.z;
constraintDistance[index] = dir;
}
}
KERNEL void computeCCMAConstraintDirectionsKernel(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL mixed4* RESTRICT constraintDistance,
GLOBAL const real4* RESTRICT atomPositions, GLOBAL int* RESTRICT converged
#ifdef USE_MIXED_PRECISION
, GLOBAL const real4* RESTRICT posqCorrection
#endif
) {
#ifdef USE_MIXED_PRECISION
computeCCMAConstraintDirections(constraintAtoms, constraintDistance, atomPositions, posqCorrection);
#else
computeCCMAConstraintDirections(constraintAtoms, constraintDistance, atomPositions);
#endif
if (GLOBAL_ID == 0) {
converged[0] = 1;
converged[1] = 0;
......@@ -586,19 +599,11 @@ KERNEL void computeCCMAConstraintDirections(GLOBAL const int2* RESTRICT constrai
/**
* Compute the force applied by each CCMA position constraint.
*/
KERNEL void computeCCMAPositionConstraintForce(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL const mixed4* RESTRICT constraintDistance,
DEVICE void computeCCMAPositionConstraintForce(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL const mixed4* RESTRICT constraintDistance,
GLOBAL const mixed4* RESTRICT atomPositions, GLOBAL const mixed* RESTRICT reducedMass, GLOBAL mixed* RESTRICT delta1,
GLOBAL int* RESTRICT converged, GLOBAL int* RESTRICT hostConvergedFlag, mixed tol, int iteration) {
LOCAL int groupConverged;
if (converged[1-iteration%2]) {
if (GLOBAL_ID == 0) {
converged[iteration%2] = 1;
hostConvergedFlag[0] = 1;
}
return; // The constraint iteration has already converged.
}
mixed tol, int iteration, LOCAL_ARG int* groupConverged) {
if (LOCAL_ID == 0)
groupConverged = 1;
*groupConverged = 1;
SYNC_THREADS;
mixed lowerTol = 1-2*tol+tol*tol;
mixed upperTol = 1+2*tol+tol*tol;
......@@ -620,30 +625,38 @@ KERNEL void computeCCMAPositionConstraintForce(GLOBAL const int2* RESTRICT const
delta1[index] = (rrpr > d_ij2*1e-6f ? reducedMass[index]*diff/rrpr : 0.0f);
threadConverged &= (rp2 > lowerTol*dist2 && rp2 < upperTol*dist2);
}
if (groupConverged && !threadConverged)
groupConverged = 0;
SYNC_THREADS;
if (LOCAL_ID == 0 && !groupConverged)
converged[iteration%2] = 0;
if (*groupConverged && !threadConverged)
*groupConverged = 0;
}
/**
* Compute the force applied by each CCMA velocity constraint.
*/
KERNEL void computeCCMAVelocityConstraintForce(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL const mixed4* RESTRICT constraintDistance,
KERNEL void computeCCMAPositionConstraintForceKernel(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL const mixed4* RESTRICT constraintDistance,
GLOBAL const mixed4* RESTRICT atomPositions, GLOBAL const mixed* RESTRICT reducedMass, GLOBAL mixed* RESTRICT delta1,
GLOBAL int* RESTRICT converged, GLOBAL int* RESTRICT hostConvergedFlag, mixed tol, int iteration) {
LOCAL int groupConverged;
if (converged[1-iteration%2]) {
if (GROUP_ID == 0 && LOCAL_ID == 0) {
if (GLOBAL_ID == 0) {
converged[iteration%2] = 1;
hostConvergedFlag[0] = 1;
}
return; // The constraint iteration has already converged.
}
computeCCMAPositionConstraintForce(constraintAtoms, constraintDistance, atomPositions, reducedMass,
delta1, tol, iteration, &groupConverged);
SYNC_THREADS;
if (LOCAL_ID == 0 && !groupConverged)
converged[iteration%2] = 0;
}
/**
* Compute the force applied by each CCMA velocity constraint.
*/
DEVICE void computeCCMAVelocityConstraintForce(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL const mixed4* RESTRICT constraintDistance,
GLOBAL const mixed4* RESTRICT atomPositions, GLOBAL const mixed* RESTRICT reducedMass, GLOBAL mixed* RESTRICT delta1,
mixed tol, int iteration, LOCAL_ARG int* groupConverged) {
if (LOCAL_ID == 0)
groupConverged = 1;
*groupConverged = 1;
SYNC_THREADS;
bool threadConverged = true;
for (int index = GLOBAL_ID; index < NUM_CCMA_CONSTRAINTS; index += GLOBAL_SIZE) {
// Compute the force due to this constraint.
......@@ -653,24 +666,34 @@ KERNEL void computeCCMAVelocityConstraintForce(GLOBAL const int2* RESTRICT const
mixed rrpr = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z;
mixed d_ij2 = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z;
delta1[index] = -2*reducedMass[index]*rrpr/d_ij2;
threadConverged &= (fabs(delta1[index]) <= tol);
}
if (*groupConverged && !threadConverged)
*groupConverged = 0;
}
// See whether it has converged.
if (groupConverged && fabs(delta1[index]) > tol) {
groupConverged = 0;
converged[iteration%2] = 0;
/**
 * Kernel entry point for one iteration of the CCMA velocity constraint force calculation.
 *
 * If converged[1-iteration%2] is already set, the work is skipped: one thread sets
 * converged[iteration%2] and hostConvergedFlag[0] to 1 and every thread returns.
 * Otherwise the per-constraint work is delegated to computeCCMAVelocityConstraintForce(),
 * and converged[iteration%2] is cleared if this workgroup's groupConverged flag was cleared.
 *
 * @param constraintAtoms     the pair of atom indices for each constraint
 * @param constraintDistance  per-constraint data forwarded to the helper
 * @param atomPositions       per-atom array forwarded to the helper
 * @param reducedMass         per-constraint reduced mass, forwarded to the helper
 * @param delta1              per-constraint output written by the helper
 * @param converged           two-element convergence flag array indexed by iteration parity
 * @param hostConvergedFlag   flag set to 1 when convergence has been detected
 * @param tol                 the convergence tolerance
 * @param iteration           the index of the current CCMA iteration
 */
KERNEL void computeCCMAVelocityConstraintForceKernel(GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL const mixed4* RESTRICT constraintDistance,
        GLOBAL const mixed4* RESTRICT atomPositions, GLOBAL const mixed* RESTRICT reducedMass, GLOBAL mixed* RESTRICT delta1,
        GLOBAL int* RESTRICT converged, GLOBAL int* RESTRICT hostConvergedFlag, mixed tol, int iteration) {
    LOCAL int groupConverged;
    if (converged[1-iteration%2]) {
        if (GROUP_ID == 0 && LOCAL_ID == 0) {
            converged[iteration%2] = 1;
            hostConvergedFlag[0] = 1;
        }
        return; // The constraint iteration has already converged.
    }
    computeCCMAVelocityConstraintForce(constraintAtoms, constraintDistance, atomPositions, reducedMass,
            delta1, tol, iteration, &groupConverged);

    // Any thread in the group may have cleared groupConverged inside the helper.  Wait for
    // all of them before local thread 0 reads the flag; otherwise a late write can be missed
    // and the iteration is wrongly reported as converged.  The position kernel does the same.
    SYNC_THREADS;
    if (LOCAL_ID == 0 && !groupConverged)
        converged[iteration%2] = 0;
}
/**
* Multiply the vector of CCMA constraint forces by the constraint matrix.
*/
KERNEL void multiplyByCCMAConstraintMatrix(GLOBAL const mixed* RESTRICT delta1, GLOBAL mixed* RESTRICT delta2, GLOBAL const int* RESTRICT constraintMatrixColumn,
GLOBAL const mixed* RESTRICT constraintMatrixValue, GLOBAL const int* RESTRICT converged, int iteration) {
if (converged[iteration%2])
return; // The constraint iteration has already converged.
DEVICE void multiplyByCCMAConstraintMatrix(GLOBAL const mixed* RESTRICT delta1, GLOBAL mixed* RESTRICT delta2, GLOBAL const int* RESTRICT constraintMatrixColumn,
GLOBAL const mixed* RESTRICT constraintMatrixValue, int iteration) {
// Multiply by the inverse constraint matrix.
for (int index = GLOBAL_ID; index < NUM_CCMA_CONSTRAINTS; index += GLOBAL_SIZE) {
......@@ -686,20 +709,24 @@ KERNEL void multiplyByCCMAConstraintMatrix(GLOBAL const mixed* RESTRICT delta1,
}
}
/**
 * Kernel entry point for multiplying the vector of CCMA constraint forces by the constraint
 * matrix.  The multiplication is skipped when converged[iteration%2] is nonzero.
 */
KERNEL void multiplyByCCMAConstraintMatrixKernel(GLOBAL const mixed* RESTRICT delta1, GLOBAL mixed* RESTRICT delta2, GLOBAL const int* RESTRICT constraintMatrixColumn,
        GLOBAL const mixed* RESTRICT constraintMatrixValue, GLOBAL const int* RESTRICT converged, int iteration) {
    // Only do the work while the constraint iteration has not yet converged.
    const int alreadyConverged = converged[iteration%2];
    if (!alreadyConverged)
        multiplyByCCMAConstraintMatrix(delta1, delta2, constraintMatrixColumn, constraintMatrixValue, iteration);
}
/**
* Update the atom positions based on CCMA constraint forces.
*/
KERNEL void updateCCMAAtomPositions(GLOBAL const int* RESTRICT numAtomConstraints, GLOBAL const int* RESTRICT atomConstraints,
DEVICE void updateCCMAAtomPositions(GLOBAL const int* RESTRICT atoms, GLOBAL const int* RESTRICT numAtomConstraints, GLOBAL const int* RESTRICT atomConstraints,
GLOBAL const mixed4* RESTRICT constraintDistance, GLOBAL mixed4* RESTRICT atomPositions, GLOBAL const mixed4* RESTRICT velm,
GLOBAL const mixed* RESTRICT delta1, GLOBAL const mixed* RESTRICT delta2, GLOBAL int* RESTRICT converged, int iteration) {
if (GROUP_ID == 0 && LOCAL_ID == 0)
converged[1-iteration%2] = 1;
if (converged[iteration%2])
return; // The constraint iteration has already converged.
GLOBAL const mixed* RESTRICT delta1, GLOBAL const mixed* RESTRICT delta2, int iteration) {
mixed damping = (iteration < 2 ? 0.5f : 1.0f);
for (int index = GLOBAL_ID; index < NUM_ATOMS; index += GLOBAL_SIZE) {
for (int i = GLOBAL_ID; i < NUM_CCMA_ATOMS; i += GLOBAL_SIZE) {
// Compute the new position of this atom.
int index = atoms[i];
mixed4 atomPos = atomPositions[index];
mixed invMass = velm[index].w;
int num = numAtomConstraints[index];
......@@ -718,6 +745,60 @@ KERNEL void updateCCMAAtomPositions(GLOBAL const int* RESTRICT numAtomConstraint
}
}
/**
 * Kernel entry point for updating the atom positions based on CCMA constraint forces.
 *
 * One thread first sets converged[1-iteration%2] to 1.  The update itself is delegated to
 * updateCCMAAtomPositions() and is skipped when converged[iteration%2] is nonzero.
 */
KERNEL void updateCCMAAtomPositionsKernel(GLOBAL const int* RESTRICT atoms, GLOBAL const int* RESTRICT numAtomConstraints, GLOBAL const int* RESTRICT atomConstraints,
        GLOBAL const mixed4* RESTRICT constraintDistance, GLOBAL mixed4* RESTRICT atomPositions, GLOBAL const mixed4* RESTRICT velm,
        GLOBAL const mixed* RESTRICT delta1, GLOBAL const mixed* RESTRICT delta2, GLOBAL int* RESTRICT converged, int iteration) {
    const int currentSlot = iteration%2;
    const int otherSlot = 1-currentSlot;

    // Mark the other parity slot as converged.
    if (GROUP_ID == 0 && LOCAL_ID == 0)
        converged[otherSlot] = 1;

    // Nothing more to do once the constraint iteration has converged.
    if (!converged[currentSlot])
        updateCCMAAtomPositions(atoms, numAtomConstraints, atomConstraints, constraintDistance, atomPositions, velm,
                delta1, delta2, iteration);
}
/**
 * Run the entire CCMA iteration within a single kernel. This has far less overhead than
 * using multiple kernels, but requires the calculation to use only a single workgroup.
 * That makes it faster for small numbers of constraints, but slower for large numbers.
 *
 * The kernel first computes the constraint directions, then loops for at most 150
 * iterations.  Each iteration computes the constraint forces, multiplies them by the
 * constraint matrix, and updates the constrained quantity, with a SYNC_THREADS between
 * the stages.  The loop exits once the force stage leaves groupConverged set.  Note that
 * the multiply and update stages of that final iteration still run before the return.
 *
 * @param constrainVelocities  if nonzero, constrain velocities (velm is read and updated);
 *                             otherwise constrain positions (posDelta is read and updated)
 * @param atoms                passed to updateCCMAAtomPositions() as its atom list
 * @param numAtomConstraints   passed to updateCCMAAtomPositions()
 * @param atomConstraints      passed to updateCCMAAtomPositions()
 * @param constraintAtoms      the pair of atoms for each constraint
 * @param constraintDistance   written by computeCCMAConstraintDirections(), then read by
 *                             the force and update stages
 * @param atomPositions        passed to computeCCMAConstraintDirections()
 * @param velm                 passed as the velm argument of the update stage; also the
 *                             array being constrained when constrainVelocities is nonzero
 * @param posDelta             the array being constrained when constrainVelocities is zero
 * @param reducedMass          passed to the force stage
 * @param delta1               output of the force stage, input to the multiply stage
 * @param delta2               output of the multiply stage, passed to the update stage
 * @param constraintMatrixColumn  passed to multiplyByCCMAConstraintMatrix()
 * @param constraintMatrixValue   passed to multiplyByCCMAConstraintMatrix()
 * @param tol                  the convergence tolerance passed to the force stage
 * @param posqCorrection       only present when USE_MIXED_PRECISION is defined; passed to
 *                             computeCCMAConstraintDirections()
 */
KERNEL void runCCMA(int constrainVelocities, GLOBAL const int* RESTRICT atoms, GLOBAL const int* RESTRICT numAtomConstraints, GLOBAL const int* RESTRICT atomConstraints,
GLOBAL const int2* RESTRICT constraintAtoms, GLOBAL mixed4* RESTRICT constraintDistance, GLOBAL const real4* RESTRICT atomPositions,
GLOBAL mixed4* RESTRICT velm, GLOBAL mixed4* RESTRICT posDelta, GLOBAL const mixed* RESTRICT reducedMass,
GLOBAL mixed* RESTRICT delta1, GLOBAL mixed* RESTRICT delta2, GLOBAL const int* RESTRICT constraintMatrixColumn,
GLOBAL const mixed* RESTRICT constraintMatrixValue, mixed tol
#ifdef USE_MIXED_PRECISION
, GLOBAL const real4* RESTRICT posqCorrection
#endif
) {
// Convergence flag declared LOCAL; the force helpers write it through the pointer they receive.
LOCAL int groupConverged;
// Compute the direction of every constraint once, before iterating.
#ifdef USE_MIXED_PRECISION
computeCCMAConstraintDirections(constraintAtoms, constraintDistance, atomPositions, posqCorrection);
#else
computeCCMAConstraintDirections(constraintAtoms, constraintDistance, atomPositions);
#endif
// Iterate until converged, giving up after 150 iterations.
for (int iteration = 0; iteration < 150; iteration++) {
// Barrier before the force stage.  On the first pass it follows the direction computation;
// on later passes it follows the previous iteration's read of groupConverged.
SYNC_THREADS
// Stage 1: compute the constraint forces into delta1 and record convergence in groupConverged.
if (constrainVelocities)
computeCCMAVelocityConstraintForce(constraintAtoms, constraintDistance, velm, reducedMass,
delta1, tol, iteration, &groupConverged);
else
computeCCMAPositionConstraintForce(constraintAtoms, constraintDistance, posDelta, reducedMass,
delta1, tol, iteration, &groupConverged);
SYNC_THREADS
// Stage 2: multiply delta1 by the constraint matrix, producing delta2.
multiplyByCCMAConstraintMatrix(delta1, delta2, constraintMatrixColumn, constraintMatrixValue, iteration);
SYNC_THREADS
// Stage 3: apply the update to velm or posDelta, whichever is being constrained.
if (constrainVelocities)
updateCCMAAtomPositions(atoms, numAtomConstraints, atomConstraints, constraintDistance, velm, velm,
delta1, delta2, iteration);
else
updateCCMAAtomPositions(atoms, numAtomConstraints, atomConstraints, constraintDistance, posDelta, velm,
delta1, delta2, iteration);
SYNC_THREADS
// Stop once the force stage of this iteration left the convergence flag set.
if (groupConverged)
return;
}
}
/**
* Compute the positions of virtual sites
*/
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2009-2020 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -91,24 +91,34 @@ void CudaIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, do
shakeKernel->setArg(1, (float) tol);
shakeKernel->execute(shakeAtoms.getSize());
}
if (ccmaAtoms.isInitialized()) {
if (ccmaConstraintAtoms.isInitialized()) {
if (ccmaConstraintAtoms.getSize() <= 1024) {
// Use the version of CCMA that runs in a single kernel with one workgroup.
ccmaFullKernel->setArg(0, (int) constrainVelocities);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
ccmaFullKernel->setArg(14, tol);
else
ccmaFullKernel->setArg(14, (float) tol);
ccmaFullKernel->execute(128, 128);
}
else {
ccmaForceKernel->setArg(6, ccmaConvergedDeviceMemory);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
ccmaForceKernel->setArg(7, tol);
else
ccmaForceKernel->setArg(7, (float) tol);
ccmaDirectionsKernel->execute(ccmaAtoms.getSize());
ccmaDirectionsKernel->execute(ccmaConstraintAtoms.getSize());
const int checkInterval = 4;
ccmaConvergedMemory[0] = 0;
ccmaUpdateKernel->setArg(3, constrainVelocities ? context.getVelm() : posDelta);
ccmaUpdateKernel->setArg(4, constrainVelocities ? context.getVelm() : posDelta);
for (int i = 0; i < 150; i++) {
ccmaForceKernel->setArg(8, i);
ccmaForceKernel->execute(ccmaAtoms.getSize());
ccmaForceKernel->execute(ccmaConstraintAtoms.getSize());
if ((i+1)%checkInterval == 0)
CHECK_RESULT2(cuEventRecord(ccmaEvent, 0), "Error recording event for CCMA");
ccmaMultiplyKernel->setArg(5, i);
ccmaMultiplyKernel->execute(ccmaAtoms.getSize());
ccmaUpdateKernel->setArg(8, i);
ccmaMultiplyKernel->execute(ccmaConstraintAtoms.getSize());
ccmaUpdateKernel->setArg(9, i);
ccmaUpdateKernel->execute(context.getNumAtoms());
if ((i+1)%checkInterval == 0) {
CHECK_RESULT2(cuEventSynchronize(ccmaEvent), "Error synchronizing on event for CCMA");
......@@ -117,6 +127,7 @@ void CudaIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, do
}
}
}
}
}
void CudaIntegrationUtilities::distributeForcesFromVirtualSites() {
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2009-2020 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -76,13 +76,24 @@ void OpenCLIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities,
shakeKernel->setArg(1, (float) tol);
shakeKernel->execute(shakeAtoms.getSize());
}
if (ccmaAtoms.isInitialized()) {
if (ccmaConstraintAtoms.isInitialized()) {
if (ccmaConstraintAtoms.getSize() <= 1024) {
// Use the version of CCMA that runs in a single kernel with one workgroup.
ccmaFullKernel->setArg(0, (int) constrainVelocities);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
ccmaFullKernel->setArg(14, tol);
else
ccmaFullKernel->setArg(14, (float) tol);
ccmaFullKernel->execute(128, 128);
}
else {
// Use the version of CCMA that uses multiple kernels.
ccmaForceKernel->setArg(6, ccmaConvergedHostBuffer);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
ccmaForceKernel->setArg(7, tol);
else
ccmaForceKernel->setArg(7, (float) tol);
ccmaDirectionsKernel->execute(ccmaAtoms.getSize());
ccmaDirectionsKernel->execute(ccmaConstraintAtoms.getSize());
const int checkInterval = 4;
OpenCLContext& cl = dynamic_cast<OpenCLContext&>(context);
cl::CommandQueue queue = cl.getQueue();
......@@ -90,16 +101,16 @@ void OpenCLIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities,
int* ccmaConvergedHostMemory = (int*) queue.enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_int));
ccmaConvergedHostMemory[0] = 0;
queue.enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
ccmaUpdateKernel->setArg(3, constrainVelocities ? context.getVelm() : posDelta);
ccmaUpdateKernel->setArg(4, constrainVelocities ? context.getVelm() : posDelta);
for (int i = 0; i < 150; i++) {
ccmaForceKernel->setArg(8, i);
ccmaForceKernel->execute(ccmaAtoms.getSize());
ccmaForceKernel->execute(ccmaConstraintAtoms.getSize());
cl::Event event;
if ((i+1)%checkInterval == 0 && !ccmaUseDirectBuffer)
queue.enqueueReadBuffer(cl.unwrap(ccmaConverged).getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(int), converged, NULL, &event);
ccmaMultiplyKernel->setArg(5, i);
ccmaMultiplyKernel->execute(ccmaAtoms.getSize());
ccmaUpdateKernel->setArg(8, i);
ccmaMultiplyKernel->execute(ccmaConstraintAtoms.getSize());
ccmaUpdateKernel->setArg(9, i);
ccmaUpdateKernel->execute(context.getNumAtoms());
if ((i+1)%checkInterval == 0) {
if (ccmaUseDirectBuffer) {
......@@ -117,6 +128,7 @@ void OpenCLIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities,
}
}
}
}
}
void OpenCLIntegrationUtilities::distributeForcesFromVirtualSites() {
......
......@@ -227,6 +227,52 @@ void testConstrainedMasslessParticles() {
ASSERT_EQUAL(0.0, state.getVelocities()[0][0]);
}
void testConstrainedChain(int numParticles) {
// Create a linear chain of particles with all distances constrained.
System system;
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
positions[i] = Vec3(i, 0, 0);
if (i > 0) {
system.addConstraint(i-1, i, 1.0);
Vec3 delta(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
delta /= sqrt(delta.dot(delta));
positions[i] = positions[i-1]+delta;
}
}
VerletIntegrator integrator(0.001);
integrator.setConstraintTolerance(1e-5);
Context context(system, integrator, platform);
context.setPositions(positions);
context.setVelocitiesToTemperature(300.0);
// Simulate it and see whether the constraints remain satisfied.
double initialEnergy = 0.0;
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions | State::Energy | State::Velocities | State::Forces);
for (int j = 0; j < system.getNumConstraints(); ++j) {
int particle1, particle2;
double distance;
system.getConstraintParameters(j, particle1, particle2, distance);
Vec3 p1 = state.getPositions()[particle1];
Vec3 p2 = state.getPositions()[particle2];
double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
ASSERT_EQUAL_TOL(distance, dist, 2e-5);
}
double energy = state.getPotentialEnergy()+state.getKineticEnergy();
if (i == 1)
initialEnergy = energy;
else if (i > 1)
ASSERT_EQUAL_TOL(initialEnergy, energy, 0.01);
integrator.step(1);
}
}
void testInitialTemperature() {
// Check temperature initialization for a collection of randomly placed particles
const int numParticles = 50000;
......@@ -289,6 +335,8 @@ int main(int argc, char* argv[]) {
testConstraints();
testConstrainedClusters();
testConstrainedMasslessParticles();
testConstrainedChain(10);
testConstrainedChain(1500);
testInitialTemperature();
testForceGroups();
runPlatformTests();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment