Commit b2da8475 authored by Peter Eastman's avatar Peter Eastman
Browse files

Began CUDA implementation of C-SHAKE algorithm

parent b1cffecf
......@@ -442,7 +442,7 @@ void CudaIntegrateVerletStepKernel::execute(OpenMMContextImpl& context, const Ve
kVerletUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstLincs(gpu);
kApplyFirstCShake(gpu);
if (data.removeCM) {
int step = (int) (context.getTime()/stepSize);
if (step%data.cmMotionFrequency == 0)
......@@ -481,7 +481,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
kUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstLincs(gpu);
kApplyFirstCShake(gpu);
if (data.removeCM) {
int step = (int) (context.getTime()/stepSize);
if (step%data.cmMotionFrequency == 0)
......@@ -490,7 +490,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
kUpdatePart2(gpu);
kApplySecondShake(gpu);
kApplySecondSettle(gpu);
kApplySecondLincs(gpu);
kApplySecondCShake(gpu);
}
CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
......@@ -523,7 +523,7 @@ void CudaIntegrateBrownianStepKernel::execute(OpenMMContextImpl& context, const
kBrownianUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstLincs(gpu);
kApplyFirstCShake(gpu);
if (data.removeCM) {
int step = (int) (context.getTime()/stepSize);
if (step%data.cmMotionFrequency == 0)
......
......@@ -47,10 +47,12 @@ extern void kCalculateAndersenThermostat(gpuContext gpu);
extern void kReduceBornSumAndForces(gpuContext gpu);
extern void kUpdatePart1(gpuContext gpu);
extern void kApplyFirstShake(gpuContext gpu);
extern void kApplyFirstCShake(gpuContext gpu);
extern void kApplyFirstSettle(gpuContext gpu);
extern void kApplyFirstLincs(gpuContext gpu);
extern void kUpdatePart2(gpuContext gpu);
extern void kApplySecondShake(gpuContext gpu);
extern void kApplySecondCShake(gpuContext gpu);
extern void kApplySecondSettle(gpuContext gpu);
extern void kApplySecondLincs(gpuContext gpu);
extern void kVerletUpdatePart1(gpuContext gpu);
......@@ -81,6 +83,8 @@ extern void SetUpdateShakeHSim(gpuContext gpu);
extern void GetUpdateShakeHSim(gpuContext gpu);
extern void SetSettleSim(gpuContext gpu);
extern void GetSettleSim(gpuContext gpu);
extern void SetCShakeSim(gpuContext gpu);
extern void GetCShakeSim(gpuContext gpu);
extern void SetLincsSim(gpuContext gpu);
extern void GetLincsSim(gpuContext gpu);
extern void SetVerletUpdateSim(gpuContext gpu);
......
......@@ -389,6 +389,8 @@ struct cudaGmxSimulation {
int* pLincsAtomConstraints; // The indices of constraints involving each atom
int* pLincsNumAtomConstraints; // The number of constraints involving each atom
short* pSyncCounter; // Used for global thread synchronization
unsigned int* pRequiredIterations; // Used by SHAKE to communicate whether iteration has converged
float* pShakeReducedMass; // The reduced mass for each SHAKE constraint
// Mutable stuff
float4* pPosq; // Pointer to atom positions and charges
......
......@@ -733,6 +733,12 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
CUDAStream<short>* psSyncCounter = new CUDAStream<short>(2*gpu->sim.blocks, 1, "SyncCounter");
gpu->psSyncCounter = psSyncCounter;
gpu->sim.pSyncCounter = psSyncCounter->_pDevData;
CUDAStream<unsigned int>* psRequiredIterations = new CUDAStream<unsigned int>(1, 1, "RequiredIterations");
gpu->psRequiredIterations = psRequiredIterations;
gpu->sim.pRequiredIterations = psRequiredIterations->_pDevData;
CUDAStream<float>* psShakeReducedMass = new CUDAStream<float>(numLincs, 1, "LincsSolution");
gpu->psShakeReducedMass = psShakeReducedMass;
gpu->sim.pShakeReducedMass = psShakeReducedMass->_pDevData;
gpu->sim.lincsConstraints = numLincs;
for (int i = 0; i < numLincs; i++) {
int c = lincsConstraints[i];
......@@ -740,6 +746,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
(*psLincsAtoms)[i].y = atom2[c];
(*psLincsDistance)[i].w = distance[c];
(*psLincsS)[i] = 1.0f/sqrt(invMass1[c]+invMass2[c]);
(*psShakeReducedMass)[i] = 0.5f/(invMass1[c]+invMass2[c]);
(*psLincsNumConnections)[i] = linkedConstraints[i].size();
for (unsigned int j = 0; j < linkedConstraints[i].size(); j++)
(*psLincsConnections)[i+j*numLincs] = linkedConstraints[i][j];
......@@ -754,6 +761,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
psLincsAtoms->Upload();
psLincsDistance->Upload();
psLincsS->Upload();
psShakeReducedMass->Upload();
psLincsConnections->Upload();
psLincsNumConnections->Upload();
psLincsAtomConstraints->Upload();
......@@ -761,8 +769,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
psSyncCounter->Upload();
gpu->sim.lincsTerms = lincsTerms;
gpu->sim.lincs_threads_per_block = (gpu->sim.lincsConstraints + gpu->sim.blocks - 1) / gpu->sim.blocks;
if (gpu->sim.lincs_threads_per_block > gpu->sim.max_shake_threads_per_block)
gpu->sim.lincs_threads_per_block = gpu->sim.max_shake_threads_per_block;
if (gpu->sim.lincs_threads_per_block > gpu->sim.threads_per_block)
gpu->sim.lincs_threads_per_block = gpu->sim.threads_per_block;
if (gpu->sim.lincs_threads_per_block < gpu->sim.blocks)
gpu->sim.lincs_threads_per_block = gpu->sim.blocks;
......@@ -1142,6 +1150,8 @@ void* gpuInit(int numAtoms)
gpu->psLincsRhs2 = NULL;
gpu->psLincsSolution = NULL;
gpu->psSyncCounter = NULL;
gpu->psRequiredIterations = NULL;
gpu->psShakeReducedMass = NULL;
// Initialize output buffer before reading parameters
gpu->pOutputBufferCounter = new unsigned int[gpu->sim.paddedNumberOfAtoms];
......@@ -1292,6 +1302,8 @@ void gpuShutDown(gpuContext gpu)
delete gpu->psLincsRhs2;
delete gpu->psLincsSolution;
delete gpu->psSyncCounter;
delete gpu->psRequiredIterations;
delete gpu->psShakeReducedMass;
if (gpu->cudpp != 0)
cudppDestroyPlan(gpu->cudpp);
......@@ -1601,6 +1613,7 @@ int gpuSetConstants(gpuContext gpu)
SetVerletUpdateSim(gpu);
SetBrownianUpdateSim(gpu);
SetSettleSim(gpu);
SetCShakeSim(gpu);
SetLincsSim(gpu);
SetRandomSim(gpu);
return 1;
......
......@@ -141,6 +141,8 @@ struct _gpuContext {
CUDAStream<float>* psLincsRhs2; // Workspace for LINCS
CUDAStream<float>* psLincsSolution; // Workspace for LINCS
CUDAStream<short>* psSyncCounter; // Used for global thread synchronization
CUDAStream<unsigned int>* psRequiredIterations; // Used by SHAKE to communicate whether iteration has converged
CUDAStream<float>* psShakeReducedMass; // The reduced mass for each SHAKE constraint
};
typedef struct _gpuContext *gpuContext;
......
......@@ -138,10 +138,73 @@ void testConstraints() {
}
}
void testConstrainedClusters() {
const int numParticles = 7;
const double temp = 500.0;
CudaPlatform platform;
System system;
VerletIntegrator integrator(0.002);
integrator.setConstraintTolerance(1e-5);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(i > 1 ? 1.0 : 10.0);
forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
}
system.addConstraint(0, 1, 1.0);
system.addConstraint(0, 2, 1.0);
system.addConstraint(0, 3, 1.0);
system.addConstraint(0, 4, 1.0);
system.addConstraint(1, 5, 1.0);
system.addConstraint(1, 6, 1.0);
system.addConstraint(2, 3, sqrt(2.0));
system.addConstraint(2, 4, sqrt(2.0));
system.addConstraint(3, 4, sqrt(2.0));
system.addConstraint(5, 6, sqrt(2.0));
system.addForce(forceField);
OpenMMContext context(system, integrator, platform);
vector<Vec3> positions(numParticles);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(1, 0, 0);
positions[2] = Vec3(-1, 0, 0);
positions[3] = Vec3(0, 1, 0);
positions[4] = Vec3(0, 0, 1);
positions[5] = Vec3(2, 0, 0);
positions[6] = Vec3(1, 1, 0);
vector<Vec3> velocities(numParticles);
init_gen_rand(0);
for (int i = 0; i < numParticles; ++i)
velocities[i] = Vec3(genrand_real2()-0.5, genrand_real2()-0.5, genrand_real2()-0.5);
context.setPositions(positions);
context.setVelocities(velocities);
// Simulate it and see whether the constraints remain satisfied.
double initialEnergy = 0.0;
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions | State::Energy);
for (int j = 0; j < system.getNumConstraints(); ++j) {
int particle1, particle2;
double distance;
system.getConstraintParameters(j, particle1, particle2, distance);
Vec3 p1 = state.getPositions()[particle1];
Vec3 p2 = state.getPositions()[particle2];
double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
ASSERT_EQUAL_TOL(distance, dist, 2e-5);
}
double energy = state.getKineticEnergy()+state.getPotentialEnergy();
if (i == 1)
initialEnergy = energy;
else if (i > 1)
ASSERT_EQUAL_TOL(initialEnergy, energy, 0.05);
integrator.step(1);
}
}
int main() {
try {
testSingleBond();
testConstraints();
testConstrainedClusters();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -183,7 +183,7 @@ ReferenceRigidShakeAlgorithm::~ReferenceRigidShakeAlgorithm( ){
SimTKOpenMMUtilities::freeOneDRealOpenMMArray( _distanceTolerance, "distanceTolerance" );
SimTKOpenMMUtilities::freeOneDRealOpenMMArray( _reducedMasses, "reducedMasses" );
}
for (int i = 0; i < (int)_matrices.size(); i++)
for (unsigned int i = 0; i < _matrices.size(); i++)
SimTKOpenMMUtilities::freeTwoDRealOpenMMArray(_matrices[i], "");
}
......@@ -346,13 +346,13 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
reducedMasses[ii] = half/( inverseMasses[atomI] + inverseMasses[atomJ] );
}
vector<double> temp;
for (int i = 0; i < (int)_rigidClusters.size(); i++) {
for (unsigned int i = 0; i < _rigidClusters.size(); i++) {
// Compute the constraint coupling matrix for this cluster.
const vector<int>& cluster = _rigidClusters[i];
int size = cluster.size();
unsigned int size = cluster.size();
vector<Vec3> r(size);
for (int j = 0; j < (int)cluster.size(); j++) {
for (unsigned int j = 0; j < cluster.size(); j++) {
int atom1 = _atomIndices[cluster[j]][0];
int atom2 = _atomIndices[cluster[j]][1];
r[j] = Vec3(atomCoordinates[atom1][0]-atomCoordinates[atom2][0],
......@@ -397,7 +397,7 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
double singularValueCutoff = 0.01*w[0];
for (int j = 0; j < size; j++)
w[j] = (w[j] < singularValueCutoff ? 0.0 : 1.0/w[j]);
if ((int)temp.size() < size)
if (temp.size() < size)
temp.resize(size);
for (int j = 0; j < size; j++) {
for (int k = 0; k < size; k++) {
......@@ -458,7 +458,6 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
RealOpenMM diff = dist2 - rp2;
constraintForce[ii] = zero;
RealOpenMM rrpr = DOT3( rp_ij, r_ij[ii] );
if( rrpr < d_ij2[ii]*epsilon6 ){
std::stringstream message;
message << iterations <<" "<<atomI<<" "<<atomJ<< " Error: sign of rrpr < 0?\n";
......@@ -473,11 +472,11 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
if( numberConverged == _numberOfConstraints ){
done = true;
}
for (int i = 0; i < (int)_rigidClusters.size(); i++) {
for (unsigned int i = 0; i < _rigidClusters.size(); i++) {
const vector<int>& cluster = _rigidClusters[i];
RealOpenMM** matrix = _matrices[i];
int size = cluster.size();
if (size > (int)tempForce.size())
unsigned int size = cluster.size();
if (size > tempForce.size())
tempForce.resize(size);
for (int j = 0; j < size; j++) {
tempForce[j] = zero;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment