Commit b2da8475 authored by Peter Eastman's avatar Peter Eastman
Browse files

Began CUDA implementation of C-SHAKE algorithm

parent b1cffecf
...@@ -442,7 +442,7 @@ void CudaIntegrateVerletStepKernel::execute(OpenMMContextImpl& context, const Ve ...@@ -442,7 +442,7 @@ void CudaIntegrateVerletStepKernel::execute(OpenMMContextImpl& context, const Ve
kVerletUpdatePart1(gpu); kVerletUpdatePart1(gpu);
kApplyFirstShake(gpu); kApplyFirstShake(gpu);
kApplyFirstSettle(gpu); kApplyFirstSettle(gpu);
kApplyFirstLincs(gpu); kApplyFirstCShake(gpu);
if (data.removeCM) { if (data.removeCM) {
int step = (int) (context.getTime()/stepSize); int step = (int) (context.getTime()/stepSize);
if (step%data.cmMotionFrequency == 0) if (step%data.cmMotionFrequency == 0)
...@@ -481,7 +481,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const ...@@ -481,7 +481,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
kUpdatePart1(gpu); kUpdatePart1(gpu);
kApplyFirstShake(gpu); kApplyFirstShake(gpu);
kApplyFirstSettle(gpu); kApplyFirstSettle(gpu);
kApplyFirstLincs(gpu); kApplyFirstCShake(gpu);
if (data.removeCM) { if (data.removeCM) {
int step = (int) (context.getTime()/stepSize); int step = (int) (context.getTime()/stepSize);
if (step%data.cmMotionFrequency == 0) if (step%data.cmMotionFrequency == 0)
...@@ -490,7 +490,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const ...@@ -490,7 +490,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
kUpdatePart2(gpu); kUpdatePart2(gpu);
kApplySecondShake(gpu); kApplySecondShake(gpu);
kApplySecondSettle(gpu); kApplySecondSettle(gpu);
kApplySecondLincs(gpu); kApplySecondCShake(gpu);
} }
CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() { CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
...@@ -523,7 +523,7 @@ void CudaIntegrateBrownianStepKernel::execute(OpenMMContextImpl& context, const ...@@ -523,7 +523,7 @@ void CudaIntegrateBrownianStepKernel::execute(OpenMMContextImpl& context, const
kBrownianUpdatePart1(gpu); kBrownianUpdatePart1(gpu);
kApplyFirstShake(gpu); kApplyFirstShake(gpu);
kApplyFirstSettle(gpu); kApplyFirstSettle(gpu);
kApplyFirstLincs(gpu); kApplyFirstCShake(gpu);
if (data.removeCM) { if (data.removeCM) {
int step = (int) (context.getTime()/stepSize); int step = (int) (context.getTime()/stepSize);
if (step%data.cmMotionFrequency == 0) if (step%data.cmMotionFrequency == 0)
......
...@@ -47,10 +47,12 @@ extern void kCalculateAndersenThermostat(gpuContext gpu); ...@@ -47,10 +47,12 @@ extern void kCalculateAndersenThermostat(gpuContext gpu);
extern void kReduceBornSumAndForces(gpuContext gpu); extern void kReduceBornSumAndForces(gpuContext gpu);
extern void kUpdatePart1(gpuContext gpu); extern void kUpdatePart1(gpuContext gpu);
extern void kApplyFirstShake(gpuContext gpu); extern void kApplyFirstShake(gpuContext gpu);
extern void kApplyFirstCShake(gpuContext gpu);
extern void kApplyFirstSettle(gpuContext gpu); extern void kApplyFirstSettle(gpuContext gpu);
extern void kApplyFirstLincs(gpuContext gpu); extern void kApplyFirstLincs(gpuContext gpu);
extern void kUpdatePart2(gpuContext gpu); extern void kUpdatePart2(gpuContext gpu);
extern void kApplySecondShake(gpuContext gpu); extern void kApplySecondShake(gpuContext gpu);
extern void kApplySecondCShake(gpuContext gpu);
extern void kApplySecondSettle(gpuContext gpu); extern void kApplySecondSettle(gpuContext gpu);
extern void kApplySecondLincs(gpuContext gpu); extern void kApplySecondLincs(gpuContext gpu);
extern void kVerletUpdatePart1(gpuContext gpu); extern void kVerletUpdatePart1(gpuContext gpu);
...@@ -81,6 +83,8 @@ extern void SetUpdateShakeHSim(gpuContext gpu); ...@@ -81,6 +83,8 @@ extern void SetUpdateShakeHSim(gpuContext gpu);
extern void GetUpdateShakeHSim(gpuContext gpu); extern void GetUpdateShakeHSim(gpuContext gpu);
extern void SetSettleSim(gpuContext gpu); extern void SetSettleSim(gpuContext gpu);
extern void GetSettleSim(gpuContext gpu); extern void GetSettleSim(gpuContext gpu);
extern void SetCShakeSim(gpuContext gpu);
extern void GetCShakeSim(gpuContext gpu);
extern void SetLincsSim(gpuContext gpu); extern void SetLincsSim(gpuContext gpu);
extern void GetLincsSim(gpuContext gpu); extern void GetLincsSim(gpuContext gpu);
extern void SetVerletUpdateSim(gpuContext gpu); extern void SetVerletUpdateSim(gpuContext gpu);
......
...@@ -389,6 +389,8 @@ struct cudaGmxSimulation { ...@@ -389,6 +389,8 @@ struct cudaGmxSimulation {
int* pLincsAtomConstraints; // The indices of constraints involving each atom int* pLincsAtomConstraints; // The indices of constraints involving each atom
int* pLincsNumAtomConstraints; // The number of constraints involving each atom int* pLincsNumAtomConstraints; // The number of constraints involving each atom
short* pSyncCounter; // Used for global thread synchronization short* pSyncCounter; // Used for global thread synchronization
unsigned int* pRequiredIterations; // Used by SHAKE to communicate whether iteration has converged
float* pShakeReducedMass; // The reduced mass for each SHAKE constraint
// Mutable stuff // Mutable stuff
float4* pPosq; // Pointer to atom positions and charges float4* pPosq; // Pointer to atom positions and charges
......
...@@ -733,6 +733,12 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const ...@@ -733,6 +733,12 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
CUDAStream<short>* psSyncCounter = new CUDAStream<short>(2*gpu->sim.blocks, 1, "SyncCounter"); CUDAStream<short>* psSyncCounter = new CUDAStream<short>(2*gpu->sim.blocks, 1, "SyncCounter");
gpu->psSyncCounter = psSyncCounter; gpu->psSyncCounter = psSyncCounter;
gpu->sim.pSyncCounter = psSyncCounter->_pDevData; gpu->sim.pSyncCounter = psSyncCounter->_pDevData;
CUDAStream<unsigned int>* psRequiredIterations = new CUDAStream<unsigned int>(1, 1, "RequiredIterations");
gpu->psRequiredIterations = psRequiredIterations;
gpu->sim.pRequiredIterations = psRequiredIterations->_pDevData;
CUDAStream<float>* psShakeReducedMass = new CUDAStream<float>(numLincs, 1, "LincsSolution");
gpu->psShakeReducedMass = psShakeReducedMass;
gpu->sim.pShakeReducedMass = psShakeReducedMass->_pDevData;
gpu->sim.lincsConstraints = numLincs; gpu->sim.lincsConstraints = numLincs;
for (int i = 0; i < numLincs; i++) { for (int i = 0; i < numLincs; i++) {
int c = lincsConstraints[i]; int c = lincsConstraints[i];
...@@ -740,6 +746,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const ...@@ -740,6 +746,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
(*psLincsAtoms)[i].y = atom2[c]; (*psLincsAtoms)[i].y = atom2[c];
(*psLincsDistance)[i].w = distance[c]; (*psLincsDistance)[i].w = distance[c];
(*psLincsS)[i] = 1.0f/sqrt(invMass1[c]+invMass2[c]); (*psLincsS)[i] = 1.0f/sqrt(invMass1[c]+invMass2[c]);
(*psShakeReducedMass)[i] = 0.5f/(invMass1[c]+invMass2[c]);
(*psLincsNumConnections)[i] = linkedConstraints[i].size(); (*psLincsNumConnections)[i] = linkedConstraints[i].size();
for (unsigned int j = 0; j < linkedConstraints[i].size(); j++) for (unsigned int j = 0; j < linkedConstraints[i].size(); j++)
(*psLincsConnections)[i+j*numLincs] = linkedConstraints[i][j]; (*psLincsConnections)[i+j*numLincs] = linkedConstraints[i][j];
...@@ -754,6 +761,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const ...@@ -754,6 +761,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
psLincsAtoms->Upload(); psLincsAtoms->Upload();
psLincsDistance->Upload(); psLincsDistance->Upload();
psLincsS->Upload(); psLincsS->Upload();
psShakeReducedMass->Upload();
psLincsConnections->Upload(); psLincsConnections->Upload();
psLincsNumConnections->Upload(); psLincsNumConnections->Upload();
psLincsAtomConstraints->Upload(); psLincsAtomConstraints->Upload();
...@@ -761,8 +769,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const ...@@ -761,8 +769,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
psSyncCounter->Upload(); psSyncCounter->Upload();
gpu->sim.lincsTerms = lincsTerms; gpu->sim.lincsTerms = lincsTerms;
gpu->sim.lincs_threads_per_block = (gpu->sim.lincsConstraints + gpu->sim.blocks - 1) / gpu->sim.blocks; gpu->sim.lincs_threads_per_block = (gpu->sim.lincsConstraints + gpu->sim.blocks - 1) / gpu->sim.blocks;
if (gpu->sim.lincs_threads_per_block > gpu->sim.max_shake_threads_per_block) if (gpu->sim.lincs_threads_per_block > gpu->sim.threads_per_block)
gpu->sim.lincs_threads_per_block = gpu->sim.max_shake_threads_per_block; gpu->sim.lincs_threads_per_block = gpu->sim.threads_per_block;
if (gpu->sim.lincs_threads_per_block < gpu->sim.blocks) if (gpu->sim.lincs_threads_per_block < gpu->sim.blocks)
gpu->sim.lincs_threads_per_block = gpu->sim.blocks; gpu->sim.lincs_threads_per_block = gpu->sim.blocks;
...@@ -1142,6 +1150,8 @@ void* gpuInit(int numAtoms) ...@@ -1142,6 +1150,8 @@ void* gpuInit(int numAtoms)
gpu->psLincsRhs2 = NULL; gpu->psLincsRhs2 = NULL;
gpu->psLincsSolution = NULL; gpu->psLincsSolution = NULL;
gpu->psSyncCounter = NULL; gpu->psSyncCounter = NULL;
gpu->psRequiredIterations = NULL;
gpu->psShakeReducedMass = NULL;
// Initialize output buffer before reading parameters // Initialize output buffer before reading parameters
gpu->pOutputBufferCounter = new unsigned int[gpu->sim.paddedNumberOfAtoms]; gpu->pOutputBufferCounter = new unsigned int[gpu->sim.paddedNumberOfAtoms];
...@@ -1292,6 +1302,8 @@ void gpuShutDown(gpuContext gpu) ...@@ -1292,6 +1302,8 @@ void gpuShutDown(gpuContext gpu)
delete gpu->psLincsRhs2; delete gpu->psLincsRhs2;
delete gpu->psLincsSolution; delete gpu->psLincsSolution;
delete gpu->psSyncCounter; delete gpu->psSyncCounter;
delete gpu->psRequiredIterations;
delete gpu->psShakeReducedMass;
if (gpu->cudpp != 0) if (gpu->cudpp != 0)
cudppDestroyPlan(gpu->cudpp); cudppDestroyPlan(gpu->cudpp);
...@@ -1601,6 +1613,7 @@ int gpuSetConstants(gpuContext gpu) ...@@ -1601,6 +1613,7 @@ int gpuSetConstants(gpuContext gpu)
SetVerletUpdateSim(gpu); SetVerletUpdateSim(gpu);
SetBrownianUpdateSim(gpu); SetBrownianUpdateSim(gpu);
SetSettleSim(gpu); SetSettleSim(gpu);
SetCShakeSim(gpu);
SetLincsSim(gpu); SetLincsSim(gpu);
SetRandomSim(gpu); SetRandomSim(gpu);
return 1; return 1;
......
...@@ -141,6 +141,8 @@ struct _gpuContext { ...@@ -141,6 +141,8 @@ struct _gpuContext {
CUDAStream<float>* psLincsRhs2; // Workspace for LINCS CUDAStream<float>* psLincsRhs2; // Workspace for LINCS
CUDAStream<float>* psLincsSolution; // Workspace for LINCS CUDAStream<float>* psLincsSolution; // Workspace for LINCS
CUDAStream<short>* psSyncCounter; // Used for global thread synchronization CUDAStream<short>* psSyncCounter; // Used for global thread synchronization
CUDAStream<unsigned int>* psRequiredIterations; // Used by SHAKE to communicate whether iteration has converged
CUDAStream<float>* psShakeReducedMass; // The reduced mass for each SHAKE constraint
}; };
typedef struct _gpuContext *gpuContext; typedef struct _gpuContext *gpuContext;
......
...@@ -138,10 +138,73 @@ void testConstraints() { ...@@ -138,10 +138,73 @@ void testConstraints() {
} }
} }
void testConstrainedClusters() {
const int numParticles = 7;
const double temp = 500.0;
CudaPlatform platform;
System system;
VerletIntegrator integrator(0.002);
integrator.setConstraintTolerance(1e-5);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(i > 1 ? 1.0 : 10.0);
forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
}
system.addConstraint(0, 1, 1.0);
system.addConstraint(0, 2, 1.0);
system.addConstraint(0, 3, 1.0);
system.addConstraint(0, 4, 1.0);
system.addConstraint(1, 5, 1.0);
system.addConstraint(1, 6, 1.0);
system.addConstraint(2, 3, sqrt(2.0));
system.addConstraint(2, 4, sqrt(2.0));
system.addConstraint(3, 4, sqrt(2.0));
system.addConstraint(5, 6, sqrt(2.0));
system.addForce(forceField);
OpenMMContext context(system, integrator, platform);
vector<Vec3> positions(numParticles);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(1, 0, 0);
positions[2] = Vec3(-1, 0, 0);
positions[3] = Vec3(0, 1, 0);
positions[4] = Vec3(0, 0, 1);
positions[5] = Vec3(2, 0, 0);
positions[6] = Vec3(1, 1, 0);
vector<Vec3> velocities(numParticles);
init_gen_rand(0);
for (int i = 0; i < numParticles; ++i)
velocities[i] = Vec3(genrand_real2()-0.5, genrand_real2()-0.5, genrand_real2()-0.5);
context.setPositions(positions);
context.setVelocities(velocities);
// Simulate it and see whether the constraints remain satisfied.
double initialEnergy = 0.0;
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions | State::Energy);
for (int j = 0; j < system.getNumConstraints(); ++j) {
int particle1, particle2;
double distance;
system.getConstraintParameters(j, particle1, particle2, distance);
Vec3 p1 = state.getPositions()[particle1];
Vec3 p2 = state.getPositions()[particle2];
double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
ASSERT_EQUAL_TOL(distance, dist, 2e-5);
}
double energy = state.getKineticEnergy()+state.getPotentialEnergy();
if (i == 1)
initialEnergy = energy;
else if (i > 1)
ASSERT_EQUAL_TOL(initialEnergy, energy, 0.05);
integrator.step(1);
}
}
int main() { int main() {
try { try {
testSingleBond(); testSingleBond();
testConstraints(); testConstraints();
testConstrainedClusters();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -183,7 +183,7 @@ ReferenceRigidShakeAlgorithm::~ReferenceRigidShakeAlgorithm( ){ ...@@ -183,7 +183,7 @@ ReferenceRigidShakeAlgorithm::~ReferenceRigidShakeAlgorithm( ){
SimTKOpenMMUtilities::freeOneDRealOpenMMArray( _distanceTolerance, "distanceTolerance" ); SimTKOpenMMUtilities::freeOneDRealOpenMMArray( _distanceTolerance, "distanceTolerance" );
SimTKOpenMMUtilities::freeOneDRealOpenMMArray( _reducedMasses, "reducedMasses" ); SimTKOpenMMUtilities::freeOneDRealOpenMMArray( _reducedMasses, "reducedMasses" );
} }
for (int i = 0; i < (int)_matrices.size(); i++) for (unsigned int i = 0; i < _matrices.size(); i++)
SimTKOpenMMUtilities::freeTwoDRealOpenMMArray(_matrices[i], ""); SimTKOpenMMUtilities::freeTwoDRealOpenMMArray(_matrices[i], "");
} }
...@@ -346,13 +346,13 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo ...@@ -346,13 +346,13 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
reducedMasses[ii] = half/( inverseMasses[atomI] + inverseMasses[atomJ] ); reducedMasses[ii] = half/( inverseMasses[atomI] + inverseMasses[atomJ] );
} }
vector<double> temp; vector<double> temp;
for (int i = 0; i < (int)_rigidClusters.size(); i++) { for (unsigned int i = 0; i < _rigidClusters.size(); i++) {
// Compute the constraint coupling matrix for this cluster. // Compute the constraint coupling matrix for this cluster.
const vector<int>& cluster = _rigidClusters[i]; const vector<int>& cluster = _rigidClusters[i];
int size = cluster.size(); unsigned int size = cluster.size();
vector<Vec3> r(size); vector<Vec3> r(size);
for (int j = 0; j < (int)cluster.size(); j++) { for (unsigned int j = 0; j < cluster.size(); j++) {
int atom1 = _atomIndices[cluster[j]][0]; int atom1 = _atomIndices[cluster[j]][0];
int atom2 = _atomIndices[cluster[j]][1]; int atom2 = _atomIndices[cluster[j]][1];
r[j] = Vec3(atomCoordinates[atom1][0]-atomCoordinates[atom2][0], r[j] = Vec3(atomCoordinates[atom1][0]-atomCoordinates[atom2][0],
...@@ -397,7 +397,7 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo ...@@ -397,7 +397,7 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
double singularValueCutoff = 0.01*w[0]; double singularValueCutoff = 0.01*w[0];
for (int j = 0; j < size; j++) for (int j = 0; j < size; j++)
w[j] = (w[j] < singularValueCutoff ? 0.0 : 1.0/w[j]); w[j] = (w[j] < singularValueCutoff ? 0.0 : 1.0/w[j]);
if ((int)temp.size() < size) if (temp.size() < size)
temp.resize(size); temp.resize(size);
for (int j = 0; j < size; j++) { for (int j = 0; j < size; j++) {
for (int k = 0; k < size; k++) { for (int k = 0; k < size; k++) {
...@@ -458,7 +458,6 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo ...@@ -458,7 +458,6 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
RealOpenMM diff = dist2 - rp2; RealOpenMM diff = dist2 - rp2;
constraintForce[ii] = zero; constraintForce[ii] = zero;
RealOpenMM rrpr = DOT3( rp_ij, r_ij[ii] ); RealOpenMM rrpr = DOT3( rp_ij, r_ij[ii] );
if( rrpr < d_ij2[ii]*epsilon6 ){ if( rrpr < d_ij2[ii]*epsilon6 ){
std::stringstream message; std::stringstream message;
message << iterations <<" "<<atomI<<" "<<atomJ<< " Error: sign of rrpr < 0?\n"; message << iterations <<" "<<atomI<<" "<<atomJ<< " Error: sign of rrpr < 0?\n";
...@@ -473,11 +472,11 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo ...@@ -473,11 +472,11 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
if( numberConverged == _numberOfConstraints ){ if( numberConverged == _numberOfConstraints ){
done = true; done = true;
} }
for (int i = 0; i < (int)_rigidClusters.size(); i++) { for (unsigned int i = 0; i < _rigidClusters.size(); i++) {
const vector<int>& cluster = _rigidClusters[i]; const vector<int>& cluster = _rigidClusters[i];
RealOpenMM** matrix = _matrices[i]; RealOpenMM** matrix = _matrices[i];
int size = cluster.size(); unsigned int size = cluster.size();
if (size > (int)tempForce.size()) if (size > tempForce.size())
tempForce.resize(size); tempForce.resize(size);
for (int j = 0; j < size; j++) { for (int j = 0; j < size; j++) {
tempForce[j] = zero; tempForce[j] = zero;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment