Commit a1b2fd30 authored by Peter Eastman's avatar Peter Eastman
Browse files

Created CUDA implementation of VariableVerletIntegrator

parent 5b5ec4c3
......@@ -40,15 +40,15 @@ namespace OpenMM {
/**
* This is an error contolled, variable time step Integrator that simulates a System using the
* velocity Verlet algorithm. It compares the result of the Verlet integrator to that of an
* leap-frog Verlet algorithm. It compares the result of the Verlet integrator to that of an
* explicit Euler integrator, takes the difference between the two as a measure of the integration
* error in each time step, and continuously adjusts the step size to keep the error below a
* specified tolerance. This allows it to take larger steps on average than a fixed step size
* integrator, while still maintaining comparable accuracy and stability.
* specified tolerance. This both improves the stability of the integrator and allows it to take
* larger steps on average, while still maintaining comparable accuracy to a fixed step size integrator.
*
* It is best not to think of the error tolerance as having any absolute meaning. It is just an
* adjustable parameter that affects the step size and integration accuracy. You
* should try different values to find the largest onethat produces a trajectory sufficiently
* should try different values to find the largest one that produces a trajectory sufficiently
* accurate for your purposes. 0.001 is often a good starting point.
*
* Unlike a fixed step size Verlet integrator, variable step size Verlet is not symplectic. This
......
......@@ -55,6 +55,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return new CudaIntegrateLangevinStepKernel(name, platform, data);
if (name == IntegrateBrownianStepKernel::Name())
return new CudaIntegrateBrownianStepKernel(name, platform, data);
if (name == IntegrateVariableVerletStepKernel::Name())
return new CudaIntegrateVariableVerletStepKernel(name, platform, data);
if (name == ApplyAndersenThermostatKernel::Name())
return new CudaApplyAndersenThermostatKernel(name, platform, data);
if (name == CalcKineticEnergyKernel::Name())
......
......@@ -41,9 +41,9 @@ using namespace std;
static void calcForces(OpenMMContextImpl& context, CudaPlatform::PlatformData& data) {
_gpuContext* gpu = data.gpu;
if (data.nonbondedMethod != NO_CUTOFF && data.stepCount%100 == 0)
if (data.nonbondedMethod != NO_CUTOFF && data.computeForceCount%100 == 0)
gpuReorderAtoms(gpu);
data.stepCount++;
data.computeForceCount++;
kClearForces(gpu);
if (gpu->bIncludeGBSA) {
gpu->bRecalculateBornRadii = true;
......@@ -443,22 +443,20 @@ void CudaIntegrateVerletStepKernel::execute(OpenMMContextImpl& context, const Ve
if (stepSize != prevStepSize) {
// Initialize the GPU parameters.
gpuSetVerletIntegrationParameters(gpu, (float) stepSize);
gpuSetVerletIntegrationParameters(gpu, (float) stepSize, 0.0f);
gpuSetConstants(gpu);
kGenerateRandoms(gpu);
prevStepSize = stepSize;
}
kVerletUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu);
if (data.removeCM) {
int step = (int) (context.getTime()/stepSize);
if (step%data.cmMotionFrequency == 0)
if (data.removeCM)
if (data.stepCount%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true;
}
kVerletUpdatePart2(gpu);
data.time += stepSize;
data.stepCount++;
}
CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() {
......@@ -492,16 +490,15 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu);
if (data.removeCM) {
int step = (int) (context.getTime()/stepSize);
if (step%data.cmMotionFrequency == 0)
if (data.removeCM)
if (data.stepCount%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true;
}
kLangevinUpdatePart2(gpu);
kApplySecondShake(gpu);
kApplySecondSettle(gpu);
kApplySecondCCMA(gpu);
data.time += stepSize;
data.stepCount++;
}
CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
......@@ -535,13 +532,44 @@ void CudaIntegrateBrownianStepKernel::execute(OpenMMContextImpl& context, const
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu);
if (data.removeCM) {
int step = (int) (context.getTime()/stepSize);
if (step%data.cmMotionFrequency == 0)
if (data.removeCM)
if (data.stepCount%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true;
}
kBrownianUpdatePart2(gpu);
data.time += stepSize;
data.stepCount++;
}
CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel() {
}
void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
initializeIntegration(system, data, integrator);
prevErrorTol = -1.0;
}
void CudaIntegrateVariableVerletStepKernel::execute(OpenMMContextImpl& context, const VariableVerletIntegrator& integrator) {
_gpuContext* gpu = data.gpu;
double errorTol = integrator.getErrorTolerance();
if (errorTol != prevErrorTol) {
// Initialize the GPU parameters.
gpuSetVerletIntegrationParameters(gpu, 0.0f, (float) errorTol);
gpuSetConstants(gpu);
prevErrorTol = errorTol;
}
kSelectVerletStepSize(gpu);
kVerletUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu);
if (data.removeCM)
if (data.stepCount%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true;
kVerletUpdatePart2(gpu);
gpu->psStepSize->Download();
data.time += (*gpu->psStepSize)[0].y;
data.stepCount++;
}
CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() {
......
......@@ -373,6 +373,33 @@ private:
double prevTemp, prevFriction, prevStepSize;
};
/**
* This kernel is invoked by VariableVerletIntegrator to take one time step.
*/
class CudaIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStepKernel {
public:
CudaIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : IntegrateVariableVerletStepKernel(name, platform), data(data) {
}
~CudaIntegrateVariableVerletStepKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the VerletIntegrator this kernel will be used for
*/
void initialize(const System& system, const VariableVerletIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
*/
void execute(OpenMMContextImpl& context, const VariableVerletIntegrator& integrator);
private:
CudaPlatform::PlatformData& data;
double prevErrorTol;
};
/**
* This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities.
*/
......
......@@ -52,6 +52,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
registerKernelFactory(CalcKineticEnergyKernel::Name(), factory);
registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
......
......@@ -50,6 +50,7 @@ extern void kLangevinUpdatePart1(gpuContext gpu);
extern void kLangevinUpdatePart2(gpuContext gpu);
extern void kVerletUpdatePart1(gpuContext gpu);
extern void kVerletUpdatePart2(gpuContext gpu);
extern void kSelectVerletStepSize(gpuContext gpu);
extern void kBrownianUpdatePart1(gpuContext gpu);
extern void kBrownianUpdatePart2(gpuContext gpu);
......
......@@ -268,6 +268,8 @@ struct cudaGmxSimulation {
unsigned int* pWorkUnit; // Pointer to work units
unsigned int* pInteractingWorkUnit; // Pointer to work units that have interactions
unsigned int* pInteractionFlag; // Flags for which work units have interactions
float2* pStepSize; // The size of the previous and current time steps
float errorTol; // Error tolerance for selecting the step size
size_t* pInteractionCount; // A count of the number of work units which have interactions
unsigned int nonbond_workBlock; // Number of work units running simultaneously per block in CDLJ and Born Force Part 1
unsigned int bornForce2_workBlock; // Number of work units running second half of Born Forces calculation
......
......@@ -728,7 +728,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
// Compute the constraint coupling matrix
vector<vector<int> > atomAngles(gpu->natoms);
for (int i = 0; i < (int)gpu->sim.bond_angles; i++)
for (int i = 0; i < gpu->sim.bond_angles; i++)
atomAngles[(*gpu->psBondAngleID1)[i].y].push_back(i);
vector<vector<pair<int, double> > > matrix(numCCMA);
if (numCCMA > 0) {
......@@ -902,7 +902,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
(*psCcmaReducedMass)[i] = 0.5f/(invMass1[c]+invMass2[c]);
for (unsigned int j = 0; j < matrix[index].size(); j++) {
(*psConstraintMatrixColumn)[i+j*numCCMA] = matrix[index][j].first;
(*psConstraintMatrixValue)[i+j*numCCMA] = (float)matrix[index][j].second;
(*psConstraintMatrixValue)[i+j*numCCMA] = matrix[index][j].second;
}
(*psConstraintMatrixColumn)[i+matrix[index].size()*numCCMA] = numCCMA;
}
......@@ -1010,6 +1010,10 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu->sim.pCosSinSum = gpu->psEwaldCosSinSum->_pDevStream[0];
gpu->psObcData = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "ObcData");
gpu->sim.pObcData = gpu->psObcData->_pDevStream[0];
gpu->psStepSize = new CUDAStream<float2>(1, 1, "StepSize");
gpu->sim.pStepSize = gpu->psStepSize->_pDevStream[0];
(*gpu->psStepSize)[0] = make_float2(0.0f, 0.0f);
gpu->psStepSize->Upload();
gpu->pAtomSymbol = new unsigned char[gpu->natoms];
gpu->psAtomIndex = new CUDAStream<int>(gpu->sim.paddedNumberOfAtoms, 1, "AtomIndex");
gpu->sim.pAtomIndex = gpu->psAtomIndex->_pDevStream[0];
......@@ -1359,9 +1363,13 @@ void gpuSetIntegrationParameters(gpuContext gpu, float tau, float deltaT, float
}
extern "C"
void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT) {
void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT, float errorTol) {
gpu->sim.deltaT = deltaT;
gpu->sim.oneOverDeltaT = 1.0f/deltaT;
gpu->sim.errorTol = errorTol;
gpu->psStepSize->Download();
(*gpu->psStepSize)[0].y = deltaT;
gpu->psStepSize->Upload();
}
extern "C"
......
......@@ -119,6 +119,7 @@ struct _gpuContext {
CUDAStream<unsigned int>* psInteractingWorkUnit;
CUDAStream<unsigned int>* psInteractionFlag;
CUDAStream<size_t>* psInteractionCount;
CUDAStream<float2>* psStepSize; // The size of the previous and current time steps
CUDAStream<float4>* psRandom4; // Pointer to sets of 4 random numbers for MD integration
CUDAStream<float2>* psRandom2; // Pointer to sets of 2 random numbers for MD integration
CUDAStream<uint4>* psRandomSeed; // Pointer to each random seed
......@@ -211,7 +212,7 @@ extern "C"
void gpuSetIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature);
extern "C"
void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT);
void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT, float errorTol);
extern "C"
void gpuSetBrownianIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature);
......
......@@ -30,7 +30,6 @@
#include <cstdlib>
#include <string>
#include <iostream>
//#include <fstream>
using namespace std;
#include "gputypes.h"
......@@ -90,3 +89,47 @@ void kVerletUpdatePart2(gpuContext gpu)
}
}
__global__ void kSelectVerletStepSize_kernel()
{
// Calculate the error.
extern __shared__ float error[];
error[threadIdx.x] = 0.0f;
unsigned int pos = threadIdx.x;
while (pos < cSim.atoms)
{
float4 force = cSim.pForce4[pos];
float invMass = cSim.pVelm4[pos].w;
error[threadIdx.x] += (force.x*force.x + force.y*force.y + force.z*force.z)*invMass;
pos += blockDim.x * gridDim.x;
}
// Sum the errors from all threads.
for (int offset = 1; offset < cSim.atoms; offset *= 2)
{
if (threadIdx.x+offset < cSim.atoms && threadIdx.x%(2*offset) == 0)
error[threadIdx.x] += error[threadIdx.x+offset];
__syncthreads();
}
if (threadIdx.x == 0)
{
float totalError = sqrt(error[0]/(cSim.atoms*3));
float newStepSize = sqrt(cSim.errorTol/totalError);
float oldStepSize = cSim.pStepSize[0].y;
if (oldStepSize > 0.0f)
newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
if (newStepSize > oldStepSize && newStepSize < 1.2f*oldStepSize)
newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator.
cSim.pStepSize[0].y = newStepSize;
}
}
void kSelectVerletStepSize(gpuContext gpu)
{
// printf("kSelectVerletStepSize\n");
kSelectVerletStepSize_kernel<<<1, gpu->sim.update_threads_per_block, sizeof(float)*gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kSelectVerletStepSize");
}
......@@ -36,6 +36,15 @@ __global__ void kVerletUpdatePart1CM_kernel()
__global__ void kVerletUpdatePart1_kernel()
#endif
{
// Load the step size to take.
__shared__ float dtPos;
__shared__ float dtVel;
if (threadIdx.x == 0)
{
float2 stepSize = cSim.pStepSize[0];
dtPos = stepSize.y;
dtVel = 0.5f*(stepSize.x+stepSize.y);
}
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
#ifdef REMOVE_CM
extern __shared__ float3 sCM[];
......@@ -72,13 +81,15 @@ __global__ void kVerletUpdatePart1_kernel()
offset *= 2;
__syncthreads();
}
#else
__syncthreads();
#endif
while (pos < cSim.atoms)
{
float4 apos = cSim.pPosq[pos];
float4 velocity = cSim.pVelm4[pos];
float4 force = cSim.pForce4[pos];
float dtOverMass = cSim.deltaT*velocity.w;
float dtOverMass = dtVel*velocity.w;
cSim.pOldPosq[pos] = apos;
velocity.x += dtOverMass*force.x;
......@@ -90,9 +101,9 @@ __global__ void kVerletUpdatePart1_kernel()
velocity.z -= sCM[0].z;
#endif
apos.x = velocity.x*cSim.deltaT;
apos.y = velocity.y*cSim.deltaT;
apos.z = velocity.z*cSim.deltaT;
apos.x = velocity.x*dtPos;
apos.y = velocity.y*dtPos;
apos.z = velocity.z*dtPos;
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity;
......@@ -106,7 +117,17 @@ __global__ void kVerletUpdatePart2CM_kernel()
__global__ void kVerletUpdatePart2_kernel()
#endif
{
// Load the step size to take.
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
__shared__ float oneOverDeltaT;
if (threadIdx.x == 0)
{
float dt = cSim.pStepSize[0].y;
oneOverDeltaT = 1.0f/dt;
if (pos == 0)
cSim.pStepSize[0].x = dt;
}
__syncthreads();
#ifdef REMOVE_CM
extern __shared__ float3 sCM[];
float3 CM = {0.0f, 0.0f, 0.0f};
......@@ -118,9 +139,9 @@ __global__ void kVerletUpdatePart2_kernel()
float4 apos = cSim.pPosq[pos];
float4 xPrime = cSim.pPosqP[pos];
velocity.x = cSim.oneOverDeltaT*(xPrime.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z);
velocity.x = oneOverDeltaT*(xPrime.x);
velocity.y = oneOverDeltaT*(xPrime.y);
velocity.z = oneOverDeltaT*(xPrime.z);
xPrime.x += apos.x;
xPrime.y += apos.y;
......
......@@ -802,12 +802,11 @@ void ReferenceIntegrateVariableVerletStepKernel::initialize(const System& system
}
void ReferenceIntegrateVariableVerletStepKernel::execute(OpenMMContextImpl& context, const VariableVerletIntegrator& integrator) {
double stepSize = integrator.getStepSize();
double errorTol = integrator.getErrorTolerance();
RealOpenMM** posData = ((ReferenceFloatStreamImpl&) context.getPositions().getImpl()).getData();
RealOpenMM** velData = ((ReferenceFloatStreamImpl&) context.getVelocities().getImpl()).getData();
RealOpenMM** forceData = const_cast<RealOpenMM**>(((ReferenceFloatStreamImpl&) context.getForces().getImpl()).getData()); // Reference code needs to be made const correct
if (dynamics == 0 || stepSize != prevStepSize || errorTol != prevErrorTol) {
if (dynamics == 0 || errorTol != prevErrorTol) {
// Recreate the computation objects with the new parameters.
if (dynamics) {
......@@ -819,7 +818,6 @@ void ReferenceIntegrateVariableVerletStepKernel::execute(OpenMMContextImpl& cont
findAnglesForCCMA(context.getSystem(), angles);
constraints = new ReferenceCCMAAlgorithm(context.getSystem().getNumParticles(), numConstraints, constraintIndices, constraintDistances, masses, angles, (RealOpenMM)integrator.getConstraintTolerance());
dynamics->setReferenceConstraintAlgorithm(constraints);
prevStepSize = stepSize;
prevErrorTol = errorTol;
}
dynamics->update(context.getSystem().getNumParticles(), posData, velData, forceData, masses);
......
......@@ -472,7 +472,7 @@ private:
RealOpenMM* constraintDistances;
int** constraintIndices;
int numConstraints;
double prevStepSize, prevErrorTol;
double prevErrorTol;
};
/**
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment