Commit a1b2fd30 authored by Peter Eastman's avatar Peter Eastman
Browse files

Created CUDA implementation of VariableVerletIntegrator

parent 5b5ec4c3
...@@ -40,15 +40,15 @@ namespace OpenMM { ...@@ -40,15 +40,15 @@ namespace OpenMM {
/** /**
* This is an error contolled, variable time step Integrator that simulates a System using the * This is an error contolled, variable time step Integrator that simulates a System using the
* velocity Verlet algorithm. It compares the result of the Verlet integrator to that of an * leap-frog Verlet algorithm. It compares the result of the Verlet integrator to that of an
* explicit Euler integrator, takes the difference between the two as a measure of the integration * explicit Euler integrator, takes the difference between the two as a measure of the integration
* error in each time step, and continuously adjusts the step size to keep the error below a * error in each time step, and continuously adjusts the step size to keep the error below a
* specified tolerance. This allows it to take larger steps on average than a fixed step size * specified tolerance. This both improves the stability of the integrator and allows it to take
* integrator, while still maintaining comparable accuracy and stability. * larger steps on average, while still maintaining comparable accuracy to a fixed step size integrator.
* *
* It is best not to think of the error tolerance as having any absolute meaning. It is just an * It is best not to think of the error tolerance as having any absolute meaning. It is just an
* adjustable parameter that affects the step size and integration accuracy. You * adjustable parameter that affects the step size and integration accuracy. You
* should try different values to find the largest onethat produces a trajectory sufficiently * should try different values to find the largest one that produces a trajectory sufficiently
* accurate for your purposes. 0.001 is often a good starting point. * accurate for your purposes. 0.001 is often a good starting point.
* *
* Unlike a fixed step size Verlet integrator, variable step size Verlet is not symplectic. This * Unlike a fixed step size Verlet integrator, variable step size Verlet is not symplectic. This
......
...@@ -55,6 +55,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform ...@@ -55,6 +55,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return new CudaIntegrateLangevinStepKernel(name, platform, data); return new CudaIntegrateLangevinStepKernel(name, platform, data);
if (name == IntegrateBrownianStepKernel::Name()) if (name == IntegrateBrownianStepKernel::Name())
return new CudaIntegrateBrownianStepKernel(name, platform, data); return new CudaIntegrateBrownianStepKernel(name, platform, data);
if (name == IntegrateVariableVerletStepKernel::Name())
return new CudaIntegrateVariableVerletStepKernel(name, platform, data);
if (name == ApplyAndersenThermostatKernel::Name()) if (name == ApplyAndersenThermostatKernel::Name())
return new CudaApplyAndersenThermostatKernel(name, platform, data); return new CudaApplyAndersenThermostatKernel(name, platform, data);
if (name == CalcKineticEnergyKernel::Name()) if (name == CalcKineticEnergyKernel::Name())
......
...@@ -41,9 +41,9 @@ using namespace std; ...@@ -41,9 +41,9 @@ using namespace std;
static void calcForces(OpenMMContextImpl& context, CudaPlatform::PlatformData& data) { static void calcForces(OpenMMContextImpl& context, CudaPlatform::PlatformData& data) {
_gpuContext* gpu = data.gpu; _gpuContext* gpu = data.gpu;
if (data.nonbondedMethod != NO_CUTOFF && data.stepCount%100 == 0) if (data.nonbondedMethod != NO_CUTOFF && data.computeForceCount%100 == 0)
gpuReorderAtoms(gpu); gpuReorderAtoms(gpu);
data.stepCount++; data.computeForceCount++;
kClearForces(gpu); kClearForces(gpu);
if (gpu->bIncludeGBSA) { if (gpu->bIncludeGBSA) {
gpu->bRecalculateBornRadii = true; gpu->bRecalculateBornRadii = true;
...@@ -443,22 +443,20 @@ void CudaIntegrateVerletStepKernel::execute(OpenMMContextImpl& context, const Ve ...@@ -443,22 +443,20 @@ void CudaIntegrateVerletStepKernel::execute(OpenMMContextImpl& context, const Ve
if (stepSize != prevStepSize) { if (stepSize != prevStepSize) {
// Initialize the GPU parameters. // Initialize the GPU parameters.
gpuSetVerletIntegrationParameters(gpu, (float) stepSize); gpuSetVerletIntegrationParameters(gpu, (float) stepSize, 0.0f);
gpuSetConstants(gpu); gpuSetConstants(gpu);
kGenerateRandoms(gpu);
prevStepSize = stepSize; prevStepSize = stepSize;
} }
kVerletUpdatePart1(gpu); kVerletUpdatePart1(gpu);
kApplyFirstShake(gpu); kApplyFirstShake(gpu);
kApplyFirstSettle(gpu); kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu); kApplyFirstCCMA(gpu);
if (data.removeCM) { if (data.removeCM)
int step = (int) (context.getTime()/stepSize); if (data.stepCount%data.cmMotionFrequency == 0)
if (step%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true; gpu->bCalculateCM = true;
}
kVerletUpdatePart2(gpu); kVerletUpdatePart2(gpu);
data.time += stepSize; data.time += stepSize;
data.stepCount++;
} }
CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() { CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() {
...@@ -492,16 +490,15 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const ...@@ -492,16 +490,15 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
kApplyFirstShake(gpu); kApplyFirstShake(gpu);
kApplyFirstSettle(gpu); kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu); kApplyFirstCCMA(gpu);
if (data.removeCM) { if (data.removeCM)
int step = (int) (context.getTime()/stepSize); if (data.stepCount%data.cmMotionFrequency == 0)
if (step%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true; gpu->bCalculateCM = true;
}
kLangevinUpdatePart2(gpu); kLangevinUpdatePart2(gpu);
kApplySecondShake(gpu); kApplySecondShake(gpu);
kApplySecondSettle(gpu); kApplySecondSettle(gpu);
kApplySecondCCMA(gpu); kApplySecondCCMA(gpu);
data.time += stepSize; data.time += stepSize;
data.stepCount++;
} }
CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() { CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
...@@ -535,13 +532,44 @@ void CudaIntegrateBrownianStepKernel::execute(OpenMMContextImpl& context, const ...@@ -535,13 +532,44 @@ void CudaIntegrateBrownianStepKernel::execute(OpenMMContextImpl& context, const
kApplyFirstShake(gpu); kApplyFirstShake(gpu);
kApplyFirstSettle(gpu); kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu); kApplyFirstCCMA(gpu);
if (data.removeCM) { if (data.removeCM)
int step = (int) (context.getTime()/stepSize); if (data.stepCount%data.cmMotionFrequency == 0)
if (step%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true; gpu->bCalculateCM = true;
}
kBrownianUpdatePart2(gpu); kBrownianUpdatePart2(gpu);
data.time += stepSize; data.time += stepSize;
data.stepCount++;
}
CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel() {
}
void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
initializeIntegration(system, data, integrator);
prevErrorTol = -1.0;
}
void CudaIntegrateVariableVerletStepKernel::execute(OpenMMContextImpl& context, const VariableVerletIntegrator& integrator) {
_gpuContext* gpu = data.gpu;
double errorTol = integrator.getErrorTolerance();
if (errorTol != prevErrorTol) {
// Initialize the GPU parameters.
gpuSetVerletIntegrationParameters(gpu, 0.0f, (float) errorTol);
gpuSetConstants(gpu);
prevErrorTol = errorTol;
}
kSelectVerletStepSize(gpu);
kVerletUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu);
if (data.removeCM)
if (data.stepCount%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true;
kVerletUpdatePart2(gpu);
gpu->psStepSize->Download();
data.time += (*gpu->psStepSize)[0].y;
data.stepCount++;
} }
CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() { CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() {
......
...@@ -373,6 +373,33 @@ private: ...@@ -373,6 +373,33 @@ private:
double prevTemp, prevFriction, prevStepSize; double prevTemp, prevFriction, prevStepSize;
}; };
/**
* This kernel is invoked by VariableVerletIntegrator to take one time step.
*/
class CudaIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStepKernel {
public:
CudaIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : IntegrateVariableVerletStepKernel(name, platform), data(data) {
}
~CudaIntegrateVariableVerletStepKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the VerletIntegrator this kernel will be used for
*/
void initialize(const System& system, const VariableVerletIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
*/
void execute(OpenMMContextImpl& context, const VariableVerletIntegrator& integrator);
private:
CudaPlatform::PlatformData& data;
double prevErrorTol;
};
/** /**
* This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities. * This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities.
*/ */
......
...@@ -52,6 +52,7 @@ CudaPlatform::CudaPlatform() { ...@@ -52,6 +52,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory); registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory); registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory); registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory); registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
registerKernelFactory(CalcKineticEnergyKernel::Name(), factory); registerKernelFactory(CalcKineticEnergyKernel::Name(), factory);
registerKernelFactory(RemoveCMMotionKernel::Name(), factory); registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
......
...@@ -50,6 +50,7 @@ extern void kLangevinUpdatePart1(gpuContext gpu); ...@@ -50,6 +50,7 @@ extern void kLangevinUpdatePart1(gpuContext gpu);
extern void kLangevinUpdatePart2(gpuContext gpu); extern void kLangevinUpdatePart2(gpuContext gpu);
extern void kVerletUpdatePart1(gpuContext gpu); extern void kVerletUpdatePart1(gpuContext gpu);
extern void kVerletUpdatePart2(gpuContext gpu); extern void kVerletUpdatePart2(gpuContext gpu);
extern void kSelectVerletStepSize(gpuContext gpu);
extern void kBrownianUpdatePart1(gpuContext gpu); extern void kBrownianUpdatePart1(gpuContext gpu);
extern void kBrownianUpdatePart2(gpuContext gpu); extern void kBrownianUpdatePart2(gpuContext gpu);
......
...@@ -268,6 +268,8 @@ struct cudaGmxSimulation { ...@@ -268,6 +268,8 @@ struct cudaGmxSimulation {
unsigned int* pWorkUnit; // Pointer to work units unsigned int* pWorkUnit; // Pointer to work units
unsigned int* pInteractingWorkUnit; // Pointer to work units that have interactions unsigned int* pInteractingWorkUnit; // Pointer to work units that have interactions
unsigned int* pInteractionFlag; // Flags for which work units have interactions unsigned int* pInteractionFlag; // Flags for which work units have interactions
float2* pStepSize; // The size of the previous and current time steps
float errorTol; // Error tolerance for selecting the step size
size_t* pInteractionCount; // A count of the number of work units which have interactions size_t* pInteractionCount; // A count of the number of work units which have interactions
unsigned int nonbond_workBlock; // Number of work units running simultaneously per block in CDLJ and Born Force Part 1 unsigned int nonbond_workBlock; // Number of work units running simultaneously per block in CDLJ and Born Force Part 1
unsigned int bornForce2_workBlock; // Number of work units running second half of Born Forces calculation unsigned int bornForce2_workBlock; // Number of work units running second half of Born Forces calculation
......
...@@ -728,7 +728,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const ...@@ -728,7 +728,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
// Compute the constraint coupling matrix // Compute the constraint coupling matrix
vector<vector<int> > atomAngles(gpu->natoms); vector<vector<int> > atomAngles(gpu->natoms);
for (int i = 0; i < (int)gpu->sim.bond_angles; i++) for (int i = 0; i < gpu->sim.bond_angles; i++)
atomAngles[(*gpu->psBondAngleID1)[i].y].push_back(i); atomAngles[(*gpu->psBondAngleID1)[i].y].push_back(i);
vector<vector<pair<int, double> > > matrix(numCCMA); vector<vector<pair<int, double> > > matrix(numCCMA);
if (numCCMA > 0) { if (numCCMA > 0) {
...@@ -902,7 +902,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const ...@@ -902,7 +902,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
(*psCcmaReducedMass)[i] = 0.5f/(invMass1[c]+invMass2[c]); (*psCcmaReducedMass)[i] = 0.5f/(invMass1[c]+invMass2[c]);
for (unsigned int j = 0; j < matrix[index].size(); j++) { for (unsigned int j = 0; j < matrix[index].size(); j++) {
(*psConstraintMatrixColumn)[i+j*numCCMA] = matrix[index][j].first; (*psConstraintMatrixColumn)[i+j*numCCMA] = matrix[index][j].first;
(*psConstraintMatrixValue)[i+j*numCCMA] = (float)matrix[index][j].second; (*psConstraintMatrixValue)[i+j*numCCMA] = matrix[index][j].second;
} }
(*psConstraintMatrixColumn)[i+matrix[index].size()*numCCMA] = numCCMA; (*psConstraintMatrixColumn)[i+matrix[index].size()*numCCMA] = numCCMA;
} }
...@@ -1010,6 +1010,10 @@ int gpuAllocateInitialBuffers(gpuContext gpu) ...@@ -1010,6 +1010,10 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu->sim.pCosSinSum = gpu->psEwaldCosSinSum->_pDevStream[0]; gpu->sim.pCosSinSum = gpu->psEwaldCosSinSum->_pDevStream[0];
gpu->psObcData = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "ObcData"); gpu->psObcData = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "ObcData");
gpu->sim.pObcData = gpu->psObcData->_pDevStream[0]; gpu->sim.pObcData = gpu->psObcData->_pDevStream[0];
gpu->psStepSize = new CUDAStream<float2>(1, 1, "StepSize");
gpu->sim.pStepSize = gpu->psStepSize->_pDevStream[0];
(*gpu->psStepSize)[0] = make_float2(0.0f, 0.0f);
gpu->psStepSize->Upload();
gpu->pAtomSymbol = new unsigned char[gpu->natoms]; gpu->pAtomSymbol = new unsigned char[gpu->natoms];
gpu->psAtomIndex = new CUDAStream<int>(gpu->sim.paddedNumberOfAtoms, 1, "AtomIndex"); gpu->psAtomIndex = new CUDAStream<int>(gpu->sim.paddedNumberOfAtoms, 1, "AtomIndex");
gpu->sim.pAtomIndex = gpu->psAtomIndex->_pDevStream[0]; gpu->sim.pAtomIndex = gpu->psAtomIndex->_pDevStream[0];
...@@ -1359,9 +1363,13 @@ void gpuSetIntegrationParameters(gpuContext gpu, float tau, float deltaT, float ...@@ -1359,9 +1363,13 @@ void gpuSetIntegrationParameters(gpuContext gpu, float tau, float deltaT, float
} }
extern "C" extern "C"
void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT) { void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT, float errorTol) {
gpu->sim.deltaT = deltaT; gpu->sim.deltaT = deltaT;
gpu->sim.oneOverDeltaT = 1.0f/deltaT; gpu->sim.oneOverDeltaT = 1.0f/deltaT;
gpu->sim.errorTol = errorTol;
gpu->psStepSize->Download();
(*gpu->psStepSize)[0].y = deltaT;
gpu->psStepSize->Upload();
} }
extern "C" extern "C"
......
...@@ -119,6 +119,7 @@ struct _gpuContext { ...@@ -119,6 +119,7 @@ struct _gpuContext {
CUDAStream<unsigned int>* psInteractingWorkUnit; CUDAStream<unsigned int>* psInteractingWorkUnit;
CUDAStream<unsigned int>* psInteractionFlag; CUDAStream<unsigned int>* psInteractionFlag;
CUDAStream<size_t>* psInteractionCount; CUDAStream<size_t>* psInteractionCount;
CUDAStream<float2>* psStepSize; // The size of the previous and current time steps
CUDAStream<float4>* psRandom4; // Pointer to sets of 4 random numbers for MD integration CUDAStream<float4>* psRandom4; // Pointer to sets of 4 random numbers for MD integration
CUDAStream<float2>* psRandom2; // Pointer to sets of 2 random numbers for MD integration CUDAStream<float2>* psRandom2; // Pointer to sets of 2 random numbers for MD integration
CUDAStream<uint4>* psRandomSeed; // Pointer to each random seed CUDAStream<uint4>* psRandomSeed; // Pointer to each random seed
...@@ -211,7 +212,7 @@ extern "C" ...@@ -211,7 +212,7 @@ extern "C"
void gpuSetIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature); void gpuSetIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature);
extern "C" extern "C"
void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT); void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT, float errorTol);
extern "C" extern "C"
void gpuSetBrownianIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature); void gpuSetBrownianIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature);
......
...@@ -30,7 +30,6 @@ ...@@ -30,7 +30,6 @@
#include <cstdlib> #include <cstdlib>
#include <string> #include <string>
#include <iostream> #include <iostream>
//#include <fstream>
using namespace std; using namespace std;
#include "gputypes.h" #include "gputypes.h"
...@@ -90,3 +89,47 @@ void kVerletUpdatePart2(gpuContext gpu) ...@@ -90,3 +89,47 @@ void kVerletUpdatePart2(gpuContext gpu)
} }
} }
__global__ void kSelectVerletStepSize_kernel()
{
// Calculate the error.
extern __shared__ float error[];
error[threadIdx.x] = 0.0f;
unsigned int pos = threadIdx.x;
while (pos < cSim.atoms)
{
float4 force = cSim.pForce4[pos];
float invMass = cSim.pVelm4[pos].w;
error[threadIdx.x] += (force.x*force.x + force.y*force.y + force.z*force.z)*invMass;
pos += blockDim.x * gridDim.x;
}
// Sum the errors from all threads.
for (int offset = 1; offset < cSim.atoms; offset *= 2)
{
if (threadIdx.x+offset < cSim.atoms && threadIdx.x%(2*offset) == 0)
error[threadIdx.x] += error[threadIdx.x+offset];
__syncthreads();
}
if (threadIdx.x == 0)
{
float totalError = sqrt(error[0]/(cSim.atoms*3));
float newStepSize = sqrt(cSim.errorTol/totalError);
float oldStepSize = cSim.pStepSize[0].y;
if (oldStepSize > 0.0f)
newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
if (newStepSize > oldStepSize && newStepSize < 1.2f*oldStepSize)
newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator.
cSim.pStepSize[0].y = newStepSize;
}
}
void kSelectVerletStepSize(gpuContext gpu)
{
// printf("kSelectVerletStepSize\n");
kSelectVerletStepSize_kernel<<<1, gpu->sim.update_threads_per_block, sizeof(float)*gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kSelectVerletStepSize");
}
...@@ -36,6 +36,15 @@ __global__ void kVerletUpdatePart1CM_kernel() ...@@ -36,6 +36,15 @@ __global__ void kVerletUpdatePart1CM_kernel()
__global__ void kVerletUpdatePart1_kernel() __global__ void kVerletUpdatePart1_kernel()
#endif #endif
{ {
// Load the step size to take.
__shared__ float dtPos;
__shared__ float dtVel;
if (threadIdx.x == 0)
{
float2 stepSize = cSim.pStepSize[0];
dtPos = stepSize.y;
dtVel = 0.5f*(stepSize.x+stepSize.y);
}
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x; unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
#ifdef REMOVE_CM #ifdef REMOVE_CM
extern __shared__ float3 sCM[]; extern __shared__ float3 sCM[];
...@@ -72,13 +81,15 @@ __global__ void kVerletUpdatePart1_kernel() ...@@ -72,13 +81,15 @@ __global__ void kVerletUpdatePart1_kernel()
offset *= 2; offset *= 2;
__syncthreads(); __syncthreads();
} }
#else
__syncthreads();
#endif #endif
while (pos < cSim.atoms) while (pos < cSim.atoms)
{ {
float4 apos = cSim.pPosq[pos]; float4 apos = cSim.pPosq[pos];
float4 velocity = cSim.pVelm4[pos]; float4 velocity = cSim.pVelm4[pos];
float4 force = cSim.pForce4[pos]; float4 force = cSim.pForce4[pos];
float dtOverMass = cSim.deltaT*velocity.w; float dtOverMass = dtVel*velocity.w;
cSim.pOldPosq[pos] = apos; cSim.pOldPosq[pos] = apos;
velocity.x += dtOverMass*force.x; velocity.x += dtOverMass*force.x;
...@@ -90,9 +101,9 @@ __global__ void kVerletUpdatePart1_kernel() ...@@ -90,9 +101,9 @@ __global__ void kVerletUpdatePart1_kernel()
velocity.z -= sCM[0].z; velocity.z -= sCM[0].z;
#endif #endif
apos.x = velocity.x*cSim.deltaT; apos.x = velocity.x*dtPos;
apos.y = velocity.y*cSim.deltaT; apos.y = velocity.y*dtPos;
apos.z = velocity.z*cSim.deltaT; apos.z = velocity.z*dtPos;
cSim.pPosqP[pos] = apos; cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity; cSim.pVelm4[pos] = velocity;
...@@ -106,7 +117,17 @@ __global__ void kVerletUpdatePart2CM_kernel() ...@@ -106,7 +117,17 @@ __global__ void kVerletUpdatePart2CM_kernel()
__global__ void kVerletUpdatePart2_kernel() __global__ void kVerletUpdatePart2_kernel()
#endif #endif
{ {
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x; // Load the step size to take.
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
__shared__ float oneOverDeltaT;
if (threadIdx.x == 0)
{
float dt = cSim.pStepSize[0].y;
oneOverDeltaT = 1.0f/dt;
if (pos == 0)
cSim.pStepSize[0].x = dt;
}
__syncthreads();
#ifdef REMOVE_CM #ifdef REMOVE_CM
extern __shared__ float3 sCM[]; extern __shared__ float3 sCM[];
float3 CM = {0.0f, 0.0f, 0.0f}; float3 CM = {0.0f, 0.0f, 0.0f};
...@@ -118,9 +139,9 @@ __global__ void kVerletUpdatePart2_kernel() ...@@ -118,9 +139,9 @@ __global__ void kVerletUpdatePart2_kernel()
float4 apos = cSim.pPosq[pos]; float4 apos = cSim.pPosq[pos];
float4 xPrime = cSim.pPosqP[pos]; float4 xPrime = cSim.pPosqP[pos];
velocity.x = cSim.oneOverDeltaT*(xPrime.x); velocity.x = oneOverDeltaT*(xPrime.x);
velocity.y = cSim.oneOverDeltaT*(xPrime.y); velocity.y = oneOverDeltaT*(xPrime.y);
velocity.z = cSim.oneOverDeltaT*(xPrime.z); velocity.z = oneOverDeltaT*(xPrime.z);
xPrime.x += apos.x; xPrime.x += apos.x;
xPrime.y += apos.y; xPrime.y += apos.y;
......
...@@ -802,12 +802,11 @@ void ReferenceIntegrateVariableVerletStepKernel::initialize(const System& system ...@@ -802,12 +802,11 @@ void ReferenceIntegrateVariableVerletStepKernel::initialize(const System& system
} }
void ReferenceIntegrateVariableVerletStepKernel::execute(OpenMMContextImpl& context, const VariableVerletIntegrator& integrator) { void ReferenceIntegrateVariableVerletStepKernel::execute(OpenMMContextImpl& context, const VariableVerletIntegrator& integrator) {
double stepSize = integrator.getStepSize();
double errorTol = integrator.getErrorTolerance(); double errorTol = integrator.getErrorTolerance();
RealOpenMM** posData = ((ReferenceFloatStreamImpl&) context.getPositions().getImpl()).getData(); RealOpenMM** posData = ((ReferenceFloatStreamImpl&) context.getPositions().getImpl()).getData();
RealOpenMM** velData = ((ReferenceFloatStreamImpl&) context.getVelocities().getImpl()).getData(); RealOpenMM** velData = ((ReferenceFloatStreamImpl&) context.getVelocities().getImpl()).getData();
RealOpenMM** forceData = const_cast<RealOpenMM**>(((ReferenceFloatStreamImpl&) context.getForces().getImpl()).getData()); // Reference code needs to be made const correct RealOpenMM** forceData = const_cast<RealOpenMM**>(((ReferenceFloatStreamImpl&) context.getForces().getImpl()).getData()); // Reference code needs to be made const correct
if (dynamics == 0 || stepSize != prevStepSize || errorTol != prevErrorTol) { if (dynamics == 0 || errorTol != prevErrorTol) {
// Recreate the computation objects with the new parameters. // Recreate the computation objects with the new parameters.
if (dynamics) { if (dynamics) {
...@@ -819,7 +818,6 @@ void ReferenceIntegrateVariableVerletStepKernel::execute(OpenMMContextImpl& cont ...@@ -819,7 +818,6 @@ void ReferenceIntegrateVariableVerletStepKernel::execute(OpenMMContextImpl& cont
findAnglesForCCMA(context.getSystem(), angles); findAnglesForCCMA(context.getSystem(), angles);
constraints = new ReferenceCCMAAlgorithm(context.getSystem().getNumParticles(), numConstraints, constraintIndices, constraintDistances, masses, angles, (RealOpenMM)integrator.getConstraintTolerance()); constraints = new ReferenceCCMAAlgorithm(context.getSystem().getNumParticles(), numConstraints, constraintIndices, constraintDistances, masses, angles, (RealOpenMM)integrator.getConstraintTolerance());
dynamics->setReferenceConstraintAlgorithm(constraints); dynamics->setReferenceConstraintAlgorithm(constraints);
prevStepSize = stepSize;
prevErrorTol = errorTol; prevErrorTol = errorTol;
} }
dynamics->update(context.getSystem().getNumParticles(), posData, velData, forceData, masses); dynamics->update(context.getSystem().getNumParticles(), posData, velData, forceData, masses);
......
...@@ -472,7 +472,7 @@ private: ...@@ -472,7 +472,7 @@ private:
RealOpenMM* constraintDistances; RealOpenMM* constraintDistances;
int** constraintIndices; int** constraintIndices;
int numConstraints; int numConstraints;
double prevStepSize, prevErrorTol; double prevErrorTol;
}; };
/** /**
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment