Commit bc00090d authored by Peter Eastman's avatar Peter Eastman
Browse files

Created CUDA implementation of VariableLangevinIntegrator

parent 68e02e80
......@@ -57,6 +57,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return new CudaIntegrateBrownianStepKernel(name, platform, data);
if (name == IntegrateVariableVerletStepKernel::Name())
return new CudaIntegrateVariableVerletStepKernel(name, platform, data);
if (name == IntegrateVariableLangevinStepKernel::Name())
return new CudaIntegrateVariableLangevinStepKernel(name, platform, data);
if (name == ApplyAndersenThermostatKernel::Name())
return new CudaApplyAndersenThermostatKernel(name, platform, data);
if (name == CalcKineticEnergyKernel::Name())
......
......@@ -493,7 +493,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
// Initialize the GPU parameters.
double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
gpuSetIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature);
gpuSetLangevinIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature, 0.0f);
gpuSetConstants(gpu);
kGenerateRandoms(gpu);
prevTemp = temperature;
......@@ -589,6 +589,53 @@ void CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const
data.stepCount++;
}
CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKernel() {
}
void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
initializeIntegration(system, data, integrator);
_gpuContext* gpu = data.gpu;
gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
gpuInitializeRandoms(gpu);
prevErrorTol = -1.0;
}
void CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
_gpuContext* gpu = data.gpu;
double temperature = integrator.getTemperature();
double friction = integrator.getFriction();
double errorTol = integrator.getErrorTolerance();
if (temperature != prevTemp || friction != prevFriction || errorTol != prevErrorTol) {
// Initialize the GPU parameters.
double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
gpuSetLangevinIntegrationParameters(gpu, (float) tau, 0.0f, (float) temperature, errorTol);
gpuSetConstants(gpu);
kGenerateRandoms(gpu);
prevTemp = temperature;
prevFriction = friction;
prevErrorTol = errorTol;
}
float maxStepSize = (float)(maxTime-data.time);
kSelectLangevinStepSize(gpu, maxStepSize);
kLangevinUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu);
if (data.removeCM)
if (data.stepCount%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true;
kLangevinUpdatePart2(gpu);
kApplySecondShake(gpu);
kApplySecondSettle(gpu);
kApplySecondCCMA(gpu);
gpu->psStepSize->Download();
data.time += (*gpu->psStepSize)[0].y;
if ((*gpu->psStepSize)[0].y == maxStepSize)
data.time = maxTime; // Avoid round-off error
data.stepCount++;
}
CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() {
}
......
......@@ -401,6 +401,34 @@ private:
double prevErrorTol;
};
/**
* This kernel is invoked by VariableLangevinIntegrator to take one time step.
*/
class CudaIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
public:
CudaIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) : IntegrateVariableLangevinStepKernel(name, platform), data(data) {
}
~CudaIntegrateVariableLangevinStepKernel();
/**
* Initialize the kernel, setting up the particle masses.
*
* @param system the System this kernel will be applied to
* @param integrator the VariableLangevinIntegrator this kernel will be used for
*/
void initialize(const System& system, const VariableLangevinIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VariableLangevinIntegrator this kernel is being used for
* @param maxTime the maximum time beyond which the simulation should not be advanced
*/
void execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime);
private:
CudaPlatform::PlatformData& data;
double prevTemp, prevFriction, prevErrorTol;
};
/**
* This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities.
*/
......
......@@ -58,6 +58,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
registerKernelFactory(CalcKineticEnergyKernel::Name(), factory);
registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
......
......@@ -48,6 +48,7 @@ extern void kApplySecondCCMA(gpuContext gpu);
extern void kApplySecondSettle(gpuContext gpu);
extern void kLangevinUpdatePart1(gpuContext gpu);
extern void kLangevinUpdatePart2(gpuContext gpu);
extern void kSelectLangevinStepSize(gpuContext gpu, float maxTimeStep);
extern void kVerletUpdatePart1(gpuContext gpu);
extern void kVerletUpdatePart2(gpuContext gpu);
extern void kSelectVerletStepSize(gpuContext gpu, float maxTimeStep);
......
......@@ -269,6 +269,7 @@ struct cudaGmxSimulation {
unsigned int* pInteractingWorkUnit; // Pointer to work units that have interactions
unsigned int* pInteractionFlag; // Flags for which work units have interactions
float2* pStepSize; // The size of the previous and current time steps
float* pLangevinParameters; // Parameters used for Langevin integration
float errorTol; // Error tolerance for selecting the step size
size_t* pInteractionCount; // A count of the number of work units which have interactions
unsigned int nonbond_workBlock; // Number of work units running simultaneously per block in CDLJ and Born Force Part 1
......@@ -315,27 +316,11 @@ struct cudaGmxSimulation {
float gammaOBC; // OBC gamma factor
float deltaT; // Molecular dynamics deltaT constant
float oneOverDeltaT; // 1/deltaT
float B; // Molecular dynamics B constant
float C; // Molecular dynamics C constant
float D; // Molecular dynamics D constant
float EPH; // Molecular dynamics EPH constant
float EMH; // Molecular dynamics EMH constant
float EM; // Molecular dynamics EM constant
float EP; // Molecular dynamics EP constant
float GDT; // Molecular dynamics GDT constant
float OneMinusEM; // Molecular dynamics OneMinusEM constant
float TauOneMinusEM; // Molecular dynamics TauOneMinusEM constant
float TauDOverEMMinusOne; // Molecular dynamics TauDOverEMMinusOne constant
float T; // Molecular dynamics T constant
float T; // Temperature
float kT; // Boltzmann's constant times T
float V; // Molecular dynamics V constant
float X; // Molecular dynamics X constant
float Yv; // Molecular dynamics Yv constant
float Yx; // Molecular dynamics Yx constant
float tau; // Molecular dynamics tau constant
float fix1; // Molecular dynamics fix1 constant
float oneOverFix1; // Molecular dynamics reciprocal of fix1 constant
float DOverTauC; // Molecular dynamics DOverTauC constant
float noiseAmplitude; // The magnitude of the noise for Brownian dynamics
float tau; // Inverse friction for Langevin or Brownian dynamics
float tauDeltaT; // tau*deltaT
float collisionFrequency; // Collision frequency for Andersen thermostat
float2* pObcData; // Pointer to fixed Born data
float2* pAttr; // Pointer to additional atom attributes (sig, eps)
......
......@@ -1004,6 +1004,8 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu->sim.pStepSize = gpu->psStepSize->_pDevStream[0];
(*gpu->psStepSize)[0] = make_float2(0.0f, 0.0f);
gpu->psStepSize->Upload();
gpu->psLangevinParameters = new CUDAStream<float>(11, 1, "LangevinParameters");
gpu->sim.pLangevinParameters = gpu->psLangevinParameters->_pDevStream[0];
gpu->pAtomSymbol = new unsigned char[gpu->natoms];
gpu->psAtomIndex = new CUDAStream<int>(gpu->sim.paddedNumberOfAtoms, 1, "AtomIndex");
gpu->sim.pAtomIndex = gpu->psAtomIndex->_pDevStream[0];
......@@ -1196,8 +1198,8 @@ void* gpuInit(int numAtoms, unsigned int device)
gpu->sim.update_threads_per_block = (gpu->natoms + gpu->sim.blocks - 1) / gpu->sim.blocks;
if (gpu->sim.update_threads_per_block > gpu->sim.max_update_threads_per_block)
gpu->sim.update_threads_per_block = gpu->sim.max_update_threads_per_block;
if (gpu->sim.update_threads_per_block < 1)
gpu->sim.update_threads_per_block = 1;
if (gpu->sim.update_threads_per_block < gpu->psLangevinParameters->_length)
gpu->sim.update_threads_per_block = gpu->psLangevinParameters->_length;
gpu->sim.bf_reduce_threads_per_block = gpu->sim.update_threads_per_block;
gpu->sim.bsf_reduce_threads_per_block = (gpu->sim.stride4 + gpu->natoms + gpu->sim.blocks - 1) / gpu->sim.blocks;
gpu->sim.bsf_reduce_threads_per_block = ((gpu->sim.bsf_reduce_threads_per_block + (GRID - 1)) / GRID) * GRID;
......@@ -1220,7 +1222,7 @@ void* gpuInit(int numAtoms, unsigned int device)
gpu->sim.alphaOBC = alphaOBC;
gpu->sim.betaOBC = betaOBC;
gpu->sim.gammaOBC = gammaOBC;
gpuSetIntegrationParameters(gpu, 1.0f, 2.0e-3f, 300.0f);
gpuSetLangevinIntegrationParameters(gpu, 1.0f, 2.0e-3f, 300.0f, 0.0f);
gpu->sim.maxShakeIterations = 15;
gpu->sim.shakeTolerance = 1.0e-04f * 2.0f;
gpu->sim.InvMassJ = 9.920635e-001f;
......@@ -1288,28 +1290,30 @@ void* gpuInit(int numAtoms, unsigned int device)
}
extern "C"
void gpuSetIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature) {
void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature, float errorTol) {
gpu->sim.deltaT = deltaT;
gpu->sim.oneOverDeltaT = 1.0f/deltaT;
gpu->sim.errorTol = errorTol;
gpu->sim.tau = tau;
gpu->sim.GDT = gpu->sim.deltaT / gpu->sim.tau;
gpu->sim.EPH = exp(0.5f * gpu->sim.GDT);
gpu->sim.EMH = exp(-0.5f * gpu->sim.GDT);
gpu->sim.EP = exp(gpu->sim.GDT);
gpu->sim.EM = exp(-gpu->sim.GDT);
gpu->sim.OneMinusEM = 1.0f - gpu->sim.EM;
gpu->sim.TauOneMinusEM = gpu->sim.tau * gpu->sim.OneMinusEM;
if (gpu->sim.GDT >= 0.1f)
gpu->sim.T = temperature;
gpu->sim.kT = BOLTZ * gpu->sim.T;
float GDT = gpu->sim.deltaT / gpu->sim.tau;
float EPH = exp(0.5f * GDT);
float EMH = exp(-0.5f * GDT);
float EP = exp(GDT);
float EM = exp(-GDT);
float B, C, D;
if (GDT >= 0.1f)
{
float term1 = gpu->sim.EPH - 1.0f;
float term1 = EPH - 1.0f;
term1 *= term1;
gpu->sim.B = gpu->sim.GDT * (gpu->sim.EP - 1.0f) - 4.0f * term1;
gpu->sim.C = gpu->sim.GDT - 3.0f + 4.0f * gpu->sim.EMH - gpu->sim.EM;
gpu->sim.D = 2.0f - gpu->sim.EPH - gpu->sim.EMH;
B = GDT * (EP - 1.0f) - 4.0f * term1;
C = GDT - 3.0f + 4.0f * EMH - EM;
D = 2.0f - EPH - EMH;
}
else
{
float term1 = 0.5f * gpu->sim.GDT;
float term1 = 0.5f * GDT;
float term2 = term1 * term1;
float term4 = term2 * term2;
......@@ -1321,20 +1325,33 @@ void gpuSetIntegrationParameters(gpuContext gpu, float tau, float deltaT, float
float o31_1260 = 31.0f / 1260.0f;
float o_360 = 1.0f / 360.0f;
gpu->sim.B = term4 * (third + term1 * (third + term1 * (o17_90 + term1 * o7_9)));
gpu->sim.C = term2 * term1 * (2.0f * third + term1 * (-0.5f + term1 * (o7_30 + term1 * (-o1_12 + term1 * o31_1260))));
gpu->sim.D = term2 * (-1.0f + term2 * (-o1_12 - term2 * o_360));
}
gpu->sim.TauDOverEMMinusOne = gpu->sim.tau * gpu->sim.D / (gpu->sim.EM - 1.0f);
gpu->sim.DOverTauC = gpu->sim.D / (gpu->sim.tau * gpu->sim.C);
gpu->sim.fix1 = gpu->sim.tau * (gpu->sim.EPH - gpu->sim.EMH);
gpu->sim.oneOverFix1 = 1.0f / (gpu->sim.tau * (gpu->sim.EPH - gpu->sim.EMH));
gpu->sim.T = temperature;
gpu->sim.kT = BOLTZ * gpu->sim.T;
gpu->sim.V = sqrt(gpu->sim.kT * (1.0f - gpu->sim.EM));
gpu->sim.X = gpu->sim.tau * sqrt(gpu->sim.kT * gpu->sim.C);
gpu->sim.Yv = sqrt(gpu->sim.kT * gpu->sim.B / gpu->sim.C);
gpu->sim.Yx = gpu->sim.tau * sqrt(gpu->sim.kT * gpu->sim.B / (1.0f - gpu->sim.EM));
B = term4 * (third + term1 * (third + term1 * (o17_90 + term1 * o7_9)));
C = term2 * term1 * (2.0f * third + term1 * (-0.5f + term1 * (o7_30 + term1 * (-o1_12 + term1 * o31_1260))));
D = term2 * (-1.0f + term2 * (-o1_12 - term2 * o_360));
}
float DOverTauC = D / (gpu->sim.tau * C);
float TauOneMinusEM = gpu->sim.tau * (1.0f-EM);
float TauDOverEMMinusOne = gpu->sim.tau * D / (EM - 1.0f);
float fix1 = gpu->sim.tau * (EPH - EMH);
if (fix1 == 0.0f)
fix1 = deltaT;
float oneOverFix1 = 1.0f / fix1;
float V = sqrt(gpu->sim.kT * (1.0f - EM));
float X = gpu->sim.tau * sqrt(gpu->sim.kT * C);
float Yv = sqrt(gpu->sim.kT * B / C);
float Yx = gpu->sim.tau * sqrt(gpu->sim.kT * B / (1.0f - EM));
(*gpu->psLangevinParameters)[0] = EM;
(*gpu->psLangevinParameters)[1] = EM;
(*gpu->psLangevinParameters)[2] = DOverTauC;
(*gpu->psLangevinParameters)[3] = TauOneMinusEM;
(*gpu->psLangevinParameters)[4] = TauDOverEMMinusOne;
(*gpu->psLangevinParameters)[5] = V;
(*gpu->psLangevinParameters)[6] = X;
(*gpu->psLangevinParameters)[7] = Yv;
(*gpu->psLangevinParameters)[8] = Yx;
(*gpu->psLangevinParameters)[9] = fix1;
(*gpu->psLangevinParameters)[10] = oneOverFix1;
gpu->psLangevinParameters->Upload();
gpu->psStepSize->Download();
(*gpu->psStepSize)[0].y = deltaT;
gpu->psStepSize->Upload();
......@@ -1355,10 +1372,10 @@ void gpuSetBrownianIntegrationParameters(gpuContext gpu, float tau, float deltaT
gpu->sim.deltaT = deltaT;
gpu->sim.oneOverDeltaT = 1.0f/deltaT;
gpu->sim.tau = tau;
gpu->sim.GDT = gpu->sim.deltaT * gpu->sim.tau;
gpu->sim.tauDeltaT = gpu->sim.deltaT * gpu->sim.tau;
gpu->sim.T = temperature;
gpu->sim.kT = BOLTZ * gpu->sim.T;
gpu->sim.Yv = gpu->sim.Yx = sqrt(2.0f*gpu->sim.kT*deltaT*tau);
gpu->sim.noiseAmplitude = sqrt(2.0f*gpu->sim.kT*deltaT*tau);
gpu->psStepSize->Download();
(*gpu->psStepSize)[0].y = deltaT;
gpu->psStepSize->Upload();
......@@ -1369,8 +1386,6 @@ void gpuSetAndersenThermostatParameters(gpuContext gpu, float temperature, float
gpu->sim.T = temperature;
gpu->sim.kT = BOLTZ * gpu->sim.T;
gpu->sim.collisionFrequency = collisionFrequency;
gpu->sim.Yv = gpu->sim.Yx = 1.0f;
gpu->sim.V = gpu->sim.X = 1.0f;
}
extern "C"
......@@ -1421,6 +1436,8 @@ void gpuShutDown(gpuContext gpu)
delete gpu->psInteractingWorkUnit;
delete gpu->psInteractionFlag;
delete gpu->psInteractionCount;
delete gpu->psStepSize;
delete gpu->psLangevinParameters;
delete gpu->psRandom4;
delete gpu->psRandom2;
delete gpu->psRandomPosition;
......
......@@ -119,6 +119,7 @@ struct _gpuContext {
CUDAStream<unsigned int>* psInteractionFlag;
CUDAStream<size_t>* psInteractionCount;
CUDAStream<float2>* psStepSize; // The size of the previous and current time steps
CUDAStream<float>* psLangevinParameters;// Parameters used for Langevin integration
CUDAStream<float4>* psRandom4; // Pointer to sets of 4 random numbers for MD integration
CUDAStream<float2>* psRandom2; // Pointer to sets of 2 random numbers for MD integration
CUDAStream<uint4>* psRandomSeed; // Pointer to each random seed
......@@ -208,7 +209,7 @@ extern "C"
void* gpuInit(int numAtoms, unsigned int device);
extern "C"
void gpuSetIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature);
void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature, float errorTol);
extern "C"
void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT, float errorTol);
......
......@@ -64,9 +64,9 @@ __global__ void kBrownianUpdatePart1_kernel()
float4 force = cSim.pForce4[pos];
cSim.pOldPosq[pos] = apos;
apos.x = force.x*cSim.GDT + random4a.x;
apos.y = force.y*cSim.GDT + random4a.y;
apos.z = force.z*cSim.GDT + random4a.z;
apos.x = force.x*cSim.tauDeltaT + cSim.noiseAmplitude*random4a.x;
apos.y = force.y*cSim.tauDeltaT + cSim.noiseAmplitude*random4a.y;
apos.z = force.z*cSim.tauDeltaT + cSim.noiseAmplitude*random4a.z;
cSim.pPosqP[pos] = apos;
pos += blockDim.x * gridDim.x;
}
......
......@@ -34,6 +34,8 @@ using namespace std;
#include "gputypes.h"
enum {EM, EM_V, DOverTauC, TauOneMinusEM_V, TauDOverEMMinusOne, V, X, Yv, Yx, Fix1, OneOverFix1, MaxParams};
static __constant__ cudaGmxSimulation cSim;
void SetLangevinUpdateSim(gpuContext gpu)
......@@ -98,3 +100,105 @@ void kLangevinUpdatePart2(gpuContext gpu)
}
}
__global__ void kSelectLangevinStepSize_kernel(float maxStepSize)
{
// Calculate the error.
extern __shared__ float error[];
__shared__ float params[MaxParams];
error[threadIdx.x] = 0.0f;
unsigned int pos = threadIdx.x;
while (pos < cSim.atoms)
{
float4 force = cSim.pForce4[pos];
float invMass = cSim.pVelm4[pos].w;
error[threadIdx.x] += (force.x*force.x + force.y*force.y + force.z*force.z)*invMass;
pos += blockDim.x * gridDim.x;
}
__syncthreads();
// Sum the errors from all threads.
for (int offset = 1; offset < blockDim.x; offset *= 2)
{
if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0)
error[threadIdx.x] += error[threadIdx.x+offset];
__syncthreads();
}
if (threadIdx.x == 0)
{
// Select the new step size.
float totalError = sqrt(error[0]/(cSim.atoms*3));
float newStepSize = sqrt(cSim.errorTol/totalError);
float oldStepSize = cSim.pStepSize[0].y;
if (oldStepSize > 0.0f)
newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize)
newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator.
if (newStepSize > maxStepSize)
newStepSize = maxStepSize;
cSim.pStepSize[0].y = newStepSize;
// Recalculate the integration parameters.
float gdt = newStepSize / cSim.tau;
float eph = exp(0.5f * gdt);
float emh = exp(-0.5f * gdt);
float ep = exp(gdt);
float em = exp(-gdt);
float em_v = exp(-0.5f*(oldStepSize+newStepSize)/cSim.tau);
float b, c, d;
if (gdt >= 0.1f)
{
float term1 = eph - 1.0f;
term1 *= term1;
b = gdt * (ep - 1.0f) - 4.0f * term1;
c = gdt - 3.0f + 4.0f * emh - em;
d = 2.0f - eph - emh;
}
else
{
float term1 = 0.5f * gdt;
float term2 = term1 * term1;
float term4 = term2 * term2;
float third = 1.0f / 3.0f;
float o7_9 = 7.0f / 9.0f;
float o1_12 = 1.0f / 12.0f;
float o17_90 = 17.0f / 90.0f;
float o7_30 = 7.0f / 30.0f;
float o31_1260 = 31.0f / 1260.0f;
float o_360 = 1.0f / 360.0f;
b = term4 * (third + term1 * (third + term1 * (o17_90 + term1 * o7_9)));
c = term2 * term1 * (2.0f * third + term1 * (-0.5f + term1 * (o7_30 + term1 * (-o1_12 + term1 * o31_1260))));
d = term2 * (-1.0f + term2 * (-o1_12 - term2 * o_360));
}
float fix1 = cSim.tau * (eph - emh);
if (fix1 == 0.0f)
fix1 = newStepSize;
params[EM] = em;
params[EM_V] = em_v;
params[DOverTauC] = d / (cSim.tau * c);
params[TauOneMinusEM_V] = cSim.tau * (1.0f-em_v);
params[TauDOverEMMinusOne] = cSim.tau * d / (em - 1.0f);
params[Fix1] = fix1;
params[OneOverFix1] = 1.0f / fix1;
params[V] = sqrt(cSim.kT * (1.0f - em));
params[X] = cSim.tau * sqrt(cSim.kT * c);
params[Yv] = sqrt(cSim.kT * b / c);
params[Yx] = cSim.tau * sqrt(cSim.kT * b / (1.0f - em));
}
__syncthreads();
if (threadIdx.x < MaxParams)
cSim.pLangevinParameters[threadIdx.x] = params[threadIdx.x];
}
void kSelectLangevinStepSize(gpuContext gpu, float maxTimeStep)
{
// printf("kSelectLangevinStepSize\n");
kSelectLangevinStepSize_kernel<<<1, gpu->sim.update_threads_per_block, sizeof(float)*gpu->sim.update_threads_per_block>>>(maxTimeStep);
LAUNCHERROR("kSelectLangevinStepSize");
}
......@@ -37,6 +37,10 @@ __global__ void kLangevinUpdatePart1CM_kernel()
__global__ void kLangevinUpdatePart1_kernel()
#endif
{
__shared__ float params[MaxParams];
if (threadIdx.x < MaxParams)
params[threadIdx.x] = cSim.pLangevinParameters[threadIdx.x];
__syncthreads();
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
#ifdef REMOVE_CM
......@@ -87,36 +91,36 @@ __global__ void kLangevinUpdatePart1_kernel()
float3 Vmh;
float sqrtInvMass = sqrt(velocity.w);
Vmh.x = xVector.x * cSim.DOverTauC + sqrtInvMass * random4a.x;
Vmh.y = xVector.y * cSim.DOverTauC + sqrtInvMass * random4a.y;
Vmh.z = xVector.z * cSim.DOverTauC + sqrtInvMass * random4a.z;
Vmh.x = xVector.x * params[DOverTauC] + sqrtInvMass * params[Yv] * random4a.x;
Vmh.y = xVector.y * params[DOverTauC] + sqrtInvMass * params[Yv] * random4a.y;
Vmh.z = xVector.z * params[DOverTauC] + sqrtInvMass * params[Yv] * random4a.z;
float4 vVector;
vVector.x = sqrtInvMass * random4a.w;
vVector.y = sqrtInvMass * random2a.x;
vVector.z = sqrtInvMass * random2a.y;
vVector.x = sqrtInvMass * params[V] * random4a.w;
vVector.y = sqrtInvMass * params[V] * random2a.x;
vVector.z = sqrtInvMass * params[V] * random2a.y;
vVector.w = 0.0f;
cSim.pvVector4[pos] = vVector;
velocity.x = velocity.x * cSim.EM +
velocity.w * force.x * cSim.TauOneMinusEM +
velocity.x = velocity.x * params[EM_V] +
velocity.w * force.x * params[TauOneMinusEM_V] +
vVector.x -
cSim.EM * Vmh.x;
velocity.y = velocity.y * cSim.EM +
velocity.w * force.y * cSim.TauOneMinusEM +
params[EM] * Vmh.x;
velocity.y = velocity.y * params[EM_V] +
velocity.w * force.y * params[TauOneMinusEM_V] +
vVector.y -
cSim.EM * Vmh.y;
velocity.z = velocity.z * cSim.EM +
velocity.w * force.z * cSim.TauOneMinusEM +
params[EM] * Vmh.y;
velocity.z = velocity.z * params[EM_V] +
velocity.w * force.z * params[TauOneMinusEM_V] +
vVector.z -
cSim.EM * Vmh.z;
params[EM] * Vmh.z;
#ifdef REMOVE_CM
velocity.x -= sCM[0].x;
velocity.y -= sCM[0].y;
velocity.z -= sCM[0].z;
#endif
cSim.pOldPosq[pos] = apos;
apos.x = velocity.x * cSim.fix1;
apos.y = velocity.y * cSim.fix1;
apos.z = velocity.z * cSim.fix1;
apos.x = velocity.x * params[Fix1];
apos.y = velocity.y * params[Fix1];
apos.z = velocity.z * params[Fix1];
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
......@@ -129,6 +133,10 @@ __global__ void kLangevinUpdatePart2CM_kernel()
__global__ void kLangevinUpdatePart2_kernel()
#endif
{
__shared__ float params[MaxParams];
if (threadIdx.x < MaxParams)
params[threadIdx.x] = cSim.pLangevinParameters[threadIdx.x];
__syncthreads();
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
#ifdef REMOVE_CM
......@@ -147,9 +155,9 @@ __global__ void kLangevinUpdatePart2_kernel()
float2 random2b = cSim.pRandom2b[rpos + pos];
float3 Xmh;
float sqrtInvMass = sqrt(velocity.w);
velocity.x = xPrime.x * cSim.oneOverFix1;
velocity.y = xPrime.y * cSim.oneOverFix1;
velocity.z = xPrime.z * cSim.oneOverFix1;
velocity.x = xPrime.x * params[OneOverFix1];
velocity.y = xPrime.y * params[OneOverFix1];
velocity.z = xPrime.z * params[OneOverFix1];
#ifdef REMOVE_CM
float mass = 1.0f / velocity.w;
CM.x += mass * velocity.x;
......@@ -157,15 +165,15 @@ __global__ void kLangevinUpdatePart2_kernel()
CM.z += mass * velocity.z;
#endif
Xmh.x = vVector.x * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.x;
Xmh.y = vVector.y * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.y;
Xmh.z = vVector.z * cSim.TauDOverEMMinusOne +
sqrtInvMass * random4b.z;
xVector.x = sqrtInvMass * random4b.w;
xVector.y = sqrtInvMass * random2b.x;
xVector.z = sqrtInvMass * random2b.y;
Xmh.x = vVector.x * params[TauDOverEMMinusOne] +
sqrtInvMass * params[Yx] * random4b.x;
Xmh.y = vVector.y * params[TauDOverEMMinusOne] +
sqrtInvMass * params[Yx] * random4b.y;
Xmh.z = vVector.z * params[TauDOverEMMinusOne] +
sqrtInvMass * params[Yx] * random4b.z;
xVector.x = sqrtInvMass * params[X] * random4b.w;
xVector.y = sqrtInvMass * params[X] * random2b.x;
xVector.z = sqrtInvMass * params[X] * random2b.y;
xPrime.x += xVector.x - Xmh.x;
xPrime.y += xVector.y - Xmh.y;
xPrime.z += xVector.z - Xmh.z;
......
......@@ -144,27 +144,15 @@ __global__ void kGenerateRandoms_kernel()
}
// Output final randoms
float c1, c2;
if (pos < cSim.totalRandoms)
{
c1 = cSim.Yv;
c2 = cSim.V;
}
else
{
c1 = cSim.Yx;
c2 = cSim.X;
}
random4.x = c1 * sRand[threadIdx.x].x;
random4.y = c1 * sRand[threadIdx.x].y;
random4.z = c1 * sRand[threadIdx.x].z;
random4.w = c2 * sRand[threadIdx.x + blockDim.x].x;
random4.x = sRand[threadIdx.x].x;
random4.y = sRand[threadIdx.x].y;
random4.z = sRand[threadIdx.x].z;
random4.w = sRand[threadIdx.x + blockDim.x].x;
cSim.pRandom4a[pos] = random4;
random2.x = c2 * sRand[threadIdx.x + blockDim.x].y;
random2.y = c2 * sRand[threadIdx.x + blockDim.x].z;
random2.x = sRand[threadIdx.x + blockDim.x].y;
random2.y = sRand[threadIdx.x + blockDim.x].z;
cSim.pRandom2a[pos] = random2;
pos += increment;
}
......
......@@ -49,10 +49,6 @@ static const float BOLTZ = (RGAS / KILO); // (k
void testGaussian() {
_gpuContext* gpu = (_gpuContext*) gpuInit(5000, 0);
gpu->sim.Yv = 1.0;
gpu->sim.Yx = 1.0;
gpu->sim.V = 1.0;
gpu->sim.X = 1.0;
gpuSetConstants(gpu);
kGenerateRandoms(gpu);
const int numValues = 4*gpu->psRandom4->_length;
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of VariableLangevinIntegrator.
*/
#include "../../../tests/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VariableLangevinIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "../src/sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
void testSingleBond() {
CudaPlatform platform;
System system;
system.addParticle(2.0);
system.addParticle(2.0);
VariableLangevinIntegrator integrator(0, 0.1, 1e-6);
HarmonicBondForce* forceField = new HarmonicBondForce();
forceField->addBond(0, 1, 1.5, 1);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(-1, 0, 0);
positions[1] = Vec3(1, 0, 0);
context.setPositions(positions);
// This is simply a damped harmonic oscillator, so compare it to the analytical solution.
double freq = std::sqrt(1-0.05*0.05);
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions | State::Velocities);
double time = state.getTime();
double expectedDist = 1.5+0.5*std::exp(-0.05*time)*std::cos(freq*time);
ASSERT_EQUAL_VEC(Vec3(-0.5*expectedDist, 0, 0), state.getPositions()[0], 0.02);
ASSERT_EQUAL_VEC(Vec3(0.5*expectedDist, 0, 0), state.getPositions()[1], 0.02);
double expectedSpeed = -0.5*std::exp(-0.05*time)*(0.05*std::cos(freq*time)+freq*std::sin(freq*time));
ASSERT_EQUAL_VEC(Vec3(-0.5*expectedSpeed, 0, 0), state.getVelocities()[0], 0.02);
ASSERT_EQUAL_VEC(Vec3(0.5*expectedSpeed, 0, 0), state.getVelocities()[1], 0.02);
integrator.step(1);
}
// Now set the friction to a tiny value and see if it conserves energy.
integrator.setFriction(5e-5);
context.setPositions(positions);
State state = context.getState(State::Energy);
double initialEnergy = state.getKineticEnergy()+state.getPotentialEnergy();
for (int i = 0; i < 1000; ++i) {
state = context.getState(State::Energy);
double energy = state.getKineticEnergy()+state.getPotentialEnergy();
ASSERT_EQUAL_TOL(initialEnergy, energy, 0.05);
integrator.step(1);
}
}
void testTemperature() {
const int numParticles = 8;
const double temp = 100.0;
CudaPlatform platform;
System system;
VariableLangevinIntegrator integrator(temp, 2.0, 1e-4);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(2.0);
forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
}
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; ++i)
positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
context.setPositions(positions);
// Let it equilibrate.
integrator.step(10000);
// Now run it for a while and see if the temperature is correct.
double ke = 0.0;
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Energy);
ke += state.getKineticEnergy();
integrator.step(1);
}
ke /= 1000;
double expected = 0.5*numParticles*3*BOLTZ*temp;
ASSERT_EQUAL_TOL(expected, ke, 3*expected/std::sqrt(1000.0));
}
void testConstraints() {
const int numParticles = 8;
const double temp = 100.0;
CudaPlatform platform;
System system;
VariableLangevinIntegrator integrator(temp, 2.0, 1e-5);
integrator.setConstraintTolerance(1e-5);
integrator.setRandomNumberSeed(0);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(10.0);
forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
}
for (int i = 0; i < numParticles-1; ++i)
system.addConstraint(i, i+1, 1.0);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
init_gen_rand(0);
for (int i = 0; i < numParticles; ++i) {
positions[i] = Vec3(i/2, (i+1)/2, 0);
velocities[i] = Vec3(genrand_real2()-0.5, genrand_real2()-0.5, genrand_real2()-0.5);
}
context.setPositions(positions);
context.setVelocities(velocities);
// Simulate it and see whether the constraints remain satisfied.
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions);
for (int j = 0; j < numParticles-1; ++j) {
Vec3 p1 = state.getPositions()[j];
Vec3 p2 = state.getPositions()[j+1];
double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
ASSERT_EQUAL_TOL(1.0, dist, 2e-5);
}
integrator.step(1);
}
}
void testRandomSeed() {
const int numParticles = 8;
const double temp = 100.0;
CudaPlatform platform;
System system;
VariableLangevinIntegrator integrator(temp, 2.0, 1e-5);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(2.0);
forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
}
system.addForce(forceField);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
for (int i = 0; i < numParticles; ++i) {
positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
velocities[i] = Vec3(0, 0, 0);
}
// Try twice with the same random seed.
integrator.setRandomNumberSeed(5);
Context context(system, integrator, platform);
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state1 = context.getState(State::Positions);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state2 = context.getState(State::Positions);
// Try twice with a different random seed.
integrator.setRandomNumberSeed(10);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state3 = context.getState(State::Positions);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state4 = context.getState(State::Positions);
// Compare the results.
for (int i = 0; i < numParticles; i++) {
for (int j = 0; j < 3; j++) {
ASSERT(state1.getPositions()[i][j] == state2.getPositions()[i][j]);
ASSERT(state3.getPositions()[i][j] == state4.getPositions()[i][j]);
ASSERT(state1.getPositions()[i][j] != state3.getPositions()[i][j]);
}
}
}
int main() {
try {
testSingleBond();
testTemperature();
testConstraints();
testRandomSeed();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
......@@ -362,6 +362,8 @@ int ReferenceVariableStochasticDynamics::updatePart1( int numberOfAtoms, RealOpe
const RealOpenMM* fixedParameters = getFixedParameters();
RealOpenMM tau = getTau();
RealOpenMM fix1 = tau*(fixedParameters[EPH] - fixedParameters[EMH]);
if (fix1 == zero)
fix1 = getDeltaT();
for( int ii = 0; ii < numberOfAtoms; ii++ ){
......@@ -412,6 +414,7 @@ int ReferenceVariableStochasticDynamics::updatePart2( int numberOfAtoms, RealOpe
static const char* methodName = "\nReferenceVariableStochasticDynamics::updatePart2";
static const RealOpenMM zero = 0.0;
static const RealOpenMM one = 1.0;
static int debug = 0;
......@@ -422,6 +425,8 @@ int ReferenceVariableStochasticDynamics::updatePart2( int numberOfAtoms, RealOpe
const RealOpenMM* fixedParameters = getFixedParameters();
RealOpenMM tau = getTau();
RealOpenMM fix1 = tau*(fixedParameters[EPH] - fixedParameters[EMH]);
if (fix1 == zero)
fix1 = getDeltaT();
fix1 = one/fix1;
for( int ii = 0; ii < numberOfAtoms; ii++ ){
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment