Commit 8725cede authored by Peter Eastman's avatar Peter Eastman
Browse files

Converted Cuda platform to use improved Langevin integrator

parent 15a8478e
......@@ -869,9 +869,6 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
prevStepSize = stepSize;
}
kLangevinUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu);
if (data.removeCM)
if (data.stepCount%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true;
......@@ -991,9 +988,6 @@ void CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, cons
float maxStepSize = (float)(maxTime-data.time);
kSelectLangevinStepSize(gpu, maxStepSize);
kLangevinUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu);
if (data.removeCM)
if (data.stepCount%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true;
......
......@@ -471,8 +471,6 @@ struct cudaGmxSimulation {
float4* pPosqP; // Pointer to mid-integration atom positions
float4* pOldPosq; // Pointer to old atom positions
float4* pVelm4; // Pointer to atom velocity and inverse mass
float4* pvVector4; // Pointer to atom v Vector
float4* pxVector4; // Pointer to atom x Vector
float4* pForce4; // Pointer to force data
float* pEnergy; // Pointer to energy output buffer
float* pBornForce; // Pointer to Born force data
......@@ -482,15 +480,12 @@ struct cudaGmxSimulation {
float4* pLinearMomentum; // Pointer to linear momentum
// Random numbers
float4* pRandom4a; // Pointer to first set of 4 random numbers
float4* pRandom4b; // Pointer to second set of 4 random numbers
float2* pRandom2a; // Pointer to first set of 2 random numbers
float2* pRandom2b; // Pointer to second set of 2 random numbers
float4* pRandom4; // Pointer to 4 random numbers
float2* pRandom2; // Pointer to 2 random numbers
uint4* pRandomSeed; // Pointer to random seeds
int* pRandomPosition; // Pointer to random number positions
unsigned int randoms; // Number of randoms
unsigned int totalRandoms; // Number of randoms plus overflow.
unsigned int totalRandomsTimesTwo; // Used for generating randoms
unsigned int randomIterations; // Number of iterations before regenerating randoms
unsigned int randomFrames; // Number of frames of random numbers
};
......
......@@ -1651,10 +1651,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu->sim.pOldPosq = gpu->psOldPosq4->_pDevStream[0];
gpu->psVelm4 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "Velm");
gpu->sim.pVelm4 = gpu->psVelm4->_pDevStream[0];
gpu->psvVector4 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "vVector");
gpu->sim.pvVector4 = gpu->psvVector4->_pDevStream[0];
gpu->psxVector4 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "xVector");
gpu->sim.pxVector4 = gpu->psxVector4->_pDevStream[0];
gpu->psBornRadii = new CUDAStream<float>(gpu->sim.paddedNumberOfAtoms, 1, "BornRadii");
gpu->sim.pBornRadii = gpu->psBornRadii->_pDevStream[0];
gpu->psObcChain = new CUDAStream<float>(gpu->sim.paddedNumberOfAtoms, 1, "ObcChain");
......@@ -1669,7 +1665,7 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu->sim.pStepSize = gpu->psStepSize->_pDevStream[0];
(*gpu->psStepSize)[0] = make_float2(0.0f, 0.0f);
gpu->psStepSize->Upload();
gpu->psLangevinParameters = new CUDAStream<float>(11, 1, "LangevinParameters");
gpu->psLangevinParameters = new CUDAStream<float>(3, 1, "LangevinParameters");
gpu->sim.pLangevinParameters = gpu->psLangevinParameters->_pDevStream[0];
gpu->pAtomSymbol = new unsigned char[gpu->natoms];
gpu->psAtomIndex = new CUDAStream<int>(gpu->sim.paddedNumberOfAtoms, 1, "AtomIndex");
......@@ -1684,15 +1680,12 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu->sim.randomIterations = gpu->sim.randomFrames;
gpu->sim.randoms = gpu->sim.randomFrames * gpu->sim.paddedNumberOfAtoms;
gpu->sim.totalRandoms = gpu->sim.randoms + gpu->sim.paddedNumberOfAtoms;
gpu->sim.totalRandomsTimesTwo = gpu->sim.totalRandoms * 2;
gpu->psRandom4 = new CUDAStream<float4>(gpu->sim.totalRandomsTimesTwo, 1, "Random4");
gpu->psRandom2 = new CUDAStream<float2>(gpu->sim.totalRandomsTimesTwo, 1, "Random2");
gpu->psRandom4 = new CUDAStream<float4>(gpu->sim.totalRandoms, 1, "Random4");
gpu->psRandom2 = new CUDAStream<float2>(gpu->sim.totalRandoms, 1, "Random2");
gpu->psRandomPosition = new CUDAStream<int>(gpu->sim.blocks, 1, "RandomPosition");
gpu->psRandomSeed = new CUDAStream<uint4>(gpu->sim.blocks * gpu->sim.random_threads_per_block, 1, "RandomSeed");
gpu->sim.pRandom4a = gpu->psRandom4->_pDevStream[0];
gpu->sim.pRandom2a = gpu->psRandom2->_pDevStream[0];
gpu->sim.pRandom4b = gpu->psRandom4->_pDevStream[0] + gpu->sim.totalRandoms;
gpu->sim.pRandom2b = gpu->psRandom2->_pDevStream[0] + gpu->sim.totalRandoms;
gpu->sim.pRandom4 = gpu->psRandom4->_pDevStream[0];
gpu->sim.pRandom2 = gpu->psRandom2->_pDevStream[0];
gpu->sim.pRandomPosition = gpu->psRandomPosition->_pDevStream[0];
gpu->sim.pRandomSeed = gpu->psRandomSeed->_pDevStream[0];
......@@ -1884,14 +1877,6 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu->natoms = numAtoms;
gpuAllocateInitialBuffers(gpu);
for (int i = 0; i < gpu->natoms; i++)
{
(*gpu->psxVector4)[i].x = 0.0f;
(*gpu->psxVector4)[i].y = 0.0f;
(*gpu->psxVector4)[i].z = 0.0f;
(*gpu->psxVector4)[i].w = 0.0f;
}
gpu->psxVector4->Upload();
gpu->iterations = 0;
gpu->sim.update_threads_per_block = (gpu->natoms + gpu->sim.blocks - 1) / gpu->sim.blocks;
......@@ -2023,60 +2008,12 @@ void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT
gpu->sim.tau = tau;
gpu->sim.T = temperature;
gpu->sim.kT = BOLTZ * gpu->sim.T;
double GDT = gpu->sim.deltaT / gpu->sim.tau;
double EPH = exp(0.5 * GDT);
double EMH = exp(-0.5 * GDT);
double EP = exp(GDT);
double EM = exp(-GDT);
double B, C, D;
if (GDT >= 0.1)
{
double term1 = EPH - 1.0;
term1 *= term1;
B = GDT * (EP - 1.0) - 4.0 * term1;
C = GDT - 3.0 + 4.0 * EMH - EM;
D = 2.0 - EPH - EMH;
}
else
{
double term1 = 0.5 * GDT;
double term2 = term1 * term1;
double term4 = term2 * term2;
double third = 1.0 / 3.0;
double o7_9 = 7.0 / 9.0;
double o1_12 = 1.0 / 12.0;
double o17_90 = 17.0 / 90.0;
double o7_30 = 7.0 / 30.0;
double o31_1260 = 31.0 / 1260.0;
double o_360 = 1.0 / 360.0;
B = term4 * (third + term1 * (third + term1 * (o17_90 + term1 * o7_9)));
C = term2 * term1 * (2.0 * third + term1 * (-0.5 + term1 * (o7_30 + term1 * (-o1_12 + term1 * o31_1260))));
D = term2 * (-1.0 + term2 * (-o1_12 - term2 * o_360));
}
double DOverTauC = D / (gpu->sim.tau * C);
double TauOneMinusEM = gpu->sim.tau * (1.0-EM);
double TauDOverEMMinusOne = gpu->sim.tau * D / (EM - 1.0);
double fix1 = gpu->sim.tau * (EPH - EMH);
if (fix1 == 0.0)
fix1 = deltaT;
double oneOverFix1 = 1.0 / fix1;
double V = sqrt(gpu->sim.kT * (1.0 - EM));
double X = gpu->sim.tau * sqrt(gpu->sim.kT * C);
double Yv = sqrt(gpu->sim.kT * B / C);
double Yx = gpu->sim.tau * sqrt(gpu->sim.kT * B / (1.0 - EM));
(*gpu->psLangevinParameters)[0] = (float) EM;
(*gpu->psLangevinParameters)[1] = (float) EM;
(*gpu->psLangevinParameters)[2] = (float) DOverTauC;
(*gpu->psLangevinParameters)[3] = (float) TauOneMinusEM;
(*gpu->psLangevinParameters)[4] = (float) TauDOverEMMinusOne;
(*gpu->psLangevinParameters)[5] = (float) V;
(*gpu->psLangevinParameters)[6] = (float) X;
(*gpu->psLangevinParameters)[7] = (float) Yv;
(*gpu->psLangevinParameters)[8] = (float) Yx;
(*gpu->psLangevinParameters)[9] = (float) fix1;
(*gpu->psLangevinParameters)[10] = (float) oneOverFix1;
double vscale = exp(-deltaT/tau);
double fscale = (1-vscale)*tau;
double noisescale = sqrt(2*gpu->sim.kT/tau)*sqrt(0.5*(1-vscale*vscale)*tau);
(*gpu->psLangevinParameters)[0] = (float) vscale;
(*gpu->psLangevinParameters)[1] = (float) fscale;
(*gpu->psLangevinParameters)[2] = (float) noisescale;
gpu->psLangevinParameters->Upload();
gpu->psStepSize->Download();
(*gpu->psStepSize)[0].y = deltaT;
......@@ -2129,8 +2066,6 @@ void gpuShutDown(gpuContext gpu)
delete gpu->psVelm4;
delete gpu->psForce4;
delete gpu->psEnergy;
delete gpu->psxVector4;
delete gpu->psvVector4;
delete gpu->psSigEps2;
if (gpu->psCustomParams != NULL)
delete gpu->psCustomParams;
......
......@@ -101,8 +101,6 @@ struct _gpuContext {
CUDAStream<float4>* psVelm4;
CUDAStream<float4>* psForce4;
CUDAStream<float>* psEnergy; // Energy output buffer
CUDAStream<float4>* psxVector4;
CUDAStream<float4>* psvVector4;
CUDAStream<float2>* psSigEps2;
CUDAStream<float4>* psCustomParams; // Atom parameters for custom nonbonded force
CUDAStream<int4>* psCustomBondID; // Atom indices for custom bonds
......
......@@ -67,7 +67,7 @@ void kBrownianUpdatePart1_kernel()
while (pos < cSim.atoms)
{
float4 random4a = cSim.pRandom4a[rpos + pos];
float4 random4a = cSim.pRandom4[rpos + pos];
float4 apos = cSim.pPosq[pos];
float4 force = cSim.pForce4[pos];
float invMass = cSim.pVelm4[pos].w;
......
......@@ -62,7 +62,7 @@ __global__ void kCalculateAndersenThermostat_kernel()
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 random4a = cSim.pRandom4a[rpos + pos];
float4 random4a = cSim.pRandom4[rpos + pos];
float scale = (random4a.w > -randomRange && random4a.w < randomRange ? 0.0f : 1.0f);
float add = (1.0f-scale)*sqrt(cSim.kT*velocity.w);
velocity.x = scale*velocity.x + add*random4a.x;
......
......@@ -34,7 +34,7 @@ using namespace std;
#include "gputypes.h"
enum {EM, EM_V, DOverTauC, TauOneMinusEM_V, TauDOverEMMinusOne, V, X, Yv, Yx, Fix1, OneOverFix1, MaxParams};
enum {VelScale, ForceScale, NoiseScale, MaxParams};
static __constant__ cudaGmxSimulation cSim;
......@@ -151,53 +151,12 @@ void kSelectLangevinStepSize_kernel(float maxStepSize)
// Recalculate the integration parameters.
float gdt = newStepSize / cSim.tau;
float eph = exp(0.5f * gdt);
float emh = exp(-0.5f * gdt);
float ep = exp(gdt);
float em = exp(-gdt);
float em_v = exp(-0.5f*(oldStepSize+newStepSize)/cSim.tau);
float b, c, d;
if (gdt >= 0.1f)
{
float term1 = eph - 1.0f;
term1 *= term1;
b = gdt * (ep - 1.0f) - 4.0f * term1;
c = gdt - 3.0f + 4.0f * emh - em;
d = 2.0f - eph - emh;
}
else
{
float term1 = 0.5f * gdt;
float term2 = term1 * term1;
float term4 = term2 * term2;
float third = 1.0f / 3.0f;
float o7_9 = 7.0f / 9.0f;
float o1_12 = 1.0f / 12.0f;
float o17_90 = 17.0f / 90.0f;
float o7_30 = 7.0f / 30.0f;
float o31_1260 = 31.0f / 1260.0f;
float o_360 = 1.0f / 360.0f;
b = term4 * (third + term1 * (third + term1 * (o17_90 + term1 * o7_9)));
c = term2 * term1 * (2.0f * third + term1 * (-0.5f + term1 * (o7_30 + term1 * (-o1_12 + term1 * o31_1260))));
d = term2 * (-1.0f + term2 * (-o1_12 - term2 * o_360));
}
float fix1 = cSim.tau * (eph - emh);
if (fix1 == 0.0f)
fix1 = newStepSize;
params[EM] = em;
params[EM_V] = em_v;
params[DOverTauC] = d / (cSim.tau * c);
params[TauOneMinusEM_V] = cSim.tau * (1.0f-em_v);
params[TauDOverEMMinusOne] = cSim.tau * d / (em - 1.0f);
params[Fix1] = fix1;
params[OneOverFix1] = 1.0f / fix1;
params[V] = sqrt(cSim.kT * (1.0f - em));
params[X] = cSim.tau * sqrt(cSim.kT * c);
params[Yv] = sqrt(cSim.kT * b / c);
params[Yx] = cSim.tau * sqrt(cSim.kT * b / (1.0f - em));
float vscale = exp(-newStepSize/cSim.tau);
float fscale = (1-vscale)*cSim.tau;
float noisescale = sqrt(2*cSim.kT/cSim.tau)*sqrt(0.5f*(1-vscale*vscale)*cSim.tau);
params[VelScale] = vscale;
params[ForceScale] = fscale;
params[NoiseScale] = noisescale;
}
__syncthreads();
if (threadIdx.x < MaxParams)
......
......@@ -24,6 +24,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "cudatypes.h"
/**
* This file contains the kernels for Langevin integration. It is included
......@@ -45,12 +46,12 @@ void kLangevinUpdatePart1CM_kernel()
void kLangevinUpdatePart1_kernel()
#endif
{
__shared__ volatile float params[MaxParams];
if (threadIdx.x < MaxParams)
params[threadIdx.x] = cSim.pLangevinParameters[threadIdx.x];
__syncthreads();
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
float vscale = cSim.pLangevinParameters[VelScale];
float fscale = cSim.pLangevinParameters[ForceScale];
float noisescale = cSim.pLangevinParameters[NoiseScale];
#ifdef REMOVE_CM
extern __shared__ float3 sCM[];
float3 CM = { 0.0f, 0.0f, 0.0f};
......@@ -91,45 +92,19 @@ void kLangevinUpdatePart1_kernel()
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 xVector = cSim.pxVector4[pos];
float4 random4a = cSim.pRandom4a[rpos + pos];
float2 random2a = cSim.pRandom2a[rpos + pos];
float4 apos = cSim.pPosq[pos];
float4 random4a = cSim.pRandom4[rpos + pos];
float4 force = cSim.pForce4[pos];
float3 Vmh;
float sqrtInvMass = sqrt(velocity.w);
Vmh.x = xVector.x * params[DOverTauC] + sqrtInvMass * params[Yv] * random4a.x;
Vmh.y = xVector.y * params[DOverTauC] + sqrtInvMass * params[Yv] * random4a.y;
Vmh.z = xVector.z * params[DOverTauC] + sqrtInvMass * params[Yv] * random4a.z;
float4 vVector;
vVector.x = sqrtInvMass * params[V] * random4a.w;
vVector.y = sqrtInvMass * params[V] * random2a.x;
vVector.z = sqrtInvMass * params[V] * random2a.y;
vVector.w = 0.0f;
cSim.pvVector4[pos] = vVector;
velocity.x = velocity.x * params[EM_V] +
velocity.w * force.x * params[TauOneMinusEM_V] +
vVector.x -
params[EM] * Vmh.x;
velocity.y = velocity.y * params[EM_V] +
velocity.w * force.y * params[TauOneMinusEM_V] +
vVector.y -
params[EM] * Vmh.y;
velocity.z = velocity.z * params[EM_V] +
velocity.w * force.z * params[TauOneMinusEM_V] +
vVector.z -
params[EM] * Vmh.z;
velocity.x = vscale*velocity.x + fscale*velocity.w*force.x + noisescale*sqrtInvMass*random4a.x;
velocity.y = vscale*velocity.y + fscale*velocity.w*force.y + noisescale*sqrtInvMass*random4a.y;
velocity.z = vscale*velocity.z + fscale*velocity.w*force.z + noisescale*sqrtInvMass*random4a.z;
#ifdef REMOVE_CM
velocity.x -= sCM[0].x;
velocity.y -= sCM[0].y;
velocity.z -= sCM[0].z;
#endif
cSim.pOldPosq[pos] = apos;
apos.x = velocity.x * params[Fix1];
apos.y = velocity.y * params[Fix1];
apos.z = velocity.z * params[Fix1];
cSim.pPosqP[pos] = apos;
cSim.pOldPosq[pos] = cSim.pPosq[pos];
cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x;
}
......@@ -149,12 +124,9 @@ void kLangevinUpdatePart2CM_kernel()
void kLangevinUpdatePart2_kernel()
#endif
{
__shared__ float params[MaxParams];
if (threadIdx.x < MaxParams)
params[threadIdx.x] = cSim.pLangevinParameters[threadIdx.x];
__syncthreads();
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
float dt = cSim.pStepSize[0].y;
#ifdef REMOVE_CM
extern __shared__ float3 sCM[];
float3 CM = {0.0f, 0.0f, 0.0f};
......@@ -164,16 +136,6 @@ void kLangevinUpdatePart2_kernel()
while (pos < cSim.atoms)
{
float4 velocity = cSim.pVelm4[pos];
float4 xPrime = cSim.pPosqP[pos];
float4 vVector = cSim.pvVector4[pos];
float4 xVector;
float4 random4b = cSim.pRandom4b[rpos + pos];
float2 random2b = cSim.pRandom2b[rpos + pos];
float3 Xmh;
float sqrtInvMass = sqrt(velocity.w);
velocity.x = xPrime.x * params[OneOverFix1];
velocity.y = xPrime.y * params[OneOverFix1];
velocity.z = xPrime.z * params[OneOverFix1];
#ifdef REMOVE_CM
float mass = 1.0f / velocity.w;
CM.x += mass * velocity.x;
......@@ -181,24 +143,9 @@ void kLangevinUpdatePart2_kernel()
CM.z += mass * velocity.z;
#endif
Xmh.x = vVector.x * params[TauDOverEMMinusOne] +
sqrtInvMass * params[Yx] * random4b.x;
Xmh.y = vVector.y * params[TauDOverEMMinusOne] +
sqrtInvMass * params[Yx] * random4b.y;
Xmh.z = vVector.z * params[TauDOverEMMinusOne] +
sqrtInvMass * params[Yx] * random4b.z;
xVector.x = sqrtInvMass * params[X] * random4b.w;
xVector.y = sqrtInvMass * params[X] * random2b.x;
xVector.z = sqrtInvMass * params[X] * random2b.y;
xPrime.x += xVector.x - Xmh.x;
xPrime.y += xVector.y - Xmh.y;
xPrime.z += xVector.z - Xmh.z;
float4 xPrime = make_float4(dt*velocity.x, dt*velocity.y, dt*velocity.z, 0);
cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
cSim.pxVector4[pos] = xVector;
pos += blockDim.x * gridDim.x;
}
......
......@@ -73,7 +73,7 @@ void kGenerateRandoms_kernel()
float4 random4;
float2 random2;
while (pos < cSim.totalRandomsTimesTwo)
while (pos < cSim.totalRandoms)
{
// Generate 6 randoms in GRF
......@@ -156,10 +156,10 @@ void kGenerateRandoms_kernel()
random4.y = sRand[threadIdx.x].y;
random4.z = sRand[threadIdx.x].z;
random4.w = sRand[threadIdx.x + blockDim.x].x;
cSim.pRandom4a[pos] = random4;
cSim.pRandom4[pos] = random4;
random2.x = sRand[threadIdx.x + blockDim.x].y;
random2.y = sRand[threadIdx.x + blockDim.x].z;
cSim.pRandom2a[pos] = random2;
cSim.pRandom2[pos] = random2;
pos += increment;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment