Commit 8725cede authored by Peter Eastman's avatar Peter Eastman
Browse files

Converted Cuda platform to use improved Langevin integrator

parent 15a8478e
...@@ -869,9 +869,6 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev ...@@ -869,9 +869,6 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
prevStepSize = stepSize; prevStepSize = stepSize;
} }
kLangevinUpdatePart1(gpu); kLangevinUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu);
if (data.removeCM) if (data.removeCM)
if (data.stepCount%data.cmMotionFrequency == 0) if (data.stepCount%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true; gpu->bCalculateCM = true;
...@@ -991,9 +988,6 @@ void CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, cons ...@@ -991,9 +988,6 @@ void CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, cons
float maxStepSize = (float)(maxTime-data.time); float maxStepSize = (float)(maxTime-data.time);
kSelectLangevinStepSize(gpu, maxStepSize); kSelectLangevinStepSize(gpu, maxStepSize);
kLangevinUpdatePart1(gpu); kLangevinUpdatePart1(gpu);
kApplyFirstShake(gpu);
kApplyFirstSettle(gpu);
kApplyFirstCCMA(gpu);
if (data.removeCM) if (data.removeCM)
if (data.stepCount%data.cmMotionFrequency == 0) if (data.stepCount%data.cmMotionFrequency == 0)
gpu->bCalculateCM = true; gpu->bCalculateCM = true;
......
...@@ -471,8 +471,6 @@ struct cudaGmxSimulation { ...@@ -471,8 +471,6 @@ struct cudaGmxSimulation {
float4* pPosqP; // Pointer to mid-integration atom positions float4* pPosqP; // Pointer to mid-integration atom positions
float4* pOldPosq; // Pointer to old atom positions float4* pOldPosq; // Pointer to old atom positions
float4* pVelm4; // Pointer to atom velocity and inverse mass float4* pVelm4; // Pointer to atom velocity and inverse mass
float4* pvVector4; // Pointer to atom v Vector
float4* pxVector4; // Pointer to atom x Vector
float4* pForce4; // Pointer to force data float4* pForce4; // Pointer to force data
float* pEnergy; // Pointer to energy output buffer float* pEnergy; // Pointer to energy output buffer
float* pBornForce; // Pointer to Born force data float* pBornForce; // Pointer to Born force data
...@@ -482,15 +480,12 @@ struct cudaGmxSimulation { ...@@ -482,15 +480,12 @@ struct cudaGmxSimulation {
float4* pLinearMomentum; // Pointer to linear momentum float4* pLinearMomentum; // Pointer to linear momentum
// Random numbers // Random numbers
float4* pRandom4a; // Pointer to first set of 4 random numbers float4* pRandom4; // Pointer to 4 random numbers
float4* pRandom4b; // Pointer to second set of 4 random numbers float2* pRandom2; // Pointer to 2 random numbers
float2* pRandom2a; // Pointer to first set of 2 random numbers
float2* pRandom2b; // Pointer to second set of 2 random numbers
uint4* pRandomSeed; // Pointer to random seeds uint4* pRandomSeed; // Pointer to random seeds
int* pRandomPosition; // Pointer to random number positions int* pRandomPosition; // Pointer to random number positions
unsigned int randoms; // Number of randoms unsigned int randoms; // Number of randoms
unsigned int totalRandoms; // Number of randoms plus overflow. unsigned int totalRandoms; // Number of randoms plus overflow.
unsigned int totalRandomsTimesTwo; // Used for generating randoms
unsigned int randomIterations; // Number of iterations before regenerating randoms unsigned int randomIterations; // Number of iterations before regenerating randoms
unsigned int randomFrames; // Number of frames of random numbers unsigned int randomFrames; // Number of frames of random numbers
}; };
......
...@@ -1651,10 +1651,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu) ...@@ -1651,10 +1651,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu->sim.pOldPosq = gpu->psOldPosq4->_pDevStream[0]; gpu->sim.pOldPosq = gpu->psOldPosq4->_pDevStream[0];
gpu->psVelm4 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "Velm"); gpu->psVelm4 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "Velm");
gpu->sim.pVelm4 = gpu->psVelm4->_pDevStream[0]; gpu->sim.pVelm4 = gpu->psVelm4->_pDevStream[0];
gpu->psvVector4 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "vVector");
gpu->sim.pvVector4 = gpu->psvVector4->_pDevStream[0];
gpu->psxVector4 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "xVector");
gpu->sim.pxVector4 = gpu->psxVector4->_pDevStream[0];
gpu->psBornRadii = new CUDAStream<float>(gpu->sim.paddedNumberOfAtoms, 1, "BornRadii"); gpu->psBornRadii = new CUDAStream<float>(gpu->sim.paddedNumberOfAtoms, 1, "BornRadii");
gpu->sim.pBornRadii = gpu->psBornRadii->_pDevStream[0]; gpu->sim.pBornRadii = gpu->psBornRadii->_pDevStream[0];
gpu->psObcChain = new CUDAStream<float>(gpu->sim.paddedNumberOfAtoms, 1, "ObcChain"); gpu->psObcChain = new CUDAStream<float>(gpu->sim.paddedNumberOfAtoms, 1, "ObcChain");
...@@ -1669,7 +1665,7 @@ int gpuAllocateInitialBuffers(gpuContext gpu) ...@@ -1669,7 +1665,7 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu->sim.pStepSize = gpu->psStepSize->_pDevStream[0]; gpu->sim.pStepSize = gpu->psStepSize->_pDevStream[0];
(*gpu->psStepSize)[0] = make_float2(0.0f, 0.0f); (*gpu->psStepSize)[0] = make_float2(0.0f, 0.0f);
gpu->psStepSize->Upload(); gpu->psStepSize->Upload();
gpu->psLangevinParameters = new CUDAStream<float>(11, 1, "LangevinParameters"); gpu->psLangevinParameters = new CUDAStream<float>(3, 1, "LangevinParameters");
gpu->sim.pLangevinParameters = gpu->psLangevinParameters->_pDevStream[0]; gpu->sim.pLangevinParameters = gpu->psLangevinParameters->_pDevStream[0];
gpu->pAtomSymbol = new unsigned char[gpu->natoms]; gpu->pAtomSymbol = new unsigned char[gpu->natoms];
gpu->psAtomIndex = new CUDAStream<int>(gpu->sim.paddedNumberOfAtoms, 1, "AtomIndex"); gpu->psAtomIndex = new CUDAStream<int>(gpu->sim.paddedNumberOfAtoms, 1, "AtomIndex");
...@@ -1684,15 +1680,12 @@ int gpuAllocateInitialBuffers(gpuContext gpu) ...@@ -1684,15 +1680,12 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu->sim.randomIterations = gpu->sim.randomFrames; gpu->sim.randomIterations = gpu->sim.randomFrames;
gpu->sim.randoms = gpu->sim.randomFrames * gpu->sim.paddedNumberOfAtoms; gpu->sim.randoms = gpu->sim.randomFrames * gpu->sim.paddedNumberOfAtoms;
gpu->sim.totalRandoms = gpu->sim.randoms + gpu->sim.paddedNumberOfAtoms; gpu->sim.totalRandoms = gpu->sim.randoms + gpu->sim.paddedNumberOfAtoms;
gpu->sim.totalRandomsTimesTwo = gpu->sim.totalRandoms * 2; gpu->psRandom4 = new CUDAStream<float4>(gpu->sim.totalRandoms, 1, "Random4");
gpu->psRandom4 = new CUDAStream<float4>(gpu->sim.totalRandomsTimesTwo, 1, "Random4"); gpu->psRandom2 = new CUDAStream<float2>(gpu->sim.totalRandoms, 1, "Random2");
gpu->psRandom2 = new CUDAStream<float2>(gpu->sim.totalRandomsTimesTwo, 1, "Random2");
gpu->psRandomPosition = new CUDAStream<int>(gpu->sim.blocks, 1, "RandomPosition"); gpu->psRandomPosition = new CUDAStream<int>(gpu->sim.blocks, 1, "RandomPosition");
gpu->psRandomSeed = new CUDAStream<uint4>(gpu->sim.blocks * gpu->sim.random_threads_per_block, 1, "RandomSeed"); gpu->psRandomSeed = new CUDAStream<uint4>(gpu->sim.blocks * gpu->sim.random_threads_per_block, 1, "RandomSeed");
gpu->sim.pRandom4a = gpu->psRandom4->_pDevStream[0]; gpu->sim.pRandom4 = gpu->psRandom4->_pDevStream[0];
gpu->sim.pRandom2a = gpu->psRandom2->_pDevStream[0]; gpu->sim.pRandom2 = gpu->psRandom2->_pDevStream[0];
gpu->sim.pRandom4b = gpu->psRandom4->_pDevStream[0] + gpu->sim.totalRandoms;
gpu->sim.pRandom2b = gpu->psRandom2->_pDevStream[0] + gpu->sim.totalRandoms;
gpu->sim.pRandomPosition = gpu->psRandomPosition->_pDevStream[0]; gpu->sim.pRandomPosition = gpu->psRandomPosition->_pDevStream[0];
gpu->sim.pRandomSeed = gpu->psRandomSeed->_pDevStream[0]; gpu->sim.pRandomSeed = gpu->psRandomSeed->_pDevStream[0];
...@@ -1884,14 +1877,6 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync) ...@@ -1884,14 +1877,6 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu->natoms = numAtoms; gpu->natoms = numAtoms;
gpuAllocateInitialBuffers(gpu); gpuAllocateInitialBuffers(gpu);
for (int i = 0; i < gpu->natoms; i++)
{
(*gpu->psxVector4)[i].x = 0.0f;
(*gpu->psxVector4)[i].y = 0.0f;
(*gpu->psxVector4)[i].z = 0.0f;
(*gpu->psxVector4)[i].w = 0.0f;
}
gpu->psxVector4->Upload();
gpu->iterations = 0; gpu->iterations = 0;
gpu->sim.update_threads_per_block = (gpu->natoms + gpu->sim.blocks - 1) / gpu->sim.blocks; gpu->sim.update_threads_per_block = (gpu->natoms + gpu->sim.blocks - 1) / gpu->sim.blocks;
...@@ -2023,60 +2008,12 @@ void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT ...@@ -2023,60 +2008,12 @@ void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT
gpu->sim.tau = tau; gpu->sim.tau = tau;
gpu->sim.T = temperature; gpu->sim.T = temperature;
gpu->sim.kT = BOLTZ * gpu->sim.T; gpu->sim.kT = BOLTZ * gpu->sim.T;
double GDT = gpu->sim.deltaT / gpu->sim.tau; double vscale = exp(-deltaT/tau);
double EPH = exp(0.5 * GDT); double fscale = (1-vscale)*tau;
double EMH = exp(-0.5 * GDT); double noisescale = sqrt(2*gpu->sim.kT/tau)*sqrt(0.5*(1-vscale*vscale)*tau);
double EP = exp(GDT); (*gpu->psLangevinParameters)[0] = (float) vscale;
double EM = exp(-GDT); (*gpu->psLangevinParameters)[1] = (float) fscale;
double B, C, D; (*gpu->psLangevinParameters)[2] = (float) noisescale;
if (GDT >= 0.1)
{
double term1 = EPH - 1.0;
term1 *= term1;
B = GDT * (EP - 1.0) - 4.0 * term1;
C = GDT - 3.0 + 4.0 * EMH - EM;
D = 2.0 - EPH - EMH;
}
else
{
double term1 = 0.5 * GDT;
double term2 = term1 * term1;
double term4 = term2 * term2;
double third = 1.0 / 3.0;
double o7_9 = 7.0 / 9.0;
double o1_12 = 1.0 / 12.0;
double o17_90 = 17.0 / 90.0;
double o7_30 = 7.0 / 30.0;
double o31_1260 = 31.0 / 1260.0;
double o_360 = 1.0 / 360.0;
B = term4 * (third + term1 * (third + term1 * (o17_90 + term1 * o7_9)));
C = term2 * term1 * (2.0 * third + term1 * (-0.5 + term1 * (o7_30 + term1 * (-o1_12 + term1 * o31_1260))));
D = term2 * (-1.0 + term2 * (-o1_12 - term2 * o_360));
}
double DOverTauC = D / (gpu->sim.tau * C);
double TauOneMinusEM = gpu->sim.tau * (1.0-EM);
double TauDOverEMMinusOne = gpu->sim.tau * D / (EM - 1.0);
double fix1 = gpu->sim.tau * (EPH - EMH);
if (fix1 == 0.0)
fix1 = deltaT;
double oneOverFix1 = 1.0 / fix1;
double V = sqrt(gpu->sim.kT * (1.0 - EM));
double X = gpu->sim.tau * sqrt(gpu->sim.kT * C);
double Yv = sqrt(gpu->sim.kT * B / C);
double Yx = gpu->sim.tau * sqrt(gpu->sim.kT * B / (1.0 - EM));
(*gpu->psLangevinParameters)[0] = (float) EM;
(*gpu->psLangevinParameters)[1] = (float) EM;
(*gpu->psLangevinParameters)[2] = (float) DOverTauC;
(*gpu->psLangevinParameters)[3] = (float) TauOneMinusEM;
(*gpu->psLangevinParameters)[4] = (float) TauDOverEMMinusOne;
(*gpu->psLangevinParameters)[5] = (float) V;
(*gpu->psLangevinParameters)[6] = (float) X;
(*gpu->psLangevinParameters)[7] = (float) Yv;
(*gpu->psLangevinParameters)[8] = (float) Yx;
(*gpu->psLangevinParameters)[9] = (float) fix1;
(*gpu->psLangevinParameters)[10] = (float) oneOverFix1;
gpu->psLangevinParameters->Upload(); gpu->psLangevinParameters->Upload();
gpu->psStepSize->Download(); gpu->psStepSize->Download();
(*gpu->psStepSize)[0].y = deltaT; (*gpu->psStepSize)[0].y = deltaT;
...@@ -2129,8 +2066,6 @@ void gpuShutDown(gpuContext gpu) ...@@ -2129,8 +2066,6 @@ void gpuShutDown(gpuContext gpu)
delete gpu->psVelm4; delete gpu->psVelm4;
delete gpu->psForce4; delete gpu->psForce4;
delete gpu->psEnergy; delete gpu->psEnergy;
delete gpu->psxVector4;
delete gpu->psvVector4;
delete gpu->psSigEps2; delete gpu->psSigEps2;
if (gpu->psCustomParams != NULL) if (gpu->psCustomParams != NULL)
delete gpu->psCustomParams; delete gpu->psCustomParams;
......
...@@ -101,8 +101,6 @@ struct _gpuContext { ...@@ -101,8 +101,6 @@ struct _gpuContext {
CUDAStream<float4>* psVelm4; CUDAStream<float4>* psVelm4;
CUDAStream<float4>* psForce4; CUDAStream<float4>* psForce4;
CUDAStream<float>* psEnergy; // Energy output buffer CUDAStream<float>* psEnergy; // Energy output buffer
CUDAStream<float4>* psxVector4;
CUDAStream<float4>* psvVector4;
CUDAStream<float2>* psSigEps2; CUDAStream<float2>* psSigEps2;
CUDAStream<float4>* psCustomParams; // Atom parameters for custom nonbonded force CUDAStream<float4>* psCustomParams; // Atom parameters for custom nonbonded force
CUDAStream<int4>* psCustomBondID; // Atom indices for custom bonds CUDAStream<int4>* psCustomBondID; // Atom indices for custom bonds
......
...@@ -51,14 +51,14 @@ void GetBrownianUpdateSim(gpuContext gpu) ...@@ -51,14 +51,14 @@ void GetBrownianUpdateSim(gpuContext gpu)
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed"); RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
} }
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
#endif #endif
void kBrownianUpdatePart1_kernel() void kBrownianUpdatePart1_kernel()
{ {
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x; unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
...@@ -67,7 +67,7 @@ void kBrownianUpdatePart1_kernel() ...@@ -67,7 +67,7 @@ void kBrownianUpdatePart1_kernel()
while (pos < cSim.atoms) while (pos < cSim.atoms)
{ {
float4 random4a = cSim.pRandom4a[rpos + pos]; float4 random4a = cSim.pRandom4[rpos + pos];
float4 apos = cSim.pPosq[pos]; float4 apos = cSim.pPosq[pos];
float4 force = cSim.pForce4[pos]; float4 force = cSim.pForce4[pos];
float invMass = cSim.pVelm4[pos].w; float invMass = cSim.pVelm4[pos].w;
...@@ -90,15 +90,15 @@ void kBrownianUpdatePart1(gpuContext gpu) ...@@ -90,15 +90,15 @@ void kBrownianUpdatePart1(gpuContext gpu)
LAUNCHERROR("kBrownianUpdatePart1"); LAUNCHERROR("kBrownianUpdatePart1");
} }
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
#endif #endif
void kBrownianUpdatePart2_kernel() void kBrownianUpdatePart2_kernel()
{ {
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x; unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x]; unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
......
...@@ -62,7 +62,7 @@ __global__ void kCalculateAndersenThermostat_kernel() ...@@ -62,7 +62,7 @@ __global__ void kCalculateAndersenThermostat_kernel()
while (pos < cSim.atoms) while (pos < cSim.atoms)
{ {
float4 velocity = cSim.pVelm4[pos]; float4 velocity = cSim.pVelm4[pos];
float4 random4a = cSim.pRandom4a[rpos + pos]; float4 random4a = cSim.pRandom4[rpos + pos];
float scale = (random4a.w > -randomRange && random4a.w < randomRange ? 0.0f : 1.0f); float scale = (random4a.w > -randomRange && random4a.w < randomRange ? 0.0f : 1.0f);
float add = (1.0f-scale)*sqrt(cSim.kT*velocity.w); float add = (1.0f-scale)*sqrt(cSim.kT*velocity.w);
velocity.x = scale*velocity.x + add*random4a.x; velocity.x = scale*velocity.x + add*random4a.x;
......
...@@ -34,7 +34,7 @@ using namespace std; ...@@ -34,7 +34,7 @@ using namespace std;
#include "gputypes.h" #include "gputypes.h"
enum {EM, EM_V, DOverTauC, TauOneMinusEM_V, TauDOverEMMinusOne, V, X, Yv, Yx, Fix1, OneOverFix1, MaxParams}; enum {VelScale, ForceScale, NoiseScale, MaxParams};
static __constant__ cudaGmxSimulation cSim; static __constant__ cudaGmxSimulation cSim;
...@@ -101,15 +101,15 @@ void kLangevinUpdatePart2(gpuContext gpu) ...@@ -101,15 +101,15 @@ void kLangevinUpdatePart2(gpuContext gpu)
} }
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
#endif #endif
void kSelectLangevinStepSize_kernel(float maxStepSize) void kSelectLangevinStepSize_kernel(float maxStepSize)
{ {
// Calculate the error. // Calculate the error.
...@@ -151,53 +151,12 @@ void kSelectLangevinStepSize_kernel(float maxStepSize) ...@@ -151,53 +151,12 @@ void kSelectLangevinStepSize_kernel(float maxStepSize)
// Recalculate the integration parameters. // Recalculate the integration parameters.
float gdt = newStepSize / cSim.tau; float vscale = exp(-newStepSize/cSim.tau);
float eph = exp(0.5f * gdt); float fscale = (1-vscale)*cSim.tau;
float emh = exp(-0.5f * gdt); float noisescale = sqrt(2*cSim.kT/cSim.tau)*sqrt(0.5f*(1-vscale*vscale)*cSim.tau);
float ep = exp(gdt); params[VelScale] = vscale;
float em = exp(-gdt); params[ForceScale] = fscale;
float em_v = exp(-0.5f*(oldStepSize+newStepSize)/cSim.tau); params[NoiseScale] = noisescale;
float b, c, d;
if (gdt >= 0.1f)
{
float term1 = eph - 1.0f;
term1 *= term1;
b = gdt * (ep - 1.0f) - 4.0f * term1;
c = gdt - 3.0f + 4.0f * emh - em;
d = 2.0f - eph - emh;
}
else
{
float term1 = 0.5f * gdt;
float term2 = term1 * term1;
float term4 = term2 * term2;
float third = 1.0f / 3.0f;
float o7_9 = 7.0f / 9.0f;
float o1_12 = 1.0f / 12.0f;
float o17_90 = 17.0f / 90.0f;
float o7_30 = 7.0f / 30.0f;
float o31_1260 = 31.0f / 1260.0f;
float o_360 = 1.0f / 360.0f;
b = term4 * (third + term1 * (third + term1 * (o17_90 + term1 * o7_9)));
c = term2 * term1 * (2.0f * third + term1 * (-0.5f + term1 * (o7_30 + term1 * (-o1_12 + term1 * o31_1260))));
d = term2 * (-1.0f + term2 * (-o1_12 - term2 * o_360));
}
float fix1 = cSim.tau * (eph - emh);
if (fix1 == 0.0f)
fix1 = newStepSize;
params[EM] = em;
params[EM_V] = em_v;
params[DOverTauC] = d / (cSim.tau * c);
params[TauOneMinusEM_V] = cSim.tau * (1.0f-em_v);
params[TauDOverEMMinusOne] = cSim.tau * d / (em - 1.0f);
params[Fix1] = fix1;
params[OneOverFix1] = 1.0f / fix1;
params[V] = sqrt(cSim.kT * (1.0f - em));
params[X] = cSim.tau * sqrt(cSim.kT * c);
params[Yv] = sqrt(cSim.kT * b / c);
params[Yx] = cSim.tau * sqrt(cSim.kT * b / (1.0f - em));
} }
__syncthreads(); __syncthreads();
if (threadIdx.x < MaxParams) if (threadIdx.x < MaxParams)
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * * along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "cudatypes.h"
/** /**
* This file contains the kernels for Langevin integration. It is included * This file contains the kernels for Langevin integration. It is included
...@@ -45,12 +46,12 @@ void kLangevinUpdatePart1CM_kernel() ...@@ -45,12 +46,12 @@ void kLangevinUpdatePart1CM_kernel()
void kLangevinUpdatePart1_kernel() void kLangevinUpdatePart1_kernel()
#endif #endif
{ {
__shared__ volatile float params[MaxParams];
if (threadIdx.x < MaxParams)
params[threadIdx.x] = cSim.pLangevinParameters[threadIdx.x];
__syncthreads();
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x; unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x]; unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
float vscale = cSim.pLangevinParameters[VelScale];
float fscale = cSim.pLangevinParameters[ForceScale];
float noisescale = cSim.pLangevinParameters[NoiseScale];
#ifdef REMOVE_CM #ifdef REMOVE_CM
extern __shared__ float3 sCM[]; extern __shared__ float3 sCM[];
float3 CM = { 0.0f, 0.0f, 0.0f}; float3 CM = { 0.0f, 0.0f, 0.0f};
...@@ -91,45 +92,19 @@ void kLangevinUpdatePart1_kernel() ...@@ -91,45 +92,19 @@ void kLangevinUpdatePart1_kernel()
while (pos < cSim.atoms) while (pos < cSim.atoms)
{ {
float4 velocity = cSim.pVelm4[pos]; float4 velocity = cSim.pVelm4[pos];
float4 xVector = cSim.pxVector4[pos]; float4 random4a = cSim.pRandom4[rpos + pos];
float4 random4a = cSim.pRandom4a[rpos + pos];
float2 random2a = cSim.pRandom2a[rpos + pos];
float4 apos = cSim.pPosq[pos];
float4 force = cSim.pForce4[pos]; float4 force = cSim.pForce4[pos];
float3 Vmh;
float sqrtInvMass = sqrt(velocity.w); float sqrtInvMass = sqrt(velocity.w);
Vmh.x = xVector.x * params[DOverTauC] + sqrtInvMass * params[Yv] * random4a.x; velocity.x = vscale*velocity.x + fscale*velocity.w*force.x + noisescale*sqrtInvMass*random4a.x;
Vmh.y = xVector.y * params[DOverTauC] + sqrtInvMass * params[Yv] * random4a.y; velocity.y = vscale*velocity.y + fscale*velocity.w*force.y + noisescale*sqrtInvMass*random4a.y;
Vmh.z = xVector.z * params[DOverTauC] + sqrtInvMass * params[Yv] * random4a.z; velocity.z = vscale*velocity.z + fscale*velocity.w*force.z + noisescale*sqrtInvMass*random4a.z;
float4 vVector;
vVector.x = sqrtInvMass * params[V] * random4a.w;
vVector.y = sqrtInvMass * params[V] * random2a.x;
vVector.z = sqrtInvMass * params[V] * random2a.y;
vVector.w = 0.0f;
cSim.pvVector4[pos] = vVector;
velocity.x = velocity.x * params[EM_V] +
velocity.w * force.x * params[TauOneMinusEM_V] +
vVector.x -
params[EM] * Vmh.x;
velocity.y = velocity.y * params[EM_V] +
velocity.w * force.y * params[TauOneMinusEM_V] +
vVector.y -
params[EM] * Vmh.y;
velocity.z = velocity.z * params[EM_V] +
velocity.w * force.z * params[TauOneMinusEM_V] +
vVector.z -
params[EM] * Vmh.z;
#ifdef REMOVE_CM #ifdef REMOVE_CM
velocity.x -= sCM[0].x; velocity.x -= sCM[0].x;
velocity.y -= sCM[0].y; velocity.y -= sCM[0].y;
velocity.z -= sCM[0].z; velocity.z -= sCM[0].z;
#endif #endif
cSim.pOldPosq[pos] = apos; cSim.pOldPosq[pos] = cSim.pPosq[pos];
apos.x = velocity.x * params[Fix1];
apos.y = velocity.y * params[Fix1];
apos.z = velocity.z * params[Fix1];
cSim.pPosqP[pos] = apos;
cSim.pVelm4[pos] = velocity; cSim.pVelm4[pos] = velocity;
pos += blockDim.x * gridDim.x; pos += blockDim.x * gridDim.x;
} }
...@@ -149,12 +124,9 @@ void kLangevinUpdatePart2CM_kernel() ...@@ -149,12 +124,9 @@ void kLangevinUpdatePart2CM_kernel()
void kLangevinUpdatePart2_kernel() void kLangevinUpdatePart2_kernel()
#endif #endif
{ {
__shared__ float params[MaxParams];
if (threadIdx.x < MaxParams)
params[threadIdx.x] = cSim.pLangevinParameters[threadIdx.x];
__syncthreads();
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x; unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
unsigned int rpos = cSim.pRandomPosition[blockIdx.x]; unsigned int rpos = cSim.pRandomPosition[blockIdx.x];
float dt = cSim.pStepSize[0].y;
#ifdef REMOVE_CM #ifdef REMOVE_CM
extern __shared__ float3 sCM[]; extern __shared__ float3 sCM[];
float3 CM = {0.0f, 0.0f, 0.0f}; float3 CM = {0.0f, 0.0f, 0.0f};
...@@ -164,16 +136,6 @@ void kLangevinUpdatePart2_kernel() ...@@ -164,16 +136,6 @@ void kLangevinUpdatePart2_kernel()
while (pos < cSim.atoms) while (pos < cSim.atoms)
{ {
float4 velocity = cSim.pVelm4[pos]; float4 velocity = cSim.pVelm4[pos];
float4 xPrime = cSim.pPosqP[pos];
float4 vVector = cSim.pvVector4[pos];
float4 xVector;
float4 random4b = cSim.pRandom4b[rpos + pos];
float2 random2b = cSim.pRandom2b[rpos + pos];
float3 Xmh;
float sqrtInvMass = sqrt(velocity.w);
velocity.x = xPrime.x * params[OneOverFix1];
velocity.y = xPrime.y * params[OneOverFix1];
velocity.z = xPrime.z * params[OneOverFix1];
#ifdef REMOVE_CM #ifdef REMOVE_CM
float mass = 1.0f / velocity.w; float mass = 1.0f / velocity.w;
CM.x += mass * velocity.x; CM.x += mass * velocity.x;
...@@ -181,24 +143,9 @@ void kLangevinUpdatePart2_kernel() ...@@ -181,24 +143,9 @@ void kLangevinUpdatePart2_kernel()
CM.z += mass * velocity.z; CM.z += mass * velocity.z;
#endif #endif
Xmh.x = vVector.x * params[TauDOverEMMinusOne] + float4 xPrime = make_float4(dt*velocity.x, dt*velocity.y, dt*velocity.z, 0);
sqrtInvMass * params[Yx] * random4b.x;
Xmh.y = vVector.y * params[TauDOverEMMinusOne] +
sqrtInvMass * params[Yx] * random4b.y;
Xmh.z = vVector.z * params[TauDOverEMMinusOne] +
sqrtInvMass * params[Yx] * random4b.z;
xVector.x = sqrtInvMass * params[X] * random4b.w;
xVector.y = sqrtInvMass * params[X] * random2b.x;
xVector.z = sqrtInvMass * params[X] * random2b.y;
xPrime.x += xVector.x - Xmh.x;
xPrime.y += xVector.y - Xmh.y;
xPrime.z += xVector.z - Xmh.z;
cSim.pPosq[pos] = xPrime; cSim.pPosq[pos] = xPrime;
cSim.pVelm4[pos] = velocity;
cSim.pxVector4[pos] = xVector;
pos += blockDim.x * gridDim.x; pos += blockDim.x * gridDim.x;
} }
......
...@@ -54,14 +54,14 @@ void GetRandomSim(gpuContext gpu) ...@@ -54,14 +54,14 @@ void GetRandomSim(gpuContext gpu)
extern __shared__ float3 sRand[]; extern __shared__ float3 sRand[];
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_RANDOM_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_RANDOM_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_RANDOM_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_RANDOM_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_RANDOM_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_RANDOM_THREADS_PER_BLOCK, 1)
#endif #endif
void kGenerateRandoms_kernel() void kGenerateRandoms_kernel()
{ {
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x; unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -73,7 +73,7 @@ void kGenerateRandoms_kernel() ...@@ -73,7 +73,7 @@ void kGenerateRandoms_kernel()
float4 random4; float4 random4;
float2 random2; float2 random2;
while (pos < cSim.totalRandomsTimesTwo) while (pos < cSim.totalRandoms)
{ {
// Generate 6 randoms in GRF // Generate 6 randoms in GRF
...@@ -156,10 +156,10 @@ void kGenerateRandoms_kernel() ...@@ -156,10 +156,10 @@ void kGenerateRandoms_kernel()
random4.y = sRand[threadIdx.x].y; random4.y = sRand[threadIdx.x].y;
random4.z = sRand[threadIdx.x].z; random4.z = sRand[threadIdx.x].z;
random4.w = sRand[threadIdx.x + blockDim.x].x; random4.w = sRand[threadIdx.x + blockDim.x].x;
cSim.pRandom4a[pos] = random4; cSim.pRandom4[pos] = random4;
random2.x = sRand[threadIdx.x + blockDim.x].y; random2.x = sRand[threadIdx.x + blockDim.x].y;
random2.y = sRand[threadIdx.x + blockDim.x].z; random2.y = sRand[threadIdx.x + blockDim.x].z;
cSim.pRandom2a[pos] = random2; cSim.pRandom2[pos] = random2;
pos += increment; pos += increment;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment