Use double precision when available to increase integration accuracy

7c1846a8 · Peter Eastman · d9941f47 · 7c1846a8 · 7c1846a8 · 7c1846a8
Commit 7c1846a8 authored Aug 31, 2010 by Peter Eastman
4 changed files
--- a/platforms/cuda/src/kernels/kLangevinUpdate.cu
+++ b/platforms/cuda/src/kernels/kLangevinUpdate.cu
@@ -181,16 +181,16 @@ __launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
 void kSetVelocitiesFromPositions_kernel()
 {
    float2 stepSize = cSim.pStepSize[0];
-    float oneOverDt = 2.0f/(stepSize.x+stepSize.y);
+    double oneOverDt = 2.0/(stepSize.x+stepSize.y);
    unsigned int pos = threadIdx.x;
    while (pos < cSim.atoms)
    {
        float4 posq = cSim.pPosq[pos];
        float4 oldPosq = cSim.pOldPosq[pos];
        float4 velm = cSim.pVelm4[pos];
-        velm.x = oneOverDt*(posq.x-oldPosq.x);
+        velm.x = (float) (oneOverDt*((double)posq.x-(double)oldPosq.x));
-        velm.y = oneOverDt*(posq.y-oldPosq.y);
+        velm.y = (float) (oneOverDt*((double)posq.y-(double)oldPosq.y));
-        velm.z = oneOverDt*(posq.z-oldPosq.z);
+        velm.z = (float) (oneOverDt*((double)posq.z-(double)oldPosq.z));
        cSim.pVelm4[pos] = velm;
        pos += blockDim.x * gridDim.x;
    }

--- a/platforms/cuda/src/kernels/kVerletUpdate.h
+++ b/platforms/cuda/src/kernels/kVerletUpdate.h
@@ -135,11 +135,11 @@ void kVerletUpdatePart2_kernel()
 {
    // Load the step size to take.
    unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
-    __shared__ float oneOverDeltaT;
+    __shared__ double oneOverDeltaT;
    if (threadIdx.x == 0)
    {
        float dt = cSim.pStepSize[0].y;
-        oneOverDeltaT = 1.0f/dt;
+        oneOverDeltaT = 1.0/dt;
        if (pos == 0)
            cSim.pStepSize[0].x = dt;
    }
@@ -155,9 +155,9 @@ void kVerletUpdatePart2_kernel()
        float4 apos             = cSim.pPosq[pos];
        float4 xPrime           = cSim.pPosqP[pos];
-        velocity.x              = oneOverDeltaT*(xPrime.x);
+        velocity.x              = (float) (oneOverDeltaT*(double)xPrime.x);
-        velocity.y              = oneOverDeltaT*(xPrime.y);
+        velocity.y              = (float) (oneOverDeltaT*(double)xPrime.y);
-        velocity.z              = oneOverDeltaT*(xPrime.z);
+        velocity.z              = (float) (oneOverDeltaT*(double)xPrime.z);
        xPrime.x               += apos.x;
        xPrime.y               += apos.y;

--- a/platforms/opencl/src/kernels/langevin.cl
+++ b/platforms/opencl/src/kernels/langevin.cl
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
 enum {VelScale, ForceScale, NoiseScale, MaxParams};
 /**
@@ -28,14 +32,22 @@ __kernel void integrateLangevinPart1(__global float4* velm, __global float4* for
 */
 __kernel void integrateLangevinPart2(__global float4* posq, __global float4* posDelta, __global float4* velm, __global float2* dt) {
+#ifdef cl_khr_fp64
+    double invStepSize = 1.0/dt[0].y;
+#else
    float invStepSize = 1.0f/dt[0].y;
+#endif
    int index = get_global_id(0);
    while (index < NUM_ATOMS) {
        float4 pos = posq[index];
        float4 delta = posDelta[index];
        float4 vel = velm[index];
        pos.xyz += delta.xyz;
+#ifdef cl_khr_fp64
+        vel.xyz = convert_float4(invStepSize*convert_double4(delta)).xyz;
+#else
        vel.xyz = invStepSize*delta.xyz;
+#endif
        posq[index] = pos;
        velm[index] = vel;
        index += get_global_size(0);

--- a/platforms/opencl/src/kernels/verlet.cl
+++ b/platforms/opencl/src/kernels/verlet.cl
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
 /**
 * Perform the first step of verlet integration.
 */
@@ -24,7 +28,11 @@ __kernel void integrateVerletPart1(int numAtoms, __global float2* dt, __global f
 __kernel void integrateVerletPart2(int numAtoms, __global float2* dt, __global float4* posq, __global float4* velm, __global float4* posDelta) {
    float2 stepSize = dt[0];
+#ifdef cl_khr_fp64
+    double oneOverDt = 1.0/stepSize.y;
+#else
    float oneOverDt = 1.0f/stepSize.y;
+#endif
    if (get_global_id(0) == 0)
        dt[0].x = stepSize.y;
    barrier(CLK_LOCAL_MEM_FENCE);
@@ -34,7 +42,11 @@ __kernel void integrateVerletPart2(int numAtoms, __global float2* dt, __global f
        float4 delta = posDelta[index];
        float4 velocity = velm[index];
        pos.xyz += delta.xyz;
+#ifdef cl_khr_fp64
+        velocity.xyz = convert_float4(convert_double4(delta)*oneOverDt).xyz;
+#else
        velocity.xyz = delta.xyz*oneOverDt;
+#endif
        posq[index] = pos;
        velm[index] = velocity;
        index += get_global_size(0);