Commit 6d7f0273 authored by peastman's avatar peastman
Browse files

Improved integration accuracy on devices that don't support double precision

parent 09970632
...@@ -32,7 +32,12 @@ extern "C" __global__ void integrateLangevinPart1(mixed4* __restrict__ velm, con ...@@ -32,7 +32,12 @@ extern "C" __global__ void integrateLangevinPart1(mixed4* __restrict__ velm, con
*/ */
extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real4* __restrict__ posqCorrection, const mixed4* __restrict__ posDelta, mixed4* __restrict__ velm, const mixed2* __restrict__ dt) { extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real4* __restrict__ posqCorrection, const mixed4* __restrict__ posDelta, mixed4* __restrict__ velm, const mixed2* __restrict__ dt) {
#if __CUDA_ARCH__ >= 130
double invStepSize = 1.0/dt[0].y; double invStepSize = 1.0/dt[0].y;
#else
float invStepSize = 1.0f/dt[0].y;
float correction = (1.0f-invStepSize*dt[0].y)/dt[0].y;
#endif
int index = blockIdx.x*blockDim.x+threadIdx.x; int index = blockIdx.x*blockDim.x+threadIdx.x;
while (index < NUM_ATOMS) { while (index < NUM_ATOMS) {
mixed4 vel = velm[index]; mixed4 vel = velm[index];
...@@ -48,9 +53,15 @@ extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real ...@@ -48,9 +53,15 @@ extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real
pos.x += delta.x; pos.x += delta.x;
pos.y += delta.y; pos.y += delta.y;
pos.z += delta.z; pos.z += delta.z;
#if __CUDA_ARCH__ >= 130
vel.x = (mixed) (invStepSize*delta.x); vel.x = (mixed) (invStepSize*delta.x);
vel.y = (mixed) (invStepSize*delta.y); vel.y = (mixed) (invStepSize*delta.y);
vel.z = (mixed) (invStepSize*delta.z); vel.z = (mixed) (invStepSize*delta.z);
#else
vel.x = invStepSize*delta.x + correction*delta.x;
vel.y = invStepSize*delta.y + correction*delta.x;
vel.z = invStepSize*delta.z + correction*delta.x;
#endif
#ifdef USE_MIXED_PRECISION #ifdef USE_MIXED_PRECISION
posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w); posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0); posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
......
...@@ -37,7 +37,12 @@ extern "C" __global__ void integrateVerletPart1(const mixed2* __restrict__ dt, c ...@@ -37,7 +37,12 @@ extern "C" __global__ void integrateVerletPart1(const mixed2* __restrict__ dt, c
extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4* __restrict__ posq, extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4* __restrict__ posq,
real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const mixed4* __restrict__ posDelta) { real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const mixed4* __restrict__ posDelta) {
mixed2 stepSize = dt[0]; mixed2 stepSize = dt[0];
#if __CUDA_ARCH__ >= 130
double oneOverDt = 1.0/stepSize.y; double oneOverDt = 1.0/stepSize.y;
#else
float oneOverDt = 1.0f/stepSize.y;
float correction = (1.0f-oneOverDt*stepSize.y)/stepSize.y;
#endif
int index = blockIdx.x*blockDim.x+threadIdx.x; int index = blockIdx.x*blockDim.x+threadIdx.x;
if (index == 0) if (index == 0)
dt[0].x = stepSize.y; dt[0].x = stepSize.y;
...@@ -55,7 +60,11 @@ extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4* ...@@ -55,7 +60,11 @@ extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4*
pos.x += delta.x; pos.x += delta.x;
pos.y += delta.y; pos.y += delta.y;
pos.z += delta.z; pos.z += delta.z;
#if __CUDA_ARCH__ >= 130
velocity = make_mixed4((mixed) (delta.x*oneOverDt), (mixed) (delta.y*oneOverDt), (mixed) (delta.z*oneOverDt), velocity.w); velocity = make_mixed4((mixed) (delta.x*oneOverDt), (mixed) (delta.y*oneOverDt), (mixed) (delta.z*oneOverDt), velocity.w);
#else
velocity = make_mixed4((mixed) (delta.x*oneOverDt+delta.x*correction), (mixed) (delta.y*oneOverDt+delta.y*correction), (mixed) (delta.z*oneOverDt+delta.z*correction), velocity.w);
#endif
#ifdef USE_MIXED_PRECISION #ifdef USE_MIXED_PRECISION
posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w); posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0); posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
......
...@@ -36,6 +36,7 @@ __kernel void integrateLangevinPart2(__global real4* restrict posq, __global rea ...@@ -36,6 +36,7 @@ __kernel void integrateLangevinPart2(__global real4* restrict posq, __global rea
double invStepSize = 1.0/dt[0].y; double invStepSize = 1.0/dt[0].y;
#else #else
float invStepSize = 1.0f/dt[0].y; float invStepSize = 1.0f/dt[0].y;
float correction = (1.0f-invStepSize*dt[0].y)/dt[0].y;
#endif #endif
int index = get_global_id(0); int index = get_global_id(0);
while (index < NUM_ATOMS) { while (index < NUM_ATOMS) {
...@@ -53,7 +54,7 @@ __kernel void integrateLangevinPart2(__global real4* restrict posq, __global rea ...@@ -53,7 +54,7 @@ __kernel void integrateLangevinPart2(__global real4* restrict posq, __global rea
#ifdef SUPPORTS_DOUBLE_PRECISION #ifdef SUPPORTS_DOUBLE_PRECISION
vel.xyz = convert_mixed4(invStepSize*convert_double4(delta)).xyz; vel.xyz = convert_mixed4(invStepSize*convert_double4(delta)).xyz;
#else #else
vel.xyz = invStepSize*delta.xyz; vel.xyz = invStepSize*delta.xyz + correction*delta.xyz;
#endif #endif
#ifdef USE_MIXED_PRECISION #ifdef USE_MIXED_PRECISION
posq[index] = convert_real4(pos); posq[index] = convert_real4(pos);
......
...@@ -38,6 +38,7 @@ __kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, _ ...@@ -38,6 +38,7 @@ __kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, _
double oneOverDt = 1.0/stepSize.y; double oneOverDt = 1.0/stepSize.y;
#else #else
float oneOverDt = 1.0f/stepSize.y; float oneOverDt = 1.0f/stepSize.y;
float correction = (1.0f-oneOverDt*stepSize.y)/stepSize.y;
#endif #endif
if (get_global_id(0) == 0) if (get_global_id(0) == 0)
dt[0].x = stepSize.y; dt[0].x = stepSize.y;
...@@ -58,7 +59,7 @@ __kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, _ ...@@ -58,7 +59,7 @@ __kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, _
#ifdef SUPPORTS_DOUBLE_PRECISION #ifdef SUPPORTS_DOUBLE_PRECISION
velocity.xyz = convert_mixed4(convert_double4(delta)*oneOverDt).xyz; velocity.xyz = convert_mixed4(convert_double4(delta)*oneOverDt).xyz;
#else #else
velocity.xyz = delta.xyz*oneOverDt; velocity.xyz = delta.xyz*oneOverDt + delta.xyz*correction;
#endif #endif
#ifdef USE_MIXED_PRECISION #ifdef USE_MIXED_PRECISION
posq[index] = convert_real4(pos); posq[index] = convert_real4(pos);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment