Commit 036c3ff4 authored by Peter Eastman
Browse files

Bug fix (bug 1051)

parent 140081ec
...@@ -230,22 +230,16 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni ...@@ -230,22 +230,16 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
} }
// Write results // Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP #ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride; unsigned int offset = x + tgx + warp*cSim.stride;
of = cSim.pForce4a[offset]; #else
unsigned int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
#endif
float4 of = cSim.pForce4a[offset];
of.x += af.x; of.x += af.x;
of.y += af.y; of.y += af.y;
of.z += af.z; of.z += af.z;
cSim.pForce4a[offset] = of; cSim.pForce4a[offset] = of;
#else
of.x = af.x;
of.y = af.y;
of.z = af.z;
of.w = 0.0f;
unsigned int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = of;
#endif
} }
else // 100% utilization else // 100% utilization
{ {
...@@ -530,31 +524,25 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni ...@@ -530,31 +524,25 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
// Write results // Write results
float4 of; float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP #ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride; unsigned int offset = x + tgx + warp*cSim.stride;
#else
unsigned int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
#endif
of = cSim.pForce4a[offset]; of = cSim.pForce4a[offset];
of.x += af.x; of.x += af.x;
of.y += af.y; of.y += af.y;
of.z += af.z; of.z += af.z;
cSim.pForce4a[offset] = of; cSim.pForce4a[offset] = of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
offset = y + tgx + warp*cSim.stride; offset = y + tgx + warp*cSim.stride;
#else
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
#endif
of = cSim.pForce4a[offset]; of = cSim.pForce4a[offset];
of.x += sA[threadIdx.x].fx; of.x += sA[threadIdx.x].fx;
of.y += sA[threadIdx.x].fy; of.y += sA[threadIdx.x].fy;
of.z += sA[threadIdx.x].fz; of.z += sA[threadIdx.x].fz;
cSim.pForce4a[offset] = of; cSim.pForce4a[offset] = of;
#else
of.x = af.x;
of.y = af.y;
of.z = af.z;
of.w = 0.0f;
unsigned int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = of;
of.x = sA[threadIdx.x].fx;
of.y = sA[threadIdx.x].fy;
of.z = sA[threadIdx.x].fz;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = of;
#endif
lasty = y; lasty = y;
} }
......
...@@ -278,6 +278,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* ...@@ -278,6 +278,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
// Write results // Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP #ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride; unsigned int offset = x + tgx + warp*cSim.stride;
#else
unsigned int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
#endif
float4 of = cSim.pForce4a[offset]; float4 of = cSim.pForce4a[offset];
of.x += af.x; of.x += af.x;
of.y += af.y; of.y += af.y;
...@@ -285,11 +288,6 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* ...@@ -285,11 +288,6 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
of.w += af.w; of.w += af.w;
cSim.pForce4a[offset] = of; cSim.pForce4a[offset] = of;
cSim.pBornForce[offset] = of.w; cSim.pBornForce[offset] = of.w;
#else
unsigned int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = af;
cSim.pBornForce[offset] = af.w;
#endif
} }
else // 100% utilization else // 100% utilization
{ {
...@@ -662,6 +660,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* ...@@ -662,6 +660,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
// Write results // Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP #ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride; unsigned int offset = x + tgx + warp*cSim.stride;
#else
unsigned int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
#endif
float4 of = cSim.pForce4a[offset]; float4 of = cSim.pForce4a[offset];
of.x += af.x; of.x += af.x;
of.y += af.y; of.y += af.y;
...@@ -669,7 +670,11 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* ...@@ -669,7 +670,11 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
of.w += af.w; of.w += af.w;
cSim.pForce4a[offset] = of; cSim.pForce4a[offset] = of;
cSim.pBornForce[offset] = of.w; cSim.pBornForce[offset] = of.w;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
offset = y + tgx + warp*cSim.stride; offset = y + tgx + warp*cSim.stride;
#else
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
#endif
of = cSim.pForce4a[offset]; of = cSim.pForce4a[offset];
of.x += sA[threadIdx.x].fx; of.x += sA[threadIdx.x].fx;
of.y += sA[threadIdx.x].fy; of.y += sA[threadIdx.x].fy;
...@@ -677,18 +682,6 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* ...@@ -677,18 +682,6 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
of.w += sA[threadIdx.x].fb; of.w += sA[threadIdx.x].fb;
cSim.pForce4a[offset] = of; cSim.pForce4a[offset] = of;
cSim.pBornForce[offset] = of.w; cSim.pBornForce[offset] = of.w;
#else
unsigned int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = af;
cSim.pBornForce[offset] = af.w;
af.x = sA[threadIdx.x].fx;
af.y = sA[threadIdx.x].fy;
af.z = sA[threadIdx.x].fz;
af.w = sA[threadIdx.x].fb;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = af;
cSim.pBornForce[offset] = af.w;
#endif
lasty = y; lasty = y;
} }
pos++; pos++;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment