Commit 18355094 authored by Peter Eastman's avatar Peter Eastman
Browse files

When converting to fixed point, multiply by 0x100000000 instead of 0xFFFFFFFF....

When converting to fixed point, multiply by 0x100000000 instead of 0xFFFFFFFF.  This should be (very very slightly) more accurate, since its reciprocal can be exactly represented in floating point.
parent a6bb39a3
...@@ -134,7 +134,7 @@ void computeN2Value(__global const real4* restrict posq, __local real4* restrict ...@@ -134,7 +134,7 @@ void computeN2Value(__global const real4* restrict posq, __local real4* restrict
if (get_local_id(0) < TILE_SIZE) { if (get_local_id(0) < TILE_SIZE) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&global_value[offset], (long) ((value + tempBuffer[get_local_id(0)+TILE_SIZE])*0xFFFFFFFF)); atom_add(&global_value[offset], (long) ((value + tempBuffer[get_local_id(0)+TILE_SIZE])*0x100000000));
#else #else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset = x*TILE_SIZE + tgx + x*PADDED_NUM_ATOMS; const unsigned int offset = x*TILE_SIZE + tgx + x*PADDED_NUM_ATOMS;
...@@ -216,8 +216,8 @@ void computeN2Value(__global const real4* restrict posq, __local real4* restrict ...@@ -216,8 +216,8 @@ void computeN2Value(__global const real4* restrict posq, __local real4* restrict
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset1 = x*TILE_SIZE + tgx; const unsigned int offset1 = x*TILE_SIZE + tgx;
const unsigned int offset2 = y*TILE_SIZE + tgx; const unsigned int offset2 = y*TILE_SIZE + tgx;
atom_add(&global_value[offset1], (long) ((value + tempBuffer[get_local_id(0)+TILE_SIZE])*0xFFFFFFFF)); atom_add(&global_value[offset1], (long) ((value + tempBuffer[get_local_id(0)+TILE_SIZE])*0x100000000));
atom_add(&global_value[offset2], (long) ((local_value[get_local_id(0)] + local_value[get_local_id(0)+TILE_SIZE])*0xFFFFFFFF)); atom_add(&global_value[offset2], (long) ((local_value[get_local_id(0)] + local_value[get_local_id(0)+TILE_SIZE])*0x100000000));
#else #else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset1 = x*TILE_SIZE + tgx + y*PADDED_NUM_ATOMS; const unsigned int offset1 = x*TILE_SIZE + tgx + y*PADDED_NUM_ATOMS;
......
...@@ -241,11 +241,11 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -241,11 +241,11 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
if (pos < end) { if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&global_value[offset], (long) (value*0xFFFFFFFF)); atom_add(&global_value[offset], (long) (value*0x100000000));
} }
if (pos < end && x != y) { if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx; const unsigned int offset = y*TILE_SIZE + tgx;
atom_add(&global_value[offset], (long) (local_value[get_local_id(0)]*0xFFFFFFFF)); atom_add(&global_value[offset], (long) (local_value[get_local_id(0)]*0x100000000));
} }
#else #else
int writeX = (pos < end ? x : -1); int writeX = (pos < end ? x : -1);
......
...@@ -14,7 +14,7 @@ __kernel void computePerParticleValues(int bufferSize, int numBuffers, __global ...@@ -14,7 +14,7 @@ __kernel void computePerParticleValues(int bufferSize, int numBuffers, __global
// Reduce the pairwise value // Reduce the pairwise value
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
real sum = (1.0f/0xFFFFFFFF)*valueBuffers[index]; real sum = (1.0f/0x100000000)*valueBuffers[index];
#else #else
int totalSize = bufferSize*numBuffers; int totalSize = bufferSize*numBuffers;
real sum = valueBuffers[index]; real sum = valueBuffers[index];
......
...@@ -19,7 +19,7 @@ __kernel void reduceBornSum(int bufferSize, int numBuffers, float alpha, float b ...@@ -19,7 +19,7 @@ __kernel void reduceBornSum(int bufferSize, int numBuffers, float alpha, float b
int totalSize = bufferSize*numBuffers; int totalSize = bufferSize*numBuffers;
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
real sum = (1/(real) 0xFFFFFFFF)*bornSum[index]; real sum = (1/(real) 0x100000000)*bornSum[index];
#else #else
real sum = bornSum[index]; real sum = bornSum[index];
for (int i = index+bufferSize; i < totalSize; i += bufferSize) for (int i = index+bufferSize; i < totalSize; i += bufferSize)
...@@ -59,7 +59,7 @@ __kernel void reduceBornForce(int bufferSize, int numBuffers, __global real* bor ...@@ -59,7 +59,7 @@ __kernel void reduceBornForce(int bufferSize, int numBuffers, __global real* bor
int totalSize = bufferSize*numBuffers; int totalSize = bufferSize*numBuffers;
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
real force = (1/(real) 0xFFFFFFFF)*bornForceIn[index]; real force = (1/(real) 0x100000000)*bornForceIn[index];
#else #else
real force = bornForce[index]; real force = bornForce[index];
for (int i = index+bufferSize; i < totalSize; i += bufferSize) for (int i = index+bufferSize; i < totalSize; i += bufferSize)
......
...@@ -111,7 +111,7 @@ void computeBornSum( ...@@ -111,7 +111,7 @@ void computeBornSum(
if (get_local_id(0) < TILE_SIZE) { if (get_local_id(0) < TILE_SIZE) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&global_bornSum[offset], (long) ((bornSum + localTemp[tgx])*0xFFFFFFFF)); atom_add(&global_bornSum[offset], (long) ((bornSum + localTemp[tgx])*0x100000000));
#else #else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset = x*TILE_SIZE + tgx + x*PADDED_NUM_ATOMS; const unsigned int offset = x*TILE_SIZE + tgx + x*PADDED_NUM_ATOMS;
...@@ -195,8 +195,8 @@ void computeBornSum( ...@@ -195,8 +195,8 @@ void computeBornSum(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset1 = x*TILE_SIZE + tgx; const unsigned int offset1 = x*TILE_SIZE + tgx;
const unsigned int offset2 = y*TILE_SIZE + tgx; const unsigned int offset2 = y*TILE_SIZE + tgx;
atom_add(&global_bornSum[offset1], (long) ((bornSum + localTemp[tgx])*0xFFFFFFFF)); atom_add(&global_bornSum[offset1], (long) ((bornSum + localTemp[tgx])*0x100000000));
atom_add(&global_bornSum[offset2], (long) ((localBornSum[get_local_id(0)] + localBornSum[get_local_id(0)+TILE_SIZE])*0xFFFFFFFF)); atom_add(&global_bornSum[offset2], (long) ((localBornSum[get_local_id(0)] + localBornSum[get_local_id(0)+TILE_SIZE])*0x100000000));
#else #else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset1 = x*TILE_SIZE + tgx + y*PADDED_NUM_ATOMS; const unsigned int offset1 = x*TILE_SIZE + tgx + y*PADDED_NUM_ATOMS;
...@@ -345,10 +345,10 @@ void computeGBSAForce1( ...@@ -345,10 +345,10 @@ void computeGBSAForce1(
if (get_local_id(0) < TILE_SIZE) { if (get_local_id(0) < TILE_SIZE) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) ((force.x + localData[tgx].temp_x)*0xFFFFFFFF)); atom_add(&forceBuffers[offset], (long) ((force.x + localData[tgx].temp_x)*0x100000000));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].temp_y)*0xFFFFFFFF)); atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].temp_y)*0x100000000));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].temp_z)*0xFFFFFFFF)); atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].temp_z)*0x100000000));
atom_add(&global_bornForce[offset], (long) ((force.w + localData[tgx].temp_w)*0xFFFFFFFF)); atom_add(&global_bornForce[offset], (long) ((force.w + localData[tgx].temp_w)*0x100000000));
#else #else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset = x*TILE_SIZE + tgx + x*PADDED_NUM_ATOMS; const unsigned int offset = x*TILE_SIZE + tgx + x*PADDED_NUM_ATOMS;
...@@ -442,14 +442,14 @@ void computeGBSAForce1( ...@@ -442,14 +442,14 @@ void computeGBSAForce1(
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
const unsigned int offset1 = x*TILE_SIZE + tgx; const unsigned int offset1 = x*TILE_SIZE + tgx;
const unsigned int offset2 = y*TILE_SIZE + tgx; const unsigned int offset2 = y*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset1], (long) ((force.x + localData[tgx].temp_x)*0xFFFFFFFF)); atom_add(&forceBuffers[offset1], (long) ((force.x + localData[tgx].temp_x)*0x100000000));
atom_add(&forceBuffers[offset1+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].temp_y)*0xFFFFFFFF)); atom_add(&forceBuffers[offset1+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].temp_y)*0x100000000));
atom_add(&forceBuffers[offset1+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].temp_z)*0xFFFFFFFF)); atom_add(&forceBuffers[offset1+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].temp_z)*0x100000000));
atom_add(&global_bornForce[offset1], (long) ((force.w + localData[tgx].temp_w)*0xFFFFFFFF)); atom_add(&global_bornForce[offset1], (long) ((force.w + localData[tgx].temp_w)*0x100000000));
atom_add(&forceBuffers[offset2], (long) ((localForce[get_local_id(0)].x + localForce[get_local_id(0)+TILE_SIZE].x)*0xFFFFFFFF)); atom_add(&forceBuffers[offset2], (long) ((localForce[get_local_id(0)].x + localForce[get_local_id(0)+TILE_SIZE].x)*0x100000000));
atom_add(&forceBuffers[offset2+PADDED_NUM_ATOMS], (long) ((localForce[get_local_id(0)].y + localForce[get_local_id(0)+TILE_SIZE].y)*0xFFFFFFFF)); atom_add(&forceBuffers[offset2+PADDED_NUM_ATOMS], (long) ((localForce[get_local_id(0)].y + localForce[get_local_id(0)+TILE_SIZE].y)*0x100000000));
atom_add(&forceBuffers[offset2+2*PADDED_NUM_ATOMS], (long) ((localForce[get_local_id(0)].z + localForce[get_local_id(0)+TILE_SIZE].z)*0xFFFFFFFF)); atom_add(&forceBuffers[offset2+2*PADDED_NUM_ATOMS], (long) ((localForce[get_local_id(0)].z + localForce[get_local_id(0)+TILE_SIZE].z)*0x100000000));
atom_add(&global_bornForce[offset2], (long) ((localForce[get_local_id(0)].w + localForce[get_local_id(0)+TILE_SIZE].w)*0xFFFFFFFF)); atom_add(&global_bornForce[offset2], (long) ((localForce[get_local_id(0)].w + localForce[get_local_id(0)+TILE_SIZE].w)*0x100000000));
#else #else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset1 = x*TILE_SIZE + tgx + y*PADDED_NUM_ATOMS; const unsigned int offset1 = x*TILE_SIZE + tgx + y*PADDED_NUM_ATOMS;
......
...@@ -264,11 +264,11 @@ __kernel void computeBornSum( ...@@ -264,11 +264,11 @@ __kernel void computeBornSum(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
if (pos < end) { if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&global_bornSum[offset], (long) (bornSum*0xFFFFFFFF)); atom_add(&global_bornSum[offset], (long) (bornSum*0x100000000));
} }
if (pos < end && x != y) { if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx; const unsigned int offset = y*TILE_SIZE + tgx;
atom_add(&global_bornSum[offset], (long) (localData[get_local_id(0)].bornSum*0xFFFFFFFF)); atom_add(&global_bornSum[offset], (long) (localData[get_local_id(0)].bornSum*0x100000000));
} }
#else #else
int writeX = (pos < end ? x : -1); int writeX = (pos < end ? x : -1);
...@@ -593,17 +593,17 @@ __kernel void computeGBSAForce1( ...@@ -593,17 +593,17 @@ __kernel void computeGBSAForce1(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
if (pos < end) { if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (force.x*0xFFFFFFFF)); atom_add(&forceBuffers[offset], (long) (force.x*0x100000000));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (force.y*0xFFFFFFFF)); atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (force.y*0x100000000));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (force.z*0xFFFFFFFF)); atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000));
atom_add(&global_bornForce[offset], (long) (force.w*0xFFFFFFFF)); atom_add(&global_bornForce[offset], (long) (force.w*0x100000000));
} }
if (pos < end && x != y) { if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx; const unsigned int offset = y*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (localData[get_local_id(0)].fx*0xFFFFFFFF)); atom_add(&forceBuffers[offset], (long) (localData[get_local_id(0)].fx*0x100000000));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0xFFFFFFFF)); atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0x100000000));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0xFFFFFFFF)); atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0x100000000));
atom_add(&global_bornForce[offset], (long) (localData[get_local_id(0)].fw*0xFFFFFFFF)); atom_add(&global_bornForce[offset], (long) (localData[get_local_id(0)].fw*0x100000000));
} }
#else #else
int writeX = (pos < end ? x : -1); int writeX = (pos < end ? x : -1);
......
...@@ -161,9 +161,9 @@ void computeNonbonded( ...@@ -161,9 +161,9 @@ void computeNonbonded(
if (get_local_id(0) < TILE_SIZE) { if (get_local_id(0) < TILE_SIZE) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) ((force.x + localData[tgx].fx)*0xFFFFFFFF)); atom_add(&forceBuffers[offset], (long) ((force.x + localData[tgx].fx)*0x100000000));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].fy)*0xFFFFFFFF)); atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].fy)*0x100000000));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].fz)*0xFFFFFFFF)); atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].fz)*0x100000000));
#else #else
force.x += localData[tgx].fx; force.x += localData[tgx].fx;
force.y += localData[tgx].fy; force.y += localData[tgx].fy;
...@@ -263,12 +263,12 @@ void computeNonbonded( ...@@ -263,12 +263,12 @@ void computeNonbonded(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset1 = x*TILE_SIZE + tgx; const unsigned int offset1 = x*TILE_SIZE + tgx;
const unsigned int offset2 = y*TILE_SIZE + tgx; const unsigned int offset2 = y*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset1], (long) ((force.x + localData[tgx].fx)*0xFFFFFFFF)); atom_add(&forceBuffers[offset1], (long) ((force.x + localData[tgx].fx)*0x100000000));
atom_add(&forceBuffers[offset1+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].fy)*0xFFFFFFFF)); atom_add(&forceBuffers[offset1+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].fy)*0x100000000));
atom_add(&forceBuffers[offset1+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].fz)*0xFFFFFFFF)); atom_add(&forceBuffers[offset1+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].fz)*0x100000000));
atom_add(&forceBuffers[offset2], (long) ((localForce[tgx].x + localForce[tgx+TILE_SIZE].x)*0xFFFFFFFF)); atom_add(&forceBuffers[offset2], (long) ((localForce[tgx].x + localForce[tgx+TILE_SIZE].x)*0x100000000));
atom_add(&forceBuffers[offset2+PADDED_NUM_ATOMS], (long) ((localForce[tgx].y + localForce[tgx+TILE_SIZE].y)*0xFFFFFFFF)); atom_add(&forceBuffers[offset2+PADDED_NUM_ATOMS], (long) ((localForce[tgx].y + localForce[tgx+TILE_SIZE].y)*0x100000000));
atom_add(&forceBuffers[offset2+2*PADDED_NUM_ATOMS], (long) ((localForce[tgx].z + localForce[tgx+TILE_SIZE].z)*0xFFFFFFFF)); atom_add(&forceBuffers[offset2+2*PADDED_NUM_ATOMS], (long) ((localForce[tgx].z + localForce[tgx+TILE_SIZE].z)*0x100000000));
#else #else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset1 = x*TILE_SIZE + tgx + y*PADDED_NUM_ATOMS; const unsigned int offset1 = x*TILE_SIZE + tgx + y*PADDED_NUM_ATOMS;
......
...@@ -295,15 +295,15 @@ __kernel void computeNonbonded( ...@@ -295,15 +295,15 @@ __kernel void computeNonbonded(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
if (pos < end) { if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (force.x*0xFFFFFFFF)); atom_add(&forceBuffers[offset], (long) (force.x*0x100000000));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (force.y*0xFFFFFFFF)); atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (force.y*0x100000000));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (force.z*0xFFFFFFFF)); atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000));
} }
if (pos < end && x != y) { if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx; const unsigned int offset = y*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (localData[get_local_id(0)].fx*0xFFFFFFFF)); atom_add(&forceBuffers[offset], (long) (localData[get_local_id(0)].fx*0x100000000));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0xFFFFFFFF)); atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0x100000000));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0xFFFFFFFF)); atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0x100000000));
} }
#else #else
int writeX = (pos < end ? x : -1); int writeX = (pos < end ? x : -1);
......
...@@ -126,9 +126,9 @@ void gridSpreadCharge(__global const real4* restrict posq, __global const int2* ...@@ -126,9 +126,9 @@ void gridSpreadCharge(__global const real4* restrict posq, __global const int2*
y -= (y >= GRID_SIZE_Y ? GRID_SIZE_Y : 0); y -= (y >= GRID_SIZE_Y ? GRID_SIZE_Y : 0);
z -= (z >= GRID_SIZE_Z ? GRID_SIZE_Z : 0); z -= (z >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
#ifdef USE_DOUBLE_PRECISION #ifdef USE_DOUBLE_PRECISION
atom_add(&pmeGrid[2*(x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z)], (long) (add*0xFFFFFFFF)); atom_add(&pmeGrid[2*(x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z)], (long) (add*0x100000000));
#else #else
atom_add(&pmeGrid[x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z], (long) (add*0xFFFFFFFF)); atom_add(&pmeGrid[x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z], (long) (add*0x100000000));
#endif #endif
} }
} }
...@@ -138,7 +138,7 @@ void gridSpreadCharge(__global const real4* restrict posq, __global const int2* ...@@ -138,7 +138,7 @@ void gridSpreadCharge(__global const real4* restrict posq, __global const int2*
__kernel void finishSpreadCharge(__global long* restrict pmeGrid) { __kernel void finishSpreadCharge(__global long* restrict pmeGrid) {
__global real2* realGrid = (__global real2*) pmeGrid; __global real2* realGrid = (__global real2*) pmeGrid;
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z; const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
real scale = EPSILON_FACTOR/(real) 0xFFFFFFFF; real scale = EPSILON_FACTOR/(real) 0x100000000;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) { for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
#ifdef USE_DOUBLE_PRECISION #ifdef USE_DOUBLE_PRECISION
long value = pmeGrid[2*index]; long value = pmeGrid[2*index];
......
...@@ -86,7 +86,7 @@ __kernel void reduceReal4Buffer(__global real4* restrict buffer, int bufferSize, ...@@ -86,7 +86,7 @@ __kernel void reduceReal4Buffer(__global real4* restrict buffer, int bufferSize,
*/ */
__kernel void reduceForces(__global const long* restrict longBuffer, __global real4* restrict buffer, int bufferSize, int numBuffers) { __kernel void reduceForces(__global const long* restrict longBuffer, __global real4* restrict buffer, int bufferSize, int numBuffers) {
int totalSize = bufferSize*numBuffers; int totalSize = bufferSize*numBuffers;
real scale = 1/(real) 0xFFFFFFFF; real scale = 1/(real) 0x100000000;
for (int index = get_global_id(0); index < bufferSize; index += get_global_size(0)) { for (int index = get_global_id(0); index < bufferSize; index += get_global_size(0)) {
real4 sum = (real4) (scale*longBuffer[index], scale*longBuffer[index+bufferSize], scale*longBuffer[index+2*bufferSize], 0); real4 sum = (real4) (scale*longBuffer[index], scale*longBuffer[index+bufferSize], scale*longBuffer[index+2*bufferSize], 0);
for (int i = index; i < totalSize; i += bufferSize) for (int i = index; i < totalSize; i += bufferSize)
......
...@@ -7,7 +7,7 @@ extern "C" __global__ void reduceBornSum(const long long* __restrict__ bornSum, ...@@ -7,7 +7,7 @@ extern "C" __global__ void reduceBornSum(const long long* __restrict__ bornSum,
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) { for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Get summed Born data // Get summed Born data
real sum = RECIP(0xFFFFFFFF)*bornSum[index]; real sum = RECIP(0x100000000)*bornSum[index];
// Now calculate Born radius. // Now calculate Born radius.
...@@ -33,7 +33,7 @@ extern "C" __global__ void computeSurfaceAreaForce(long long* __restrict__ bornF ...@@ -33,7 +33,7 @@ extern "C" __global__ void computeSurfaceAreaForce(long long* __restrict__ bornF
ratio6 = ratio6*ratio6*ratio6; ratio6 = ratio6*ratio6*ratio6;
ratio6 = ratio6*ratio6; ratio6 = ratio6*ratio6;
real saTerm = SURFACE_AREA_FACTOR * r * r * ratio6; real saTerm = SURFACE_AREA_FACTOR * r * r * ratio6;
bornForce[index] += (long long) (saTerm*0xFFFFFFFF/bornRadius); bornForce[index] += (long long) (saTerm*0x100000000/bornRadius);
energy += saTerm; energy += saTerm;
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] -= energy/6; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] -= energy/6;
...@@ -157,11 +157,11 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ bornS ...@@ -157,11 +157,11 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ bornS
if (pos < end) { if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&bornSum[offset], static_cast<unsigned long long>((long long) (data.bornSum*0xFFFFFFFF))); atomicAdd(&bornSum[offset], static_cast<unsigned long long>((long long) (data.bornSum*0x100000000)));
} }
if (pos < end && x != y) { if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx; const unsigned int offset = y*TILE_SIZE + tgx;
atomicAdd(&bornSum[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].bornSum*0xFFFFFFFF))); atomicAdd(&bornSum[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].bornSum*0x100000000)));
} }
lasty = y; lasty = y;
pos++; pos++;
...@@ -276,9 +276,9 @@ extern "C" __global__ void computeGKForces( ...@@ -276,9 +276,9 @@ extern "C" __global__ void computeGKForces(
} }
} }
data.force *= 0.5f; data.force *= 0.5f;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
// Compute torques. // Compute torques.
...@@ -292,9 +292,9 @@ extern "C" __global__ void computeGKForces( ...@@ -292,9 +292,9 @@ extern "C" __global__ void computeGKForces(
data.force += tempTorque; data.force += tempTorque;
} }
} }
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
// Compute chain rule terms. // Compute chain rule terms.
...@@ -304,7 +304,7 @@ extern "C" __global__ void computeGKForces( ...@@ -304,7 +304,7 @@ extern "C" __global__ void computeGKForces(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS)
computeOneInteractionB1B2(data, localData[tbx+j]); computeOneInteractionB1B2(data, localData[tbx+j]);
} }
atomicAdd(&bornForce[atom1], static_cast<unsigned long long>((long long) (data.bornForce*0xFFFFFFFF))); atomicAdd(&bornForce[atom1], static_cast<unsigned long long>((long long) (data.bornForce*0x100000000)));
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
...@@ -330,13 +330,13 @@ extern "C" __global__ void computeGKForces( ...@@ -330,13 +330,13 @@ extern "C" __global__ void computeGKForces(
localData[threadIdx.x].force *= 0.5f; localData[threadIdx.x].force *= 0.5f;
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
} }
// Compute torques. // Compute torques.
...@@ -358,13 +358,13 @@ extern "C" __global__ void computeGKForces( ...@@ -358,13 +358,13 @@ extern "C" __global__ void computeGKForces(
} }
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
} }
// Compute chain rule terms. // Compute chain rule terms.
...@@ -379,9 +379,9 @@ extern "C" __global__ void computeGKForces( ...@@ -379,9 +379,9 @@ extern "C" __global__ void computeGKForces(
} }
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&bornForce[offset], static_cast<unsigned long long>((long long) (data.bornForce*0xFFFFFFFF))); atomicAdd(&bornForce[offset], static_cast<unsigned long long>((long long) (data.bornForce*0x100000000)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&bornForce[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].bornForce*0xFFFFFFFF))); atomicAdd(&bornForce[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].bornForce*0x100000000)));
} }
} }
} }
...@@ -405,7 +405,7 @@ inline __device__ void loadAtomData3(AtomData3& data, int atom, const real4* __r ...@@ -405,7 +405,7 @@ inline __device__ void loadAtomData3(AtomData3& data, int atom, const real4* __r
float2 params1 = params[atom]; float2 params1 = params[atom];
data.radius = params1.x; data.radius = params1.x;
data.scaledRadius = params1.y; data.scaledRadius = params1.y;
data.bornForce = bornForce[atom]/(real) 0xFFFFFFFF; data.bornForce = bornForce[atom]/(real) 0x100000000;
} }
__device__ void computeBornChainRuleInteraction(AtomData3& atom1, AtomData3& atom2, real3& force) { __device__ void computeBornChainRuleInteraction(AtomData3& atom1, AtomData3& atom2, real3& force) {
...@@ -506,9 +506,9 @@ extern "C" __global__ void computeChainRuleForce( ...@@ -506,9 +506,9 @@ extern "C" __global__ void computeChainRuleForce(
localData[tbx+j].force += tempForce; localData[tbx+j].force += tempForce;
} }
} }
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) ((data.force.x+localData[threadIdx.x].force.x)*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) ((data.force.x+localData[threadIdx.x].force.x)*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) ((data.force.y+localData[threadIdx.x].force.y)*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) ((data.force.y+localData[threadIdx.x].force.y)*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) ((data.force.z+localData[threadIdx.x].force.z)*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) ((data.force.z+localData[threadIdx.x].force.z)*0x100000000)));
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
...@@ -532,13 +532,13 @@ extern "C" __global__ void computeChainRuleForce( ...@@ -532,13 +532,13 @@ extern "C" __global__ void computeChainRuleForce(
} }
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
} }
} }
} }
...@@ -687,9 +687,9 @@ extern "C" __global__ void computeEDiffForce( ...@@ -687,9 +687,9 @@ extern "C" __global__ void computeEDiffForce(
} }
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
// Compute torques. // Compute torques.
...@@ -706,9 +706,9 @@ extern "C" __global__ void computeEDiffForce( ...@@ -706,9 +706,9 @@ extern "C" __global__ void computeEDiffForce(
} }
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
...@@ -740,13 +740,13 @@ extern "C" __global__ void computeEDiffForce( ...@@ -740,13 +740,13 @@ extern "C" __global__ void computeEDiffForce(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR; localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
} }
// Compute torques. // Compute torques.
...@@ -770,13 +770,13 @@ extern "C" __global__ void computeEDiffForce( ...@@ -770,13 +770,13 @@ extern "C" __global__ void computeEDiffForce(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR; localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
} }
} }
} }
......
...@@ -246,14 +246,14 @@ extern "C" __global__ void computeWCAForce(unsigned long long* __restrict__ forc ...@@ -246,14 +246,14 @@ extern "C" __global__ void computeWCAForce(unsigned long long* __restrict__ forc
tj = (tj+1) & (TILE_SIZE-1); tj = (tj+1) & (TILE_SIZE-1);
} }
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
if (x != y) { if (x != y) {
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
} }
} }
pos++; pos++;
......
...@@ -158,9 +158,9 @@ extern "C" __global__ void computeElectrostatics( ...@@ -158,9 +158,9 @@ extern "C" __global__ void computeElectrostatics(
} }
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
// Compute torques. // Compute torques.
...@@ -177,9 +177,9 @@ extern "C" __global__ void computeElectrostatics( ...@@ -177,9 +177,9 @@ extern "C" __global__ void computeElectrostatics(
} }
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
...@@ -243,13 +243,13 @@ extern "C" __global__ void computeElectrostatics( ...@@ -243,13 +243,13 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR; localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
} }
// Compute torques. // Compute torques.
...@@ -301,13 +301,13 @@ extern "C" __global__ void computeElectrostatics( ...@@ -301,13 +301,13 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR; localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
} }
} }
} }
...@@ -341,13 +341,13 @@ extern "C" __global__ void computeElectrostatics( ...@@ -341,13 +341,13 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR; localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
} }
// Compute torques. // Compute torques.
...@@ -372,13 +372,13 @@ extern "C" __global__ void computeElectrostatics( ...@@ -372,13 +372,13 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR; localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
} }
} }
} }
......
...@@ -637,30 +637,30 @@ extern "C" __global__ void computeFixedField( ...@@ -637,30 +637,30 @@ extern "C" __global__ void computeFixedField(
if (pos < end) { if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&fieldBuffers[offset], static_cast<unsigned long long>((long long) (data.field.x*0xFFFFFFFF))); atomicAdd(&fieldBuffers[offset], static_cast<unsigned long long>((long long) (data.field.x*0x100000000)));
atomicAdd(&fieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0xFFFFFFFF))); atomicAdd(&fieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0x100000000)));
atomicAdd(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0xFFFFFFFF))); atomicAdd(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0x100000000)));
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0x100000000)));
#ifdef USE_GK #ifdef USE_GK
atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (data.gkField.x*0xFFFFFFFF))); atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (data.gkField.x*0x100000000)));
atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.y*0xFFFFFFFF))); atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.y*0x100000000)));
atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.z*0xFFFFFFFF))); atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.z*0x100000000)));
#endif #endif
} }
if (pos < end && x != y) { if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx; const unsigned int offset = y*TILE_SIZE + tgx;
atomicAdd(&fieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0xFFFFFFFF))); atomicAdd(&fieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0x100000000)));
atomicAdd(&fieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0xFFFFFFFF))); atomicAdd(&fieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0x100000000)));
atomicAdd(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0xFFFFFFFF))); atomicAdd(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0x100000000)));
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0xFFFFFFFF))); atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0x100000000)));
#ifdef USE_GK #ifdef USE_GK
atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.x*0xFFFFFFFF))); atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.x*0x100000000)));
atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.y*0xFFFFFFFF))); atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.y*0x100000000)));
atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.z*0xFFFFFFFF))); atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.z*0x100000000)));
#endif #endif
} }
pos++; pos++;
......
...@@ -387,36 +387,36 @@ extern "C" __global__ void computeInducedField( ...@@ -387,36 +387,36 @@ extern "C" __global__ void computeInducedField(
if (pos < end) { if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (data.field.x*0xFFFFFFFF))); atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (data.field.x*0x100000000)));
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0xFFFFFFFF))); atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0x100000000)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0xFFFFFFFF))); atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0x100000000)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0xFFFFFFFF))); atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0xFFFFFFFF))); atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0xFFFFFFFF))); atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0x100000000)));
#ifdef USE_GK #ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (data.fieldS.x*0xFFFFFFFF))); atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (data.fieldS.x*0x100000000)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.y*0xFFFFFFFF))); atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.y*0x100000000)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.z*0xFFFFFFFF))); atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (data.fieldPolarS.x*0xFFFFFFFF))); atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (data.fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.y*0xFFFFFFFF))); atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.z*0xFFFFFFFF))); atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.z*0x100000000)));
#endif #endif
} }
if (pos < end && x != y) { if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx; const unsigned int offset = y*TILE_SIZE + tgx;
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0xFFFFFFFF))); atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0x100000000)));
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0xFFFFFFFF))); atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0x100000000)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0xFFFFFFFF))); atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0x100000000)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0xFFFFFFFF))); atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0xFFFFFFFF))); atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0xFFFFFFFF))); atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0x100000000)));
#ifdef USE_GK #ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.x*0xFFFFFFFF))); atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.x*0x100000000)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.y*0xFFFFFFFF))); atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.y*0x100000000)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.z*0xFFFFFFFF))); atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.x*0xFFFFFFFF))); atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.y*0xFFFFFFFF))); atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.z*0xFFFFFFFF))); atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.z*0x100000000)));
#endif #endif
} }
pos++; pos++;
...@@ -433,7 +433,7 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__ ...@@ -433,7 +433,7 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
#else #else
const real ewaldScale = 0; const real ewaldScale = 0;
#endif #endif
const real fieldScale = 1/(real) 0xFFFFFFFF; const real fieldScale = 1/(real) 0x100000000;
real sumErrors = 0; real sumErrors = 0;
real sumPolarErrors = 0; real sumPolarErrors = 0;
for (int atom = blockIdx.x*blockDim.x + threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) { for (int atom = blockIdx.x*blockDim.x + threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
......
...@@ -478,13 +478,13 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict ...@@ -478,13 +478,13 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
phi[20*m+18] = tuv012; phi[20*m+18] = tuv012;
phi[20*m+19] = tuv111; phi[20*m+19] = tuv111;
real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI; real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
long long fieldx = (long long) ((dipoleScale*labFrameDipole[m*3]-GRID_SIZE_X*invPeriodicBoxSize.x*tuv100)*0xFFFFFFFF); long long fieldx = (long long) ((dipoleScale*labFrameDipole[m*3]-GRID_SIZE_X*invPeriodicBoxSize.x*tuv100)*0x100000000);
fieldBuffers[m] = fieldx; fieldBuffers[m] = fieldx;
fieldPolarBuffers[m] = fieldx; fieldPolarBuffers[m] = fieldx;
long long fieldy = (long long) ((dipoleScale*labFrameDipole[m*3+1]-GRID_SIZE_Y*invPeriodicBoxSize.y*tuv010)*0xFFFFFFFF); long long fieldy = (long long) ((dipoleScale*labFrameDipole[m*3+1]-GRID_SIZE_Y*invPeriodicBoxSize.y*tuv010)*0x100000000);
fieldBuffers[m+PADDED_NUM_ATOMS] = fieldy; fieldBuffers[m+PADDED_NUM_ATOMS] = fieldy;
fieldPolarBuffers[m+PADDED_NUM_ATOMS] = fieldy; fieldPolarBuffers[m+PADDED_NUM_ATOMS] = fieldy;
long long fieldz = (long long) ((dipoleScale*labFrameDipole[m*3+2]-GRID_SIZE_Z*invPeriodicBoxSize.z*tuv001)*0xFFFFFFFF); long long fieldz = (long long) ((dipoleScale*labFrameDipole[m*3+2]-GRID_SIZE_Z*invPeriodicBoxSize.z*tuv001)*0x100000000);
fieldBuffers[m+2*PADDED_NUM_ATOMS] = fieldz; fieldBuffers[m+2*PADDED_NUM_ATOMS] = fieldz;
fieldPolarBuffers[m+2*PADDED_NUM_ATOMS] = fieldz; fieldPolarBuffers[m+2*PADDED_NUM_ATOMS] = fieldz;
} }
...@@ -723,17 +723,17 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict ...@@ -723,17 +723,17 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
torqueBuffers[i] = (long long) (EPSILON_FACTOR*(multipole[3]*yscale*phi[2] - multipole[2]*zscale*phi[3] torqueBuffers[i] = (long long) (EPSILON_FACTOR*(multipole[3]*yscale*phi[2] - multipole[2]*zscale*phi[3]
+ 2*(multipole[6]-multipole[5])*yscale*zscale*phi[9] + 2*(multipole[6]-multipole[5])*yscale*zscale*phi[9]
+ multipole[8]*xscale*yscale*phi[7] + multipole[9]*yscale*yscale*phi[5] + multipole[8]*xscale*yscale*phi[7] + multipole[9]*yscale*yscale*phi[5]
- multipole[7]*xscale*zscale*phi[8] - multipole[9]*zscale*zscale*phi[6])*0xFFFFFFFF); - multipole[7]*xscale*zscale*phi[8] - multipole[9]*zscale*zscale*phi[6])*0x100000000);
torqueBuffers[i+PADDED_NUM_ATOMS] = (long long) (EPSILON_FACTOR*(multipole[1]*zscale*phi[3] - multipole[3]*xscale*phi[1] torqueBuffers[i+PADDED_NUM_ATOMS] = (long long) (EPSILON_FACTOR*(multipole[1]*zscale*phi[3] - multipole[3]*xscale*phi[1]
+ 2*(multipole[4]-multipole[6])*xscale*zscale*phi[8] + 2*(multipole[4]-multipole[6])*xscale*zscale*phi[8]
+ multipole[7]*yscale*zscale*phi[9] + multipole[8]*zscale*zscale*phi[6] + multipole[7]*yscale*zscale*phi[9] + multipole[8]*zscale*zscale*phi[6]
- multipole[8]*xscale*xscale*phi[4] - multipole[9]*xscale*yscale*phi[7])*0xFFFFFFFF); - multipole[8]*xscale*xscale*phi[4] - multipole[9]*xscale*yscale*phi[7])*0x100000000);
torqueBuffers[i+PADDED_NUM_ATOMS*2] = (long long) (EPSILON_FACTOR*(multipole[2]*xscale*phi[1] - multipole[1]*yscale*phi[2] torqueBuffers[i+PADDED_NUM_ATOMS*2] = (long long) (EPSILON_FACTOR*(multipole[2]*xscale*phi[1] - multipole[1]*yscale*phi[2]
+ 2*(multipole[5]-multipole[4])*xscale*yscale*phi[7] + 2*(multipole[5]-multipole[4])*xscale*yscale*phi[7]
+ multipole[7]*xscale*xscale*phi[4] + multipole[9]*xscale*zscale*phi[8] + multipole[7]*xscale*xscale*phi[4] + multipole[9]*xscale*zscale*phi[8]
- multipole[7]*yscale*yscale*phi[5] - multipole[8]*yscale*zscale*phi[9])*0xFFFFFFFF); - multipole[7]*yscale*yscale*phi[5] - multipole[8]*yscale*zscale*phi[9])*0x100000000);
// Compute the force and energy. // Compute the force and energy.
...@@ -757,9 +757,9 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict ...@@ -757,9 +757,9 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
f.x *= EPSILON_FACTOR*xscale; f.x *= EPSILON_FACTOR*xscale;
f.y *= EPSILON_FACTOR*yscale; f.y *= EPSILON_FACTOR*yscale;
f.z *= EPSILON_FACTOR*zscale; f.z *= EPSILON_FACTOR*zscale;
forceBuffers[i] -= static_cast<unsigned long long>((long long) (f.x*0xFFFFFFFF)); forceBuffers[i] -= static_cast<unsigned long long>((long long) (f.x*0x100000000));
forceBuffers[i+PADDED_NUM_ATOMS] -= static_cast<unsigned long long>((long long) (f.y*0xFFFFFFFF)); forceBuffers[i+PADDED_NUM_ATOMS] -= static_cast<unsigned long long>((long long) (f.y*0x100000000));
forceBuffers[i+PADDED_NUM_ATOMS*2] -= static_cast<unsigned long long>((long long) (f.z*0xFFFFFFFF)); forceBuffers[i+PADDED_NUM_ATOMS*2] -= static_cast<unsigned long long>((long long) (f.z*0x100000000));
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*EPSILON_FACTOR*energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*EPSILON_FACTOR*energy;
} }
...@@ -801,17 +801,17 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_ ...@@ -801,17 +801,17 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
torqueBuffers[i] += (long long) (0.5f*EPSILON_FACTOR*(multipole[3]*yscale*phidp[2] - multipole[2]*zscale*phidp[3] torqueBuffers[i] += (long long) (0.5f*EPSILON_FACTOR*(multipole[3]*yscale*phidp[2] - multipole[2]*zscale*phidp[3]
+ 2*(multipole[6]-multipole[5])*yscale*zscale*phidp[9] + 2*(multipole[6]-multipole[5])*yscale*zscale*phidp[9]
+ multipole[8]*xscale*yscale*phidp[7] + multipole[9]*yscale*yscale*phidp[5] + multipole[8]*xscale*yscale*phidp[7] + multipole[9]*yscale*yscale*phidp[5]
- multipole[7]*xscale*zscale*phidp[8] - multipole[9]*zscale*zscale*phidp[6])*0xFFFFFFFF); - multipole[7]*xscale*zscale*phidp[8] - multipole[9]*zscale*zscale*phidp[6])*0x100000000);
torqueBuffers[i+PADDED_NUM_ATOMS] += (long long) (0.5f*EPSILON_FACTOR*(multipole[1]*zscale*phidp[3] - multipole[3]*xscale*phidp[1] torqueBuffers[i+PADDED_NUM_ATOMS] += (long long) (0.5f*EPSILON_FACTOR*(multipole[1]*zscale*phidp[3] - multipole[3]*xscale*phidp[1]
+ 2*(multipole[4]-multipole[6])*xscale*zscale*phidp[8] + 2*(multipole[4]-multipole[6])*xscale*zscale*phidp[8]
+ multipole[7]*yscale*zscale*phidp[9] + multipole[8]*zscale*zscale*phidp[6] + multipole[7]*yscale*zscale*phidp[9] + multipole[8]*zscale*zscale*phidp[6]
- multipole[8]*xscale*xscale*phidp[4] - multipole[9]*xscale*yscale*phidp[7])*0xFFFFFFFF); - multipole[8]*xscale*xscale*phidp[4] - multipole[9]*xscale*yscale*phidp[7])*0x100000000);
torqueBuffers[i+PADDED_NUM_ATOMS*2] += (long long) (0.5f*EPSILON_FACTOR*(multipole[2]*xscale*phidp[1] - multipole[1]*yscale*phidp[2] torqueBuffers[i+PADDED_NUM_ATOMS*2] += (long long) (0.5f*EPSILON_FACTOR*(multipole[2]*xscale*phidp[1] - multipole[1]*yscale*phidp[2]
+ 2*(multipole[5]-multipole[4])*xscale*yscale*phidp[7] + 2*(multipole[5]-multipole[4])*xscale*yscale*phidp[7]
+ multipole[7]*xscale*xscale*phidp[4] + multipole[9]*xscale*zscale*phidp[8] + multipole[7]*xscale*xscale*phidp[4] + multipole[9]*xscale*zscale*phidp[8]
- multipole[7]*yscale*yscale*phidp[5] - multipole[8]*yscale*zscale*phidp[9])*0xFFFFFFFF); - multipole[7]*yscale*yscale*phidp[5] - multipole[8]*yscale*zscale*phidp[9])*0x100000000);
// Compute the force and energy. // Compute the force and energy.
...@@ -865,18 +865,18 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_ ...@@ -865,18 +865,18 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
f.x *= 0.5f*EPSILON_FACTOR*xscale; f.x *= 0.5f*EPSILON_FACTOR*xscale;
f.y *= 0.5f*EPSILON_FACTOR*yscale; f.y *= 0.5f*EPSILON_FACTOR*yscale;
f.z *= 0.5f*EPSILON_FACTOR*zscale; f.z *= 0.5f*EPSILON_FACTOR*zscale;
forceBuffers[i] -= static_cast<unsigned long long>((long long) (f.x*0xFFFFFFFF)); forceBuffers[i] -= static_cast<unsigned long long>((long long) (f.x*0x100000000));
forceBuffers[i+PADDED_NUM_ATOMS] -= static_cast<unsigned long long>((long long) (f.y*0xFFFFFFFF)); forceBuffers[i+PADDED_NUM_ATOMS] -= static_cast<unsigned long long>((long long) (f.y*0x100000000));
forceBuffers[i+PADDED_NUM_ATOMS*2] -= static_cast<unsigned long long>((long long) (f.z*0xFFFFFFFF)); forceBuffers[i+PADDED_NUM_ATOMS*2] -= static_cast<unsigned long long>((long long) (f.z*0x100000000));
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*EPSILON_FACTOR*energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*EPSILON_FACTOR*energy;
} }
extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip, extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip,
long long* __restrict__ inducedField, long long* __restrict__ inducedFieldPolar, real4 invPeriodicBoxSize) { long long* __restrict__ inducedField, long long* __restrict__ inducedFieldPolar, real4 invPeriodicBoxSize) {
real xscale = GRID_SIZE_X*invPeriodicBoxSize.x*0xFFFFFFFF; real xscale = GRID_SIZE_X*invPeriodicBoxSize.x*0x100000000;
real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y*0xFFFFFFFF; real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y*0x100000000;
real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z*0xFFFFFFFF; real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z*0x100000000;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
inducedField[i] -= (long long) (xscale*phid[10*i+1]); inducedField[i] -= (long long) (xscale*phid[10*i+1]);
inducedField[i+PADDED_NUM_ATOMS] -= (long long) (yscale*phid[10*i+2]); inducedField[i+PADDED_NUM_ATOMS] -= (long long) (yscale*phid[10*i+2]);
......
...@@ -212,7 +212,7 @@ extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fi ...@@ -212,7 +212,7 @@ extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fi
#endif #endif
real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) { real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) { for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
real scale = polarizability[atom]/(real) 0xFFFFFFFF; real scale = polarizability[atom]/(real) 0x100000000;
inducedDipole[3*atom] = scale*fieldBuffers[atom]; inducedDipole[3*atom] = scale*fieldBuffers[atom];
inducedDipole[3*atom+1] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS]; inducedDipole[3*atom+1] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS];
inducedDipole[3*atom+2] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS*2]; inducedDipole[3*atom+2] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS*2];
...@@ -263,7 +263,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for ...@@ -263,7 +263,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
const int Z = 2; const int Z = 2;
const int I = 3; const int I = 3;
const real torqueScale = RECIP((double) 0xFFFFFFFF); const real torqueScale = RECIP((double) 0x100000000);
real3 forces[4]; real3 forces[4];
real norms[LastVectorIndex]; real norms[LastVectorIndex];
...@@ -423,22 +423,22 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for ...@@ -423,22 +423,22 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
// Store results // Store results
atomicAdd(&forceBuffers[particles.z], static_cast<unsigned long long>((long long) (forces[Z].x*0xFFFFFFFF))); atomicAdd(&forceBuffers[particles.z], static_cast<unsigned long long>((long long) (forces[Z].x*0x100000000)));
atomicAdd(&forceBuffers[particles.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Z].y*0xFFFFFFFF))); atomicAdd(&forceBuffers[particles.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Z].y*0x100000000)));
atomicAdd(&forceBuffers[particles.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Z].z*0xFFFFFFFF))); atomicAdd(&forceBuffers[particles.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Z].z*0x100000000)));
if (axisType != 4) { if (axisType != 4) {
atomicAdd(&forceBuffers[particles.x], static_cast<unsigned long long>((long long) (forces[X].x*0xFFFFFFFF))); atomicAdd(&forceBuffers[particles.x], static_cast<unsigned long long>((long long) (forces[X].x*0x100000000)));
atomicAdd(&forceBuffers[particles.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[X].y*0xFFFFFFFF))); atomicAdd(&forceBuffers[particles.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[X].y*0x100000000)));
atomicAdd(&forceBuffers[particles.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[X].z*0xFFFFFFFF))); atomicAdd(&forceBuffers[particles.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[X].z*0x100000000)));
} }
if ((axisType == 2 || axisType == 3) && particles.y > -1) { if ((axisType == 2 || axisType == 3) && particles.y > -1) {
atomicAdd(&forceBuffers[particles.y], static_cast<unsigned long long>((long long) (forces[Y].x*0xFFFFFFFF))); atomicAdd(&forceBuffers[particles.y], static_cast<unsigned long long>((long long) (forces[Y].x*0x100000000)));
atomicAdd(&forceBuffers[particles.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Y].y*0xFFFFFFFF))); atomicAdd(&forceBuffers[particles.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Y].y*0x100000000)));
atomicAdd(&forceBuffers[particles.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Y].z*0xFFFFFFFF))); atomicAdd(&forceBuffers[particles.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Y].z*0x100000000)));
} }
atomicAdd(&forceBuffers[atom], static_cast<unsigned long long>((long long) (forces[I].x*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom], static_cast<unsigned long long>((long long) (forces[I].x*0x100000000)));
atomicAdd(&forceBuffers[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[I].y*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[I].y*0x100000000)));
atomicAdd(&forceBuffers[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[I].z*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[I].z*0x100000000)));
} }
} }
} }
......
...@@ -282,12 +282,12 @@ extern "C" __global__ void computeElectrostatics( ...@@ -282,12 +282,12 @@ extern "C" __global__ void computeElectrostatics(
computeSelfEnergyAndTorque(data, energy); computeSelfEnergyAndTorque(data, energy);
data.force *= -ENERGY_SCALE_FACTOR; data.force *= -ENERGY_SCALE_FACTOR;
data.torque *= ENERGY_SCALE_FACTOR; data.torque *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.torque.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.torque.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0x100000000)));
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
...@@ -368,19 +368,19 @@ extern "C" __global__ void computeElectrostatics( ...@@ -368,19 +368,19 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].torque *= -ENERGY_SCALE_FACTOR; localData[threadIdx.x].torque *= -ENERGY_SCALE_FACTOR;
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.torque.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0x100000000)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.z*0x100000000)));
} }
} }
} }
...@@ -411,19 +411,19 @@ extern "C" __global__ void computeElectrostatics( ...@@ -411,19 +411,19 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].torque *= ENERGY_SCALE_FACTOR; localData[threadIdx.x].torque *= ENERGY_SCALE_FACTOR;
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.torque.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0x100000000)));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.z*0x100000000)));
} }
} }
} }
......
...@@ -88,7 +88,7 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long* ...@@ -88,7 +88,7 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
const int indexInBlock = threadIdx.x-blockStart; const int indexInBlock = threadIdx.x-blockStart;
const mixed nkT = NUM_COPIES*kT; const mixed nkT = NUM_COPIES*kT;
const mixed twown = 2.0f*nkT/HBAR; const mixed twown = 2.0f*nkT/HBAR;
const mixed forceScale = 1/(mixed) 0xFFFFFFFF; const mixed forceScale = 1/(mixed) 0x100000000;
__shared__ mixed3 q[2*THREAD_BLOCK_SIZE]; __shared__ mixed3 q[2*THREAD_BLOCK_SIZE];
__shared__ mixed3 v[2*THREAD_BLOCK_SIZE]; __shared__ mixed3 v[2*THREAD_BLOCK_SIZE];
__shared__ mixed3 temp[2*THREAD_BLOCK_SIZE]; __shared__ mixed3 temp[2*THREAD_BLOCK_SIZE];
...@@ -165,7 +165,7 @@ extern "C" __global__ void advanceVelocities(mixed4* velm, long long* force, mix ...@@ -165,7 +165,7 @@ extern "C" __global__ void advanceVelocities(mixed4* velm, long long* force, mix
const int numBlocks = (blockDim.x*gridDim.x)/NUM_COPIES; const int numBlocks = (blockDim.x*gridDim.x)/NUM_COPIES;
const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES); const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES);
const int indexInBlock = threadIdx.x-blockStart; const int indexInBlock = threadIdx.x-blockStart;
const mixed forceScale = 1/(mixed) 0xFFFFFFFF; const mixed forceScale = 1/(mixed) 0x100000000;
// Update velocities. // Update velocities.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment