Commit 18355094 authored by Peter Eastman's avatar Peter Eastman
Browse files

When converting to fixed point, multiply by 0x100000000 instead of 0xFFFFFFFF....

When converting to fixed point, multiply by 0x100000000 instead of 0xFFFFFFFF.  This should be (very very slightly) more accurate, since its reciprocal can be exactly represented in floating point.
parent a6bb39a3
......@@ -134,7 +134,7 @@ void computeN2Value(__global const real4* restrict posq, __local real4* restrict
if (get_local_id(0) < TILE_SIZE) {
#ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&global_value[offset], (long) ((value + tempBuffer[get_local_id(0)+TILE_SIZE])*0xFFFFFFFF));
atom_add(&global_value[offset], (long) ((value + tempBuffer[get_local_id(0)+TILE_SIZE])*0x100000000));
#else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset = x*TILE_SIZE + tgx + x*PADDED_NUM_ATOMS;
......@@ -216,8 +216,8 @@ void computeN2Value(__global const real4* restrict posq, __local real4* restrict
#ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset1 = x*TILE_SIZE + tgx;
const unsigned int offset2 = y*TILE_SIZE + tgx;
atom_add(&global_value[offset1], (long) ((value + tempBuffer[get_local_id(0)+TILE_SIZE])*0xFFFFFFFF));
atom_add(&global_value[offset2], (long) ((local_value[get_local_id(0)] + local_value[get_local_id(0)+TILE_SIZE])*0xFFFFFFFF));
atom_add(&global_value[offset1], (long) ((value + tempBuffer[get_local_id(0)+TILE_SIZE])*0x100000000));
atom_add(&global_value[offset2], (long) ((local_value[get_local_id(0)] + local_value[get_local_id(0)+TILE_SIZE])*0x100000000));
#else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset1 = x*TILE_SIZE + tgx + y*PADDED_NUM_ATOMS;
......
......@@ -241,11 +241,11 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef SUPPORTS_64_BIT_ATOMICS
if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&global_value[offset], (long) (value*0xFFFFFFFF));
atom_add(&global_value[offset], (long) (value*0x100000000));
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
atom_add(&global_value[offset], (long) (local_value[get_local_id(0)]*0xFFFFFFFF));
atom_add(&global_value[offset], (long) (local_value[get_local_id(0)]*0x100000000));
}
#else
int writeX = (pos < end ? x : -1);
......
......@@ -14,7 +14,7 @@ __kernel void computePerParticleValues(int bufferSize, int numBuffers, __global
// Reduce the pairwise value
#ifdef SUPPORTS_64_BIT_ATOMICS
real sum = (1.0f/0xFFFFFFFF)*valueBuffers[index];
real sum = (1.0f/0x100000000)*valueBuffers[index];
#else
int totalSize = bufferSize*numBuffers;
real sum = valueBuffers[index];
......
......@@ -19,7 +19,7 @@ __kernel void reduceBornSum(int bufferSize, int numBuffers, float alpha, float b
int totalSize = bufferSize*numBuffers;
#ifdef SUPPORTS_64_BIT_ATOMICS
real sum = (1/(real) 0xFFFFFFFF)*bornSum[index];
real sum = (1/(real) 0x100000000)*bornSum[index];
#else
real sum = bornSum[index];
for (int i = index+bufferSize; i < totalSize; i += bufferSize)
......@@ -59,7 +59,7 @@ __kernel void reduceBornForce(int bufferSize, int numBuffers, __global real* bor
int totalSize = bufferSize*numBuffers;
#ifdef SUPPORTS_64_BIT_ATOMICS
real force = (1/(real) 0xFFFFFFFF)*bornForceIn[index];
real force = (1/(real) 0x100000000)*bornForceIn[index];
#else
real force = bornForce[index];
for (int i = index+bufferSize; i < totalSize; i += bufferSize)
......
......@@ -111,7 +111,7 @@ void computeBornSum(
if (get_local_id(0) < TILE_SIZE) {
#ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&global_bornSum[offset], (long) ((bornSum + localTemp[tgx])*0xFFFFFFFF));
atom_add(&global_bornSum[offset], (long) ((bornSum + localTemp[tgx])*0x100000000));
#else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset = x*TILE_SIZE + tgx + x*PADDED_NUM_ATOMS;
......@@ -195,8 +195,8 @@ void computeBornSum(
#ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset1 = x*TILE_SIZE + tgx;
const unsigned int offset2 = y*TILE_SIZE + tgx;
atom_add(&global_bornSum[offset1], (long) ((bornSum + localTemp[tgx])*0xFFFFFFFF));
atom_add(&global_bornSum[offset2], (long) ((localBornSum[get_local_id(0)] + localBornSum[get_local_id(0)+TILE_SIZE])*0xFFFFFFFF));
atom_add(&global_bornSum[offset1], (long) ((bornSum + localTemp[tgx])*0x100000000));
atom_add(&global_bornSum[offset2], (long) ((localBornSum[get_local_id(0)] + localBornSum[get_local_id(0)+TILE_SIZE])*0x100000000));
#else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset1 = x*TILE_SIZE + tgx + y*PADDED_NUM_ATOMS;
......@@ -345,10 +345,10 @@ void computeGBSAForce1(
if (get_local_id(0) < TILE_SIZE) {
#ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) ((force.x + localData[tgx].temp_x)*0xFFFFFFFF));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].temp_y)*0xFFFFFFFF));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].temp_z)*0xFFFFFFFF));
atom_add(&global_bornForce[offset], (long) ((force.w + localData[tgx].temp_w)*0xFFFFFFFF));
atom_add(&forceBuffers[offset], (long) ((force.x + localData[tgx].temp_x)*0x100000000));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].temp_y)*0x100000000));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].temp_z)*0x100000000));
atom_add(&global_bornForce[offset], (long) ((force.w + localData[tgx].temp_w)*0x100000000));
#else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset = x*TILE_SIZE + tgx + x*PADDED_NUM_ATOMS;
......@@ -442,14 +442,14 @@ void computeGBSAForce1(
barrier(CLK_LOCAL_MEM_FENCE);
const unsigned int offset1 = x*TILE_SIZE + tgx;
const unsigned int offset2 = y*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset1], (long) ((force.x + localData[tgx].temp_x)*0xFFFFFFFF));
atom_add(&forceBuffers[offset1+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].temp_y)*0xFFFFFFFF));
atom_add(&forceBuffers[offset1+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].temp_z)*0xFFFFFFFF));
atom_add(&global_bornForce[offset1], (long) ((force.w + localData[tgx].temp_w)*0xFFFFFFFF));
atom_add(&forceBuffers[offset2], (long) ((localForce[get_local_id(0)].x + localForce[get_local_id(0)+TILE_SIZE].x)*0xFFFFFFFF));
atom_add(&forceBuffers[offset2+PADDED_NUM_ATOMS], (long) ((localForce[get_local_id(0)].y + localForce[get_local_id(0)+TILE_SIZE].y)*0xFFFFFFFF));
atom_add(&forceBuffers[offset2+2*PADDED_NUM_ATOMS], (long) ((localForce[get_local_id(0)].z + localForce[get_local_id(0)+TILE_SIZE].z)*0xFFFFFFFF));
atom_add(&global_bornForce[offset2], (long) ((localForce[get_local_id(0)].w + localForce[get_local_id(0)+TILE_SIZE].w)*0xFFFFFFFF));
atom_add(&forceBuffers[offset1], (long) ((force.x + localData[tgx].temp_x)*0x100000000));
atom_add(&forceBuffers[offset1+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].temp_y)*0x100000000));
atom_add(&forceBuffers[offset1+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].temp_z)*0x100000000));
atom_add(&global_bornForce[offset1], (long) ((force.w + localData[tgx].temp_w)*0x100000000));
atom_add(&forceBuffers[offset2], (long) ((localForce[get_local_id(0)].x + localForce[get_local_id(0)+TILE_SIZE].x)*0x100000000));
atom_add(&forceBuffers[offset2+PADDED_NUM_ATOMS], (long) ((localForce[get_local_id(0)].y + localForce[get_local_id(0)+TILE_SIZE].y)*0x100000000));
atom_add(&forceBuffers[offset2+2*PADDED_NUM_ATOMS], (long) ((localForce[get_local_id(0)].z + localForce[get_local_id(0)+TILE_SIZE].z)*0x100000000));
atom_add(&global_bornForce[offset2], (long) ((localForce[get_local_id(0)].w + localForce[get_local_id(0)+TILE_SIZE].w)*0x100000000));
#else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset1 = x*TILE_SIZE + tgx + y*PADDED_NUM_ATOMS;
......
......@@ -264,11 +264,11 @@ __kernel void computeBornSum(
#ifdef SUPPORTS_64_BIT_ATOMICS
if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&global_bornSum[offset], (long) (bornSum*0xFFFFFFFF));
atom_add(&global_bornSum[offset], (long) (bornSum*0x100000000));
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
atom_add(&global_bornSum[offset], (long) (localData[get_local_id(0)].bornSum*0xFFFFFFFF));
atom_add(&global_bornSum[offset], (long) (localData[get_local_id(0)].bornSum*0x100000000));
}
#else
int writeX = (pos < end ? x : -1);
......@@ -593,17 +593,17 @@ __kernel void computeGBSAForce1(
#ifdef SUPPORTS_64_BIT_ATOMICS
if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (force.x*0xFFFFFFFF));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (force.y*0xFFFFFFFF));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (force.z*0xFFFFFFFF));
atom_add(&global_bornForce[offset], (long) (force.w*0xFFFFFFFF));
atom_add(&forceBuffers[offset], (long) (force.x*0x100000000));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (force.y*0x100000000));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000));
atom_add(&global_bornForce[offset], (long) (force.w*0x100000000));
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (localData[get_local_id(0)].fx*0xFFFFFFFF));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0xFFFFFFFF));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0xFFFFFFFF));
atom_add(&global_bornForce[offset], (long) (localData[get_local_id(0)].fw*0xFFFFFFFF));
atom_add(&forceBuffers[offset], (long) (localData[get_local_id(0)].fx*0x100000000));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0x100000000));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0x100000000));
atom_add(&global_bornForce[offset], (long) (localData[get_local_id(0)].fw*0x100000000));
}
#else
int writeX = (pos < end ? x : -1);
......
......@@ -161,9 +161,9 @@ void computeNonbonded(
if (get_local_id(0) < TILE_SIZE) {
#ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) ((force.x + localData[tgx].fx)*0xFFFFFFFF));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].fy)*0xFFFFFFFF));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].fz)*0xFFFFFFFF));
atom_add(&forceBuffers[offset], (long) ((force.x + localData[tgx].fx)*0x100000000));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].fy)*0x100000000));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].fz)*0x100000000));
#else
force.x += localData[tgx].fx;
force.y += localData[tgx].fy;
......@@ -263,12 +263,12 @@ void computeNonbonded(
#ifdef SUPPORTS_64_BIT_ATOMICS
const unsigned int offset1 = x*TILE_SIZE + tgx;
const unsigned int offset2 = y*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset1], (long) ((force.x + localData[tgx].fx)*0xFFFFFFFF));
atom_add(&forceBuffers[offset1+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].fy)*0xFFFFFFFF));
atom_add(&forceBuffers[offset1+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].fz)*0xFFFFFFFF));
atom_add(&forceBuffers[offset2], (long) ((localForce[tgx].x + localForce[tgx+TILE_SIZE].x)*0xFFFFFFFF));
atom_add(&forceBuffers[offset2+PADDED_NUM_ATOMS], (long) ((localForce[tgx].y + localForce[tgx+TILE_SIZE].y)*0xFFFFFFFF));
atom_add(&forceBuffers[offset2+2*PADDED_NUM_ATOMS], (long) ((localForce[tgx].z + localForce[tgx+TILE_SIZE].z)*0xFFFFFFFF));
atom_add(&forceBuffers[offset1], (long) ((force.x + localData[tgx].fx)*0x100000000));
atom_add(&forceBuffers[offset1+PADDED_NUM_ATOMS], (long) ((force.y + localData[tgx].fy)*0x100000000));
atom_add(&forceBuffers[offset1+2*PADDED_NUM_ATOMS], (long) ((force.z + localData[tgx].fz)*0x100000000));
atom_add(&forceBuffers[offset2], (long) ((localForce[tgx].x + localForce[tgx+TILE_SIZE].x)*0x100000000));
atom_add(&forceBuffers[offset2+PADDED_NUM_ATOMS], (long) ((localForce[tgx].y + localForce[tgx+TILE_SIZE].y)*0x100000000));
atom_add(&forceBuffers[offset2+2*PADDED_NUM_ATOMS], (long) ((localForce[tgx].z + localForce[tgx+TILE_SIZE].z)*0x100000000));
#else
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
const unsigned int offset1 = x*TILE_SIZE + tgx + y*PADDED_NUM_ATOMS;
......
......@@ -295,15 +295,15 @@ __kernel void computeNonbonded(
#ifdef SUPPORTS_64_BIT_ATOMICS
if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (force.x*0xFFFFFFFF));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (force.y*0xFFFFFFFF));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (force.z*0xFFFFFFFF));
atom_add(&forceBuffers[offset], (long) (force.x*0x100000000));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (force.y*0x100000000));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000));
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (localData[get_local_id(0)].fx*0xFFFFFFFF));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0xFFFFFFFF));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0xFFFFFFFF));
atom_add(&forceBuffers[offset], (long) (localData[get_local_id(0)].fx*0x100000000));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0x100000000));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0x100000000));
}
#else
int writeX = (pos < end ? x : -1);
......
......@@ -126,9 +126,9 @@ void gridSpreadCharge(__global const real4* restrict posq, __global const int2*
y -= (y >= GRID_SIZE_Y ? GRID_SIZE_Y : 0);
z -= (z >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
#ifdef USE_DOUBLE_PRECISION
atom_add(&pmeGrid[2*(x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z)], (long) (add*0xFFFFFFFF));
atom_add(&pmeGrid[2*(x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z)], (long) (add*0x100000000));
#else
atom_add(&pmeGrid[x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z], (long) (add*0xFFFFFFFF));
atom_add(&pmeGrid[x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z], (long) (add*0x100000000));
#endif
}
}
......@@ -138,7 +138,7 @@ void gridSpreadCharge(__global const real4* restrict posq, __global const int2*
__kernel void finishSpreadCharge(__global long* restrict pmeGrid) {
__global real2* realGrid = (__global real2*) pmeGrid;
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
real scale = EPSILON_FACTOR/(real) 0xFFFFFFFF;
real scale = EPSILON_FACTOR/(real) 0x100000000;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
#ifdef USE_DOUBLE_PRECISION
long value = pmeGrid[2*index];
......
......@@ -86,7 +86,7 @@ __kernel void reduceReal4Buffer(__global real4* restrict buffer, int bufferSize,
*/
__kernel void reduceForces(__global const long* restrict longBuffer, __global real4* restrict buffer, int bufferSize, int numBuffers) {
int totalSize = bufferSize*numBuffers;
real scale = 1/(real) 0xFFFFFFFF;
real scale = 1/(real) 0x100000000;
for (int index = get_global_id(0); index < bufferSize; index += get_global_size(0)) {
real4 sum = (real4) (scale*longBuffer[index], scale*longBuffer[index+bufferSize], scale*longBuffer[index+2*bufferSize], 0);
for (int i = index; i < totalSize; i += bufferSize)
......
......@@ -7,7 +7,7 @@ extern "C" __global__ void reduceBornSum(const long long* __restrict__ bornSum,
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Get summed Born data
real sum = RECIP(0xFFFFFFFF)*bornSum[index];
real sum = RECIP(0x100000000)*bornSum[index];
// Now calculate Born radius.
......@@ -33,7 +33,7 @@ extern "C" __global__ void computeSurfaceAreaForce(long long* __restrict__ bornF
ratio6 = ratio6*ratio6*ratio6;
ratio6 = ratio6*ratio6;
real saTerm = SURFACE_AREA_FACTOR * r * r * ratio6;
bornForce[index] += (long long) (saTerm*0xFFFFFFFF/bornRadius);
bornForce[index] += (long long) (saTerm*0x100000000/bornRadius);
energy += saTerm;
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] -= energy/6;
......@@ -157,11 +157,11 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ bornS
if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&bornSum[offset], static_cast<unsigned long long>((long long) (data.bornSum*0xFFFFFFFF)));
atomicAdd(&bornSum[offset], static_cast<unsigned long long>((long long) (data.bornSum*0x100000000)));
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
atomicAdd(&bornSum[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].bornSum*0xFFFFFFFF)));
atomicAdd(&bornSum[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].bornSum*0x100000000)));
}
lasty = y;
pos++;
......@@ -276,9 +276,9 @@ extern "C" __global__ void computeGKForces(
}
}
data.force *= 0.5f;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
// Compute torques.
......@@ -292,9 +292,9 @@ extern "C" __global__ void computeGKForces(
data.force += tempTorque;
}
}
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
// Compute chain rule terms.
......@@ -304,7 +304,7 @@ extern "C" __global__ void computeGKForces(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS)
computeOneInteractionB1B2(data, localData[tbx+j]);
}
atomicAdd(&bornForce[atom1], static_cast<unsigned long long>((long long) (data.bornForce*0xFFFFFFFF)));
atomicAdd(&bornForce[atom1], static_cast<unsigned long long>((long long) (data.bornForce*0x100000000)));
}
else {
// This is an off-diagonal tile.
......@@ -330,13 +330,13 @@ extern "C" __global__ void computeGKForces(
localData[threadIdx.x].force *= 0.5f;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
}
// Compute torques.
......@@ -358,13 +358,13 @@ extern "C" __global__ void computeGKForces(
}
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
}
// Compute chain rule terms.
......@@ -379,9 +379,9 @@ extern "C" __global__ void computeGKForces(
}
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&bornForce[offset], static_cast<unsigned long long>((long long) (data.bornForce*0xFFFFFFFF)));
atomicAdd(&bornForce[offset], static_cast<unsigned long long>((long long) (data.bornForce*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&bornForce[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].bornForce*0xFFFFFFFF)));
atomicAdd(&bornForce[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].bornForce*0x100000000)));
}
}
}
......@@ -405,7 +405,7 @@ inline __device__ void loadAtomData3(AtomData3& data, int atom, const real4* __r
float2 params1 = params[atom];
data.radius = params1.x;
data.scaledRadius = params1.y;
data.bornForce = bornForce[atom]/(real) 0xFFFFFFFF;
data.bornForce = bornForce[atom]/(real) 0x100000000;
}
__device__ void computeBornChainRuleInteraction(AtomData3& atom1, AtomData3& atom2, real3& force) {
......@@ -506,9 +506,9 @@ extern "C" __global__ void computeChainRuleForce(
localData[tbx+j].force += tempForce;
}
}
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) ((data.force.x+localData[threadIdx.x].force.x)*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) ((data.force.y+localData[threadIdx.x].force.y)*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) ((data.force.z+localData[threadIdx.x].force.z)*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) ((data.force.x+localData[threadIdx.x].force.x)*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) ((data.force.y+localData[threadIdx.x].force.y)*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) ((data.force.z+localData[threadIdx.x].force.z)*0x100000000)));
}
else {
// This is an off-diagonal tile.
......@@ -532,13 +532,13 @@ extern "C" __global__ void computeChainRuleForce(
}
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
}
}
}
......@@ -687,9 +687,9 @@ extern "C" __global__ void computeEDiffForce(
}
}
data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
// Compute torques.
......@@ -706,9 +706,9 @@ extern "C" __global__ void computeEDiffForce(
}
}
data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
}
else {
// This is an off-diagonal tile.
......@@ -740,13 +740,13 @@ extern "C" __global__ void computeEDiffForce(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
}
// Compute torques.
......@@ -770,13 +770,13 @@ extern "C" __global__ void computeEDiffForce(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
}
}
}
......
......@@ -246,14 +246,14 @@ extern "C" __global__ void computeWCAForce(unsigned long long* __restrict__ forc
tj = (tj+1) & (TILE_SIZE-1);
}
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
if (x != y) {
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
}
}
pos++;
......
......@@ -158,9 +158,9 @@ extern "C" __global__ void computeElectrostatics(
}
}
data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
// Compute torques.
......@@ -177,9 +177,9 @@ extern "C" __global__ void computeElectrostatics(
}
}
data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
}
else {
// This is an off-diagonal tile.
......@@ -243,13 +243,13 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
}
// Compute torques.
......@@ -301,13 +301,13 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
}
}
}
......@@ -341,13 +341,13 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
}
// Compute torques.
......@@ -372,13 +372,13 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
}
}
}
......
......@@ -637,30 +637,30 @@ extern "C" __global__ void computeFixedField(
if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&fieldBuffers[offset], static_cast<unsigned long long>((long long) (data.field.x*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset], static_cast<unsigned long long>((long long) (data.field.x*0x100000000)));
atomicAdd(&fieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0x100000000)));
atomicAdd(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0x100000000)));
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (data.gkField.x*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.y*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.z*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (data.gkField.x*0x100000000)));
atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.y*0x100000000)));
atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.gkField.z*0x100000000)));
#endif
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
atomicAdd(&fieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0x100000000)));
atomicAdd(&fieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0x100000000)));
atomicAdd(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0x100000000)));
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.x*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.y*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.z*0xFFFFFFFF)));
atomicAdd(&gkFieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.x*0x100000000)));
atomicAdd(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.y*0x100000000)));
atomicAdd(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].gkField.z*0x100000000)));
#endif
}
pos++;
......
......@@ -387,36 +387,36 @@ extern "C" __global__ void computeInducedField(
if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (data.field.x*0xFFFFFFFF)));
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0xFFFFFFFF)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0xFFFFFFFF)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0xFFFFFFFF)));
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (data.field.x*0x100000000)));
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0x100000000)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0x100000000)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (data.fieldS.x*0xFFFFFFFF)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.y*0xFFFFFFFF)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.z*0xFFFFFFFF)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (data.fieldPolarS.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.z*0xFFFFFFFF)));
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (data.fieldS.x*0x100000000)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.y*0x100000000)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (data.fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.z*0x100000000)));
#endif
}
if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx;
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0xFFFFFFFF)));
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0xFFFFFFFF)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0xFFFFFFFF)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0xFFFFFFFF)));
atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0x100000000)));
atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0x100000000)));
atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0x100000000)));
atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0x100000000)));
atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0x100000000)));
atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0x100000000)));
#ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.x*0xFFFFFFFF)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.y*0xFFFFFFFF)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.z*0xFFFFFFFF)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.z*0xFFFFFFFF)));
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.x*0x100000000)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.y*0x100000000)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.z*0x100000000)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.x*0x100000000)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.y*0x100000000)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.z*0x100000000)));
#endif
}
pos++;
......@@ -433,7 +433,7 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
#else
const real ewaldScale = 0;
#endif
const real fieldScale = 1/(real) 0xFFFFFFFF;
const real fieldScale = 1/(real) 0x100000000;
real sumErrors = 0;
real sumPolarErrors = 0;
for (int atom = blockIdx.x*blockDim.x + threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
......
......@@ -478,13 +478,13 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
phi[20*m+18] = tuv012;
phi[20*m+19] = tuv111;
real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
long long fieldx = (long long) ((dipoleScale*labFrameDipole[m*3]-GRID_SIZE_X*invPeriodicBoxSize.x*tuv100)*0xFFFFFFFF);
long long fieldx = (long long) ((dipoleScale*labFrameDipole[m*3]-GRID_SIZE_X*invPeriodicBoxSize.x*tuv100)*0x100000000);
fieldBuffers[m] = fieldx;
fieldPolarBuffers[m] = fieldx;
long long fieldy = (long long) ((dipoleScale*labFrameDipole[m*3+1]-GRID_SIZE_Y*invPeriodicBoxSize.y*tuv010)*0xFFFFFFFF);
long long fieldy = (long long) ((dipoleScale*labFrameDipole[m*3+1]-GRID_SIZE_Y*invPeriodicBoxSize.y*tuv010)*0x100000000);
fieldBuffers[m+PADDED_NUM_ATOMS] = fieldy;
fieldPolarBuffers[m+PADDED_NUM_ATOMS] = fieldy;
long long fieldz = (long long) ((dipoleScale*labFrameDipole[m*3+2]-GRID_SIZE_Z*invPeriodicBoxSize.z*tuv001)*0xFFFFFFFF);
long long fieldz = (long long) ((dipoleScale*labFrameDipole[m*3+2]-GRID_SIZE_Z*invPeriodicBoxSize.z*tuv001)*0x100000000);
fieldBuffers[m+2*PADDED_NUM_ATOMS] = fieldz;
fieldPolarBuffers[m+2*PADDED_NUM_ATOMS] = fieldz;
}
......@@ -723,17 +723,17 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
torqueBuffers[i] = (long long) (EPSILON_FACTOR*(multipole[3]*yscale*phi[2] - multipole[2]*zscale*phi[3]
+ 2*(multipole[6]-multipole[5])*yscale*zscale*phi[9]
+ multipole[8]*xscale*yscale*phi[7] + multipole[9]*yscale*yscale*phi[5]
- multipole[7]*xscale*zscale*phi[8] - multipole[9]*zscale*zscale*phi[6])*0xFFFFFFFF);
- multipole[7]*xscale*zscale*phi[8] - multipole[9]*zscale*zscale*phi[6])*0x100000000);
torqueBuffers[i+PADDED_NUM_ATOMS] = (long long) (EPSILON_FACTOR*(multipole[1]*zscale*phi[3] - multipole[3]*xscale*phi[1]
+ 2*(multipole[4]-multipole[6])*xscale*zscale*phi[8]
+ multipole[7]*yscale*zscale*phi[9] + multipole[8]*zscale*zscale*phi[6]
- multipole[8]*xscale*xscale*phi[4] - multipole[9]*xscale*yscale*phi[7])*0xFFFFFFFF);
- multipole[8]*xscale*xscale*phi[4] - multipole[9]*xscale*yscale*phi[7])*0x100000000);
torqueBuffers[i+PADDED_NUM_ATOMS*2] = (long long) (EPSILON_FACTOR*(multipole[2]*xscale*phi[1] - multipole[1]*yscale*phi[2]
+ 2*(multipole[5]-multipole[4])*xscale*yscale*phi[7]
+ multipole[7]*xscale*xscale*phi[4] + multipole[9]*xscale*zscale*phi[8]
- multipole[7]*yscale*yscale*phi[5] - multipole[8]*yscale*zscale*phi[9])*0xFFFFFFFF);
- multipole[7]*yscale*yscale*phi[5] - multipole[8]*yscale*zscale*phi[9])*0x100000000);
// Compute the force and energy.
......@@ -757,9 +757,9 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
f.x *= EPSILON_FACTOR*xscale;
f.y *= EPSILON_FACTOR*yscale;
f.z *= EPSILON_FACTOR*zscale;
forceBuffers[i] -= static_cast<unsigned long long>((long long) (f.x*0xFFFFFFFF));
forceBuffers[i+PADDED_NUM_ATOMS] -= static_cast<unsigned long long>((long long) (f.y*0xFFFFFFFF));
forceBuffers[i+PADDED_NUM_ATOMS*2] -= static_cast<unsigned long long>((long long) (f.z*0xFFFFFFFF));
forceBuffers[i] -= static_cast<unsigned long long>((long long) (f.x*0x100000000));
forceBuffers[i+PADDED_NUM_ATOMS] -= static_cast<unsigned long long>((long long) (f.y*0x100000000));
forceBuffers[i+PADDED_NUM_ATOMS*2] -= static_cast<unsigned long long>((long long) (f.z*0x100000000));
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*EPSILON_FACTOR*energy;
}
......@@ -801,17 +801,17 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
torqueBuffers[i] += (long long) (0.5f*EPSILON_FACTOR*(multipole[3]*yscale*phidp[2] - multipole[2]*zscale*phidp[3]
+ 2*(multipole[6]-multipole[5])*yscale*zscale*phidp[9]
+ multipole[8]*xscale*yscale*phidp[7] + multipole[9]*yscale*yscale*phidp[5]
- multipole[7]*xscale*zscale*phidp[8] - multipole[9]*zscale*zscale*phidp[6])*0xFFFFFFFF);
- multipole[7]*xscale*zscale*phidp[8] - multipole[9]*zscale*zscale*phidp[6])*0x100000000);
torqueBuffers[i+PADDED_NUM_ATOMS] += (long long) (0.5f*EPSILON_FACTOR*(multipole[1]*zscale*phidp[3] - multipole[3]*xscale*phidp[1]
+ 2*(multipole[4]-multipole[6])*xscale*zscale*phidp[8]
+ multipole[7]*yscale*zscale*phidp[9] + multipole[8]*zscale*zscale*phidp[6]
- multipole[8]*xscale*xscale*phidp[4] - multipole[9]*xscale*yscale*phidp[7])*0xFFFFFFFF);
- multipole[8]*xscale*xscale*phidp[4] - multipole[9]*xscale*yscale*phidp[7])*0x100000000);
torqueBuffers[i+PADDED_NUM_ATOMS*2] += (long long) (0.5f*EPSILON_FACTOR*(multipole[2]*xscale*phidp[1] - multipole[1]*yscale*phidp[2]
+ 2*(multipole[5]-multipole[4])*xscale*yscale*phidp[7]
+ multipole[7]*xscale*xscale*phidp[4] + multipole[9]*xscale*zscale*phidp[8]
- multipole[7]*yscale*yscale*phidp[5] - multipole[8]*yscale*zscale*phidp[9])*0xFFFFFFFF);
- multipole[7]*yscale*yscale*phidp[5] - multipole[8]*yscale*zscale*phidp[9])*0x100000000);
// Compute the force and energy.
......@@ -865,18 +865,18 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
f.x *= 0.5f*EPSILON_FACTOR*xscale;
f.y *= 0.5f*EPSILON_FACTOR*yscale;
f.z *= 0.5f*EPSILON_FACTOR*zscale;
forceBuffers[i] -= static_cast<unsigned long long>((long long) (f.x*0xFFFFFFFF));
forceBuffers[i+PADDED_NUM_ATOMS] -= static_cast<unsigned long long>((long long) (f.y*0xFFFFFFFF));
forceBuffers[i+PADDED_NUM_ATOMS*2] -= static_cast<unsigned long long>((long long) (f.z*0xFFFFFFFF));
forceBuffers[i] -= static_cast<unsigned long long>((long long) (f.x*0x100000000));
forceBuffers[i+PADDED_NUM_ATOMS] -= static_cast<unsigned long long>((long long) (f.y*0x100000000));
forceBuffers[i+PADDED_NUM_ATOMS*2] -= static_cast<unsigned long long>((long long) (f.z*0x100000000));
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*EPSILON_FACTOR*energy;
}
extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip,
long long* __restrict__ inducedField, long long* __restrict__ inducedFieldPolar, real4 invPeriodicBoxSize) {
real xscale = GRID_SIZE_X*invPeriodicBoxSize.x*0xFFFFFFFF;
real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y*0xFFFFFFFF;
real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z*0xFFFFFFFF;
real xscale = GRID_SIZE_X*invPeriodicBoxSize.x*0x100000000;
real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y*0x100000000;
real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z*0x100000000;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
inducedField[i] -= (long long) (xscale*phid[10*i+1]);
inducedField[i+PADDED_NUM_ATOMS] -= (long long) (yscale*phid[10*i+2]);
......
......@@ -212,7 +212,7 @@ extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fi
#endif
real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
real scale = polarizability[atom]/(real) 0xFFFFFFFF;
real scale = polarizability[atom]/(real) 0x100000000;
inducedDipole[3*atom] = scale*fieldBuffers[atom];
inducedDipole[3*atom+1] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS];
inducedDipole[3*atom+2] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS*2];
......@@ -263,7 +263,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
const int Z = 2;
const int I = 3;
const real torqueScale = RECIP((double) 0xFFFFFFFF);
const real torqueScale = RECIP((double) 0x100000000);
real3 forces[4];
real norms[LastVectorIndex];
......@@ -423,22 +423,22 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
// Store results
atomicAdd(&forceBuffers[particles.z], static_cast<unsigned long long>((long long) (forces[Z].x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Z].y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Z].z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.z], static_cast<unsigned long long>((long long) (forces[Z].x*0x100000000)));
atomicAdd(&forceBuffers[particles.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Z].y*0x100000000)));
atomicAdd(&forceBuffers[particles.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Z].z*0x100000000)));
if (axisType != 4) {
atomicAdd(&forceBuffers[particles.x], static_cast<unsigned long long>((long long) (forces[X].x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[X].y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[X].z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.x], static_cast<unsigned long long>((long long) (forces[X].x*0x100000000)));
atomicAdd(&forceBuffers[particles.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[X].y*0x100000000)));
atomicAdd(&forceBuffers[particles.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[X].z*0x100000000)));
}
if ((axisType == 2 || axisType == 3) && particles.y > -1) {
atomicAdd(&forceBuffers[particles.y], static_cast<unsigned long long>((long long) (forces[Y].x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Y].y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Y].z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[particles.y], static_cast<unsigned long long>((long long) (forces[Y].x*0x100000000)));
atomicAdd(&forceBuffers[particles.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Y].y*0x100000000)));
atomicAdd(&forceBuffers[particles.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[Y].z*0x100000000)));
}
atomicAdd(&forceBuffers[atom], static_cast<unsigned long long>((long long) (forces[I].x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[I].y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[I].z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom], static_cast<unsigned long long>((long long) (forces[I].x*0x100000000)));
atomicAdd(&forceBuffers[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[I].y*0x100000000)));
atomicAdd(&forceBuffers[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forces[I].z*0x100000000)));
}
}
}
......
......@@ -282,12 +282,12 @@ extern "C" __global__ void computeElectrostatics(
computeSelfEnergyAndTorque(data, energy);
data.force *= -ENERGY_SCALE_FACTOR;
data.torque *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.torque.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.torque.x*0x100000000)));
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0x100000000)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0x100000000)));
}
else {
// This is an off-diagonal tile.
......@@ -368,19 +368,19 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].torque *= -ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.torque.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.z*0x100000000)));
}
}
}
......@@ -411,19 +411,19 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].torque *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.torque.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.torque.z*0x100000000)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.z*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0x100000000)));
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.x*0x100000000)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.y*0x100000000)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].torque.z*0x100000000)));
}
}
}
......
......@@ -88,7 +88,7 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
const int indexInBlock = threadIdx.x-blockStart;
const mixed nkT = NUM_COPIES*kT;
const mixed twown = 2.0f*nkT/HBAR;
const mixed forceScale = 1/(mixed) 0xFFFFFFFF;
const mixed forceScale = 1/(mixed) 0x100000000;
__shared__ mixed3 q[2*THREAD_BLOCK_SIZE];
__shared__ mixed3 v[2*THREAD_BLOCK_SIZE];
__shared__ mixed3 temp[2*THREAD_BLOCK_SIZE];
......@@ -165,7 +165,7 @@ extern "C" __global__ void advanceVelocities(mixed4* velm, long long* force, mix
const int numBlocks = (blockDim.x*gridDim.x)/NUM_COPIES;
const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES);
const int indexInBlock = threadIdx.x-blockStart;
const mixed forceScale = 1/(mixed) 0xFFFFFFFF;
const mixed forceScale = 1/(mixed) 0x100000000;
// Update velocities.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment