Unverified Commit 434d7afb authored by Anton Gorenko's avatar Anton Gorenko Committed by GitHub
Browse files

Add realToFixedPoint to all platforms (#3504)

This allows a faster float-to-int64 conversion to be used in the HIP platform.
parent ca80579a
...@@ -256,9 +256,9 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in ...@@ -256,9 +256,9 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
s<<" {\n"; s<<" {\n";
if (context.getSupports64BitGlobalAtomics()) { if (context.getSupports64BitGlobalAtomics()) {
s<<" atom_add(&forceBuffers[atom"<<(i+1)<<"], (long) (force"<<(i+1)<<".x*0x100000000));\n"; s<<" atom_add(&forceBuffers[atom"<<(i+1)<<"], realToFixedPoint(force"<<(i+1)<<".x));\n";
s<<" atom_add(&forceBuffers[atom"<<(i+1)<<"+PADDED_NUM_ATOMS], (long) (force"<<(i+1)<<".y*0x100000000));\n"; s<<" atom_add(&forceBuffers[atom"<<(i+1)<<"+PADDED_NUM_ATOMS], realToFixedPoint(force"<<(i+1)<<".y));\n";
s<<" atom_add(&forceBuffers[atom"<<(i+1)<<"+2*PADDED_NUM_ATOMS], (long) (force"<<(i+1)<<".z*0x100000000));\n"; s<<" atom_add(&forceBuffers[atom"<<(i+1)<<"+2*PADDED_NUM_ATOMS], realToFixedPoint(force"<<(i+1)<<".z));\n";
} }
else { else {
s<<" unsigned int offset = atom"<<(i+1)<<"+buffers"<<suffix[i]<<"*PADDED_NUM_ATOMS;\n"; s<<" unsigned int offset = atom"<<(i+1)<<"+buffers"<<suffix[i]<<"*PADDED_NUM_ATOMS;\n";
......
...@@ -59,3 +59,7 @@ typedef unsigned long mm_ulong; ...@@ -59,3 +59,7 @@ typedef unsigned long mm_ulong;
#define asinf(x) asin(x) #define asinf(x) asin(x)
#define atanf(x) atan(x) #define atanf(x) atan(x)
#define atan2f(x, y) atan2(x, y) #define atan2f(x, y) atan2(x, y)
/**
 * Convert a real value to its fixed-point integer representation.
 *
 * The input is multiplied by 0x100000000 (2^32) and the product is then
 * cast to long, which discards any remaining fractional part.
 *
 * NOTE(review): `real` is declared outside this view; presumably it is the
 * platform's float/double typedef - confirm against the surrounding header.
 *
 * @param x  the value to convert
 * @return   x scaled by 2^32, cast to long
 */
inline long realToFixedPoint(real x) {
    const long fixedPointScale = 0x100000000;
    return (long) (x * fixedPointScale);
}
...@@ -178,14 +178,14 @@ __kernel void computeNonbonded( ...@@ -178,14 +178,14 @@ __kernel void computeNonbonded(
#ifdef INCLUDE_FORCES #ifdef INCLUDE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (force.x*0x100000000)); atom_add(&forceBuffers[offset], realToFixedPoint(force.x));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (force.y*0x100000000)); atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000)); atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
if (x != y) { if (x != y) {
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (localData[get_local_id(0)].fx*0x100000000)); atom_add(&forceBuffers[offset], realToFixedPoint(localData[get_local_id(0)].fx));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0x100000000)); atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], realToFixedPoint(localData[get_local_id(0)].fy));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0x100000000)); atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], realToFixedPoint(localData[get_local_id(0)].fz));
} }
#else #else
unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS; unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS;
...@@ -410,13 +410,13 @@ __kernel void computeNonbonded( ...@@ -410,13 +410,13 @@ __kernel void computeNonbonded(
unsigned int atom2 = y*TILE_SIZE + tgx; unsigned int atom2 = y*TILE_SIZE + tgx;
#endif #endif
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[atom1], (long) (force.x*0x100000000)); atom_add(&forceBuffers[atom1], realToFixedPoint(force.x));
atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], (long) (force.y*0x100000000)); atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000)); atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
atom_add(&forceBuffers[atom2], (long) (localData[get_local_id(0)].fx*0x100000000)); atom_add(&forceBuffers[atom2], realToFixedPoint(localData[get_local_id(0)].fx));
atom_add(&forceBuffers[atom2+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0x100000000)); atom_add(&forceBuffers[atom2+PADDED_NUM_ATOMS], realToFixedPoint(localData[get_local_id(0)].fy));
atom_add(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0x100000000)); atom_add(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], realToFixedPoint(localData[get_local_id(0)].fz));
} }
#else #else
unsigned int offset1 = atom1 + warp*PADDED_NUM_ATOMS; unsigned int offset1 = atom1 + warp*PADDED_NUM_ATOMS;
......
...@@ -107,9 +107,9 @@ __kernel void computeNonbonded( ...@@ -107,9 +107,9 @@ __kernel void computeNonbonded(
// Write results. // Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[atom1], (long) (force.x*0x100000000)); atom_add(&forceBuffers[atom1], realToFixedPoint(force.x));
atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], (long) (force.y*0x100000000)); atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000)); atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
#else #else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz; forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
...@@ -183,9 +183,9 @@ __kernel void computeNonbonded( ...@@ -183,9 +183,9 @@ __kernel void computeNonbonded(
// Write results for atom1. // Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[atom1], (long) (force.x*0x100000000)); atom_add(&forceBuffers[atom1], realToFixedPoint(force.x));
atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], (long) (force.y*0x100000000)); atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000)); atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
#else #else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz; forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
...@@ -197,9 +197,9 @@ __kernel void computeNonbonded( ...@@ -197,9 +197,9 @@ __kernel void computeNonbonded(
for (int tgx = 0; tgx < TILE_SIZE; tgx++) { for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = y*TILE_SIZE + tgx; unsigned int offset = y*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (localData[tgx].fx*0x100000000)); atom_add(&forceBuffers[offset], realToFixedPoint(localData[tgx].fx));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], (long) (localData[tgx].fy*0x100000000)); atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fy));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (long) (localData[tgx].fz*0x100000000)); atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fz));
#else #else
unsigned int offset = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
real4 f = forceBuffers[offset]; real4 f = forceBuffers[offset];
...@@ -342,9 +342,9 @@ __kernel void computeNonbonded( ...@@ -342,9 +342,9 @@ __kernel void computeNonbonded(
// Write results for atom1. // Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[atom1], (long) (force.x*0x100000000)); atom_add(&forceBuffers[atom1], realToFixedPoint(force.x));
atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], (long) (force.y*0x100000000)); atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000)); atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
#else #else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz; forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
...@@ -409,9 +409,9 @@ __kernel void computeNonbonded( ...@@ -409,9 +409,9 @@ __kernel void computeNonbonded(
// Write results for atom1. // Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[atom1], (long) (force.x*0x100000000)); atom_add(&forceBuffers[atom1], realToFixedPoint(force.x));
atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], (long) (force.y*0x100000000)); atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000)); atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
#else #else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz; forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
...@@ -429,9 +429,9 @@ __kernel void computeNonbonded( ...@@ -429,9 +429,9 @@ __kernel void computeNonbonded(
#endif #endif
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[atom2], (long) (localData[tgx].fx*0x100000000)); atom_add(&forceBuffers[atom2], realToFixedPoint(localData[tgx].fx));
atom_add(&forceBuffers[atom2+PADDED_NUM_ATOMS], (long) (localData[tgx].fy*0x100000000)); atom_add(&forceBuffers[atom2+PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fy));
atom_add(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (long) (localData[tgx].fz*0x100000000)); atom_add(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fz));
#else #else
unsigned int offset = atom2 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset = atom2 + get_group_id(0)*PADDED_NUM_ATOMS;
real4 f = forceBuffers[offset]; real4 f = forceBuffers[offset];
......
...@@ -96,9 +96,9 @@ __kernel void reduceForces(__global long* restrict longBuffer, __global real4* r ...@@ -96,9 +96,9 @@ __kernel void reduceForces(__global long* restrict longBuffer, __global real4* r
for (int i = index; i < totalSize; i += bufferSize) for (int i = index; i < totalSize; i += bufferSize)
sum += buffer[i]; sum += buffer[i];
buffer[index] = sum; buffer[index] = sum;
longBuffer[index] = (long) (sum.x*0x100000000); longBuffer[index] = realToFixedPoint(sum.x);
longBuffer[index+bufferSize] = (long) (sum.y*0x100000000); longBuffer[index+bufferSize] = realToFixedPoint(sum.y);
longBuffer[index+2*bufferSize] = (long) (sum.z*0x100000000); longBuffer[index+2*bufferSize] = realToFixedPoint(sum.z);
} }
} }
...@@ -137,4 +137,4 @@ __kernel void determineNativeAccuracy(__global float8* restrict values, int numV ...@@ -137,4 +137,4 @@ __kernel void determineNativeAccuracy(__global float8* restrict values, int numV
__kernel void setCharges(__global real* restrict charges, __global real4* restrict posq, __global int* restrict atomOrder, int numAtoms) { __kernel void setCharges(__global real* restrict charges, __global real4* restrict posq, __global int* restrict atomOrder, int numAtoms) {
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0))
posq[i].w = charges[atomOrder[i]]; posq[i].w = charges[atomOrder[i]];
} }
\ No newline at end of file
...@@ -33,7 +33,7 @@ KERNEL void computeSurfaceAreaForce(GLOBAL mm_long* RESTRICT bornForce, GLOBAL m ...@@ -33,7 +33,7 @@ KERNEL void computeSurfaceAreaForce(GLOBAL mm_long* RESTRICT bornForce, GLOBAL m
ratio6 = ratio6*ratio6*ratio6; ratio6 = ratio6*ratio6*ratio6;
ratio6 = ratio6*ratio6; ratio6 = ratio6*ratio6;
real saTerm = SURFACE_AREA_FACTOR * r * r * ratio6; real saTerm = SURFACE_AREA_FACTOR * r * r * ratio6;
bornForce[index] += (mm_long) (saTerm*0x100000000/bornRadius); bornForce[index] += realToFixedPoint(saTerm/bornRadius);
energy += saTerm; energy += saTerm;
} }
energyBuffer[GLOBAL_ID] -= energy/6; energyBuffer[GLOBAL_ID] -= energy/6;
...@@ -169,11 +169,11 @@ KERNEL void computeBornSum(GLOBAL mm_ulong* RESTRICT bornSum, GLOBAL const real4 ...@@ -169,11 +169,11 @@ KERNEL void computeBornSum(GLOBAL mm_ulong* RESTRICT bornSum, GLOBAL const real4
if (pos < end) { if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&bornSum[offset], (mm_ulong) ((mm_long) (data.bornSum*0x100000000))); ATOMIC_ADD(&bornSum[offset], (mm_ulong) realToFixedPoint(data.bornSum));
} }
if (pos < end && x != y) { if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx; const unsigned int offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&bornSum[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].bornSum*0x100000000))); ATOMIC_ADD(&bornSum[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].bornSum));
} }
lasty = y; lasty = y;
pos++; pos++;
...@@ -283,10 +283,10 @@ KERNEL void computeGKForces( ...@@ -283,10 +283,10 @@ KERNEL void computeGKForces(
} }
SYNC_WARPS; SYNC_WARPS;
data.force *= 0.5f; data.force *= 0.5f;
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
// Compute torques. // Compute torques.
data.force = make_real3(0); data.force = make_real3(0);
...@@ -301,10 +301,10 @@ KERNEL void computeGKForces( ...@@ -301,10 +301,10 @@ KERNEL void computeGKForces(
} }
} }
SYNC_WARPS; SYNC_WARPS;
ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
// Compute chain rule terms. // Compute chain rule terms.
data.force = make_real3(0); data.force = make_real3(0);
...@@ -319,7 +319,7 @@ KERNEL void computeGKForces( ...@@ -319,7 +319,7 @@ KERNEL void computeGKForces(
SYNC_WARPS; SYNC_WARPS;
} }
} }
ATOMIC_ADD(&bornForce[atom1], (mm_ulong) ((mm_long) (data.bornForce*0x100000000))); ATOMIC_ADD(&bornForce[atom1], (mm_ulong) realToFixedPoint(data.bornForce));
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
...@@ -348,13 +348,13 @@ KERNEL void computeGKForces( ...@@ -348,13 +348,13 @@ KERNEL void computeGKForces(
localData[LOCAL_ID].force *= 0.5f; localData[LOCAL_ID].force *= 0.5f;
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.z));
} }
// Compute torques. // Compute torques.
...@@ -380,13 +380,13 @@ KERNEL void computeGKForces( ...@@ -380,13 +380,13 @@ KERNEL void computeGKForces(
} }
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.z));
} }
// Compute chain rule terms. // Compute chain rule terms.
...@@ -409,9 +409,9 @@ KERNEL void computeGKForces( ...@@ -409,9 +409,9 @@ KERNEL void computeGKForces(
} }
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&bornForce[offset], (mm_ulong) ((mm_long) (data.bornForce*0x100000000))); ATOMIC_ADD(&bornForce[offset], (mm_ulong) realToFixedPoint(data.bornForce));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&bornForce[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].bornForce*0x100000000))); ATOMIC_ADD(&bornForce[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].bornForce));
} }
} }
} }
...@@ -543,9 +543,9 @@ KERNEL void computeChainRuleForce( ...@@ -543,9 +543,9 @@ KERNEL void computeChainRuleForce(
} }
SYNC_WARPS; SYNC_WARPS;
} }
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) ((data.force.x+localData[LOCAL_ID].force.x)*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint((data.force.x+localData[LOCAL_ID].force.x)));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) ((data.force.y+localData[LOCAL_ID].force.y)*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint((data.force.y+localData[LOCAL_ID].force.y)));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) ((data.force.z+localData[LOCAL_ID].force.z)*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint((data.force.z+localData[LOCAL_ID].force.z)));
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
...@@ -571,13 +571,13 @@ KERNEL void computeChainRuleForce( ...@@ -571,13 +571,13 @@ KERNEL void computeChainRuleForce(
} }
if (pos < end) { if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.z));
} }
} }
} }
...@@ -700,9 +700,9 @@ KERNEL void computeEDiffForce( ...@@ -700,9 +700,9 @@ KERNEL void computeEDiffForce(
} }
SYNC_WARPS; SYNC_WARPS;
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
// Compute torques. // Compute torques.
...@@ -718,9 +718,9 @@ KERNEL void computeEDiffForce( ...@@ -718,9 +718,9 @@ KERNEL void computeEDiffForce(
} }
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
SYNC_WARPS; SYNC_WARPS;
} }
else { else {
...@@ -753,13 +753,13 @@ KERNEL void computeEDiffForce( ...@@ -753,13 +753,13 @@ KERNEL void computeEDiffForce(
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
localData[LOCAL_ID].force *= ENERGY_SCALE_FACTOR; localData[LOCAL_ID].force *= ENERGY_SCALE_FACTOR;
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.z));
// Compute torques. // Compute torques.
...@@ -783,13 +783,13 @@ KERNEL void computeEDiffForce( ...@@ -783,13 +783,13 @@ KERNEL void computeEDiffForce(
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
localData[LOCAL_ID].force *= ENERGY_SCALE_FACTOR; localData[LOCAL_ID].force *= ENERGY_SCALE_FACTOR;
offset = x*TILE_SIZE + tgx; offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.z));
SYNC_WARPS; SYNC_WARPS;
} }
} }
...@@ -865,13 +865,13 @@ KERNEL void computeEDiffForce( ...@@ -865,13 +865,13 @@ KERNEL void computeEDiffForce(
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
localData[LOCAL_ID].force *= ENERGY_SCALE_FACTOR; localData[LOCAL_ID].force *= ENERGY_SCALE_FACTOR;
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.z));
// Compute torques. // Compute torques.
...@@ -893,13 +893,13 @@ KERNEL void computeEDiffForce( ...@@ -893,13 +893,13 @@ KERNEL void computeEDiffForce(
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
localData[LOCAL_ID].force *= ENERGY_SCALE_FACTOR; localData[LOCAL_ID].force *= ENERGY_SCALE_FACTOR;
offset = x*TILE_SIZE + tgx; offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.z));
} }
pos++; pos++;
} }
......
...@@ -250,14 +250,14 @@ KERNEL void computeWCAForce(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL mixed ...@@ -250,14 +250,14 @@ KERNEL void computeWCAForce(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL mixed
SYNC_WARPS; SYNC_WARPS;
} }
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
if (x != y) { if (x != y) {
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.z));
} }
} }
pos++; pos++;
......
...@@ -174,14 +174,14 @@ KERNEL void computeField(GLOBAL const real4* RESTRICT posq, GLOBAL const unsigne ...@@ -174,14 +174,14 @@ KERNEL void computeField(GLOBAL const real4* RESTRICT posq, GLOBAL const unsigne
// Write results. // Write results.
unsigned int offset1 = x*TILE_SIZE + tgx; unsigned int offset1 = x*TILE_SIZE + tgx;
ATOMIC_ADD(&fieldBuffers[offset1], (mm_ulong) ((mm_long) (field.x*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset1], (mm_ulong) realToFixedPoint(field.x));
ATOMIC_ADD(&fieldBuffers[offset1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (field.y*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(field.y));
ATOMIC_ADD(&fieldBuffers[offset1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (field.z*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(field.z));
if (x != y) { if (x != y) {
unsigned int offset2 = y*TILE_SIZE + tgx; unsigned int offset2 = y*TILE_SIZE + tgx;
ATOMIC_ADD(&fieldBuffers[offset2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fx*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset2], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fx));
ATOMIC_ADD(&fieldBuffers[offset2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fy*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fy));
ATOMIC_ADD(&fieldBuffers[offset2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fz*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fz));
} }
} }
...@@ -344,18 +344,18 @@ KERNEL void computeField(GLOBAL const real4* RESTRICT posq, GLOBAL const unsigne ...@@ -344,18 +344,18 @@ KERNEL void computeField(GLOBAL const real4* RESTRICT posq, GLOBAL const unsigne
// Write results. // Write results.
ATOMIC_ADD(&fieldBuffers[atom1], (mm_ulong) ((mm_long) (field.x*0x100000000))); ATOMIC_ADD(&fieldBuffers[atom1], (mm_ulong) realToFixedPoint(field.x));
ATOMIC_ADD(&fieldBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (field.y*0x100000000))); ATOMIC_ADD(&fieldBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(field.y));
ATOMIC_ADD(&fieldBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (field.z*0x100000000))); ATOMIC_ADD(&fieldBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(field.z));
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int atom2 = atomIndices[LOCAL_ID]; unsigned int atom2 = atomIndices[LOCAL_ID];
#else #else
unsigned int atom2 = y*TILE_SIZE + tgx; unsigned int atom2 = y*TILE_SIZE + tgx;
#endif #endif
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
ATOMIC_ADD(&fieldBuffers[atom2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fx*0x100000000))); ATOMIC_ADD(&fieldBuffers[atom2], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fx));
ATOMIC_ADD(&fieldBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fy*0x100000000))); ATOMIC_ADD(&fieldBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fy));
ATOMIC_ADD(&fieldBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fz*0x100000000))); ATOMIC_ADD(&fieldBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fz));
} }
} }
tile++; tile++;
...@@ -395,12 +395,12 @@ KERNEL void computeFieldExceptions(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ ...@@ -395,12 +395,12 @@ KERNEL void computeFieldExceptions(GLOBAL const real4* RESTRICT posq, GLOBAL mm_
real3 tempField1 = make_real3(0); real3 tempField1 = make_real3(0);
real3 tempField2 = make_real3(0); real3 tempField2 = make_real3(0);
COMPUTE_FIELD COMPUTE_FIELD
ATOMIC_ADD(&fieldBuffers[atom1], (mm_ulong) ((mm_long) (tempField1.x*0x100000000))); ATOMIC_ADD(&fieldBuffers[atom1], (mm_ulong) realToFixedPoint(tempField1.x));
ATOMIC_ADD(&fieldBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempField1.y*0x100000000))); ATOMIC_ADD(&fieldBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(tempField1.y));
ATOMIC_ADD(&fieldBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempField1.z*0x100000000))); ATOMIC_ADD(&fieldBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(tempField1.z));
ATOMIC_ADD(&fieldBuffers[atom2], (mm_ulong) ((mm_long) (tempField2.x*0x100000000))); ATOMIC_ADD(&fieldBuffers[atom2], (mm_ulong) realToFixedPoint(tempField2.x));
ATOMIC_ADD(&fieldBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempField2.y*0x100000000))); ATOMIC_ADD(&fieldBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(tempField2.y));
ATOMIC_ADD(&fieldBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempField2.z*0x100000000))); ATOMIC_ADD(&fieldBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(tempField2.z));
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
......
...@@ -362,22 +362,22 @@ KERNEL void mapTorqueToForce(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL cons ...@@ -362,22 +362,22 @@ KERNEL void mapTorqueToForce(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL cons
// Store results // Store results
ATOMIC_ADD(&forceBuffers[particles.z], (mm_ulong) ((mm_long) (forces[Z].x*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.z], (mm_ulong) realToFixedPoint(forces[Z].x));
ATOMIC_ADD(&forceBuffers[particles.z+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[Z].y*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.z+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[Z].y));
ATOMIC_ADD(&forceBuffers[particles.z+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[Z].z*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.z+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[Z].z));
if (axisType != 4) { if (axisType != 4) {
ATOMIC_ADD(&forceBuffers[particles.x], (mm_ulong) ((mm_long) (forces[X].x*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.x], (mm_ulong) realToFixedPoint(forces[X].x));
ATOMIC_ADD(&forceBuffers[particles.x+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[X].y*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.x+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[X].y));
ATOMIC_ADD(&forceBuffers[particles.x+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[X].z*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.x+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[X].z));
} }
if ((axisType == 2 || axisType == 3) && particles.y > -1) { if ((axisType == 2 || axisType == 3) && particles.y > -1) {
ATOMIC_ADD(&forceBuffers[particles.y], (mm_ulong) ((mm_long) (forces[Y].x*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.y], (mm_ulong) realToFixedPoint(forces[Y].x));
ATOMIC_ADD(&forceBuffers[particles.y+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[Y].y*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.y+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[Y].y));
ATOMIC_ADD(&forceBuffers[particles.y+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[Y].z*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.y+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[Y].z));
} }
ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) ((mm_long) (forces[I].x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) realToFixedPoint(forces[I].x));
ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[I].y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[I].y));
ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[I].z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[I].z));
} }
} }
} }
...@@ -201,30 +201,30 @@ KERNEL void computeNonbonded( ...@@ -201,30 +201,30 @@ KERNEL void computeNonbonded(
const unsigned int offset = y*TILE_SIZE + tgx; const unsigned int offset = y*TILE_SIZE + tgx;
// write results for off diagonal tiles // write results for off diagonal tiles
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (shflForce.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(shflForce.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (shflForce.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(shflForce.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (shflForce.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(shflForce.z));
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (shflTorque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(shflTorque.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (shflTorque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(shflTorque.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (shflTorque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(shflTorque.z));
#else #else
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fx*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fx));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fy*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fy));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fz*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fz));
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].tx*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].tx));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].ty*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].ty));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].tz*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].tz));
#endif #endif
} }
// Write results for on and off diagonal tiles // Write results for on and off diagonal tiles
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (torque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(torque.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque.z));
} }
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all // Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...@@ -444,12 +444,12 @@ KERNEL void computeNonbonded( ...@@ -444,12 +444,12 @@ KERNEL void computeNonbonded(
// Write results. // Write results.
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) ((mm_long) (torque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) realToFixedPoint(torque.x));
ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque.y));
ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque.z));
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int atom2 = atomIndices[LOCAL_ID]; unsigned int atom2 = atomIndices[LOCAL_ID];
#else #else
...@@ -457,19 +457,19 @@ KERNEL void computeNonbonded( ...@@ -457,19 +457,19 @@ KERNEL void computeNonbonded(
#endif #endif
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) ((mm_long) (shflForce.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(shflForce.x));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (shflForce.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(shflForce.y));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (shflForce.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(shflForce.z));
ATOMIC_ADD(&torqueBuffers[atom2], (mm_ulong) ((mm_long) (shflTorque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom2], (mm_ulong) realToFixedPoint(shflTorque.x));
ATOMIC_ADD(&torqueBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (shflTorque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(shflTorque.y));
ATOMIC_ADD(&torqueBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (shflTorque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(shflTorque.z));
#else #else
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fx*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fx));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fy*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fy));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fz*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fz));
ATOMIC_ADD(&torqueBuffers[atom2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].tx*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom2], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].tx));
ATOMIC_ADD(&torqueBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].ty*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].ty));
ATOMIC_ADD(&torqueBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].tz*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].tz));
#endif #endif
} }
} }
......
...@@ -79,18 +79,18 @@ KERNEL void computeNonbondedExceptions( ...@@ -79,18 +79,18 @@ KERNEL void computeNonbondedExceptions(
real tempEnergy = 0.0f; real tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
energy += tempEnergy; energy += tempEnergy;
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (tempForce.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(tempForce.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempForce.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(tempForce.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempForce.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(tempForce.z));
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) ((mm_long) (-tempForce.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(-tempForce.x));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (-tempForce.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(-tempForce.y));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (-tempForce.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(-tempForce.z));
ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) ((mm_long) (tempTorque1.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) realToFixedPoint(tempTorque1.x));
ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempTorque1.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(tempTorque1.y));
ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempTorque1.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(tempTorque1.z));
ATOMIC_ADD(&torqueBuffers[atom2], (mm_ulong) ((mm_long) (tempTorque2.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom2], (mm_ulong) realToFixedPoint(tempTorque2.x));
ATOMIC_ADD(&torqueBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempTorque2.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(tempTorque2.y));
ATOMIC_ADD(&torqueBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempTorque2.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(tempTorque2.z));
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
......
...@@ -436,12 +436,12 @@ KERNEL void computeElectrostatics( ...@@ -436,12 +436,12 @@ KERNEL void computeElectrostatics(
} }
data.force *= -ENERGY_SCALE_FACTOR; data.force *= -ENERGY_SCALE_FACTOR;
data.torque *= ENERGY_SCALE_FACTOR; data.torque *= ENERGY_SCALE_FACTOR;
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) ((mm_long) (data.torque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) realToFixedPoint(data.torque.x));
ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.torque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.torque.y));
ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.torque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.torque.z));
SYNC_WARPS; SYNC_WARPS;
} }
else { else {
...@@ -468,19 +468,19 @@ KERNEL void computeElectrostatics( ...@@ -468,19 +468,19 @@ KERNEL void computeElectrostatics(
localData[LOCAL_ID].force *= -ENERGY_SCALE_FACTOR; localData[LOCAL_ID].force *= -ENERGY_SCALE_FACTOR;
localData[LOCAL_ID].torque *= ENERGY_SCALE_FACTOR; localData[LOCAL_ID].torque *= ENERGY_SCALE_FACTOR;
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (data.torque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(data.torque.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.torque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.torque.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.torque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.torque.z));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.z));
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].torque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].torque.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].torque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].torque.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].torque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].torque.z));
SYNC_WARPS; SYNC_WARPS;
} }
} }
...@@ -578,25 +578,25 @@ KERNEL void computeElectrostatics( ...@@ -578,25 +578,25 @@ KERNEL void computeElectrostatics(
// Write results. // Write results.
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (data.torque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(data.torque.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.torque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.torque.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.torque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.torque.z));
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
offset = atomIndices[LOCAL_ID]; offset = atomIndices[LOCAL_ID];
#else #else
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
#endif #endif
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.z));
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].torque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].torque.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].torque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].torque.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].torque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].torque.z));
} }
pos++; pos++;
} }
energyBuffer[GLOBAL_ID] += energy*ENERGY_SCALE_FACTOR; energyBuffer[GLOBAL_ID] += energy*ENERGY_SCALE_FACTOR;
} }
\ No newline at end of file
...@@ -564,29 +564,29 @@ KERNEL void computeFixedField( ...@@ -564,29 +564,29 @@ KERNEL void computeFixedField(
// Write results. // Write results.
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&fieldBuffers[offset], (mm_ulong) ((mm_long) (data.field.x*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset], (mm_ulong) realToFixedPoint(data.field.x));
ATOMIC_ADD(&fieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.field.y*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.field.y));
ATOMIC_ADD(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.field.z*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.field.z));
ATOMIC_ADD(&fieldPolarBuffers[offset], (mm_ulong) ((mm_long) (data.fieldPolar.x*0x100000000))); ATOMIC_ADD(&fieldPolarBuffers[offset], (mm_ulong) realToFixedPoint(data.fieldPolar.x));
ATOMIC_ADD(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.fieldPolar.y*0x100000000))); ATOMIC_ADD(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.fieldPolar.y));
ATOMIC_ADD(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.fieldPolar.z*0x100000000))); ATOMIC_ADD(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.fieldPolar.z));
#ifdef USE_GK #ifdef USE_GK
ATOMIC_ADD(&gkFieldBuffers[offset], (mm_ulong) ((mm_long) (data.gkField.x*0x100000000))); ATOMIC_ADD(&gkFieldBuffers[offset], (mm_ulong) realToFixedPoint(data.gkField.x));
ATOMIC_ADD(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.gkField.y*0x100000000))); ATOMIC_ADD(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.gkField.y));
ATOMIC_ADD(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.gkField.z*0x100000000))); ATOMIC_ADD(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.gkField.z));
#endif #endif
if (x != y) { if (x != y) {
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&fieldBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].field.x*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].field.x));
ATOMIC_ADD(&fieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].field.y*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].field.y));
ATOMIC_ADD(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].field.z*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].field.z));
ATOMIC_ADD(&fieldPolarBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fieldPolar.x*0x100000000))); ATOMIC_ADD(&fieldPolarBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fieldPolar.x));
ATOMIC_ADD(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fieldPolar.y*0x100000000))); ATOMIC_ADD(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fieldPolar.y));
ATOMIC_ADD(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fieldPolar.z*0x100000000))); ATOMIC_ADD(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fieldPolar.z));
#ifdef USE_GK #ifdef USE_GK
ATOMIC_ADD(&gkFieldBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].gkField.x*0x100000000))); ATOMIC_ADD(&gkFieldBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].gkField.x));
ATOMIC_ADD(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].gkField.y*0x100000000))); ATOMIC_ADD(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].gkField.y));
ATOMIC_ADD(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].gkField.z*0x100000000))); ATOMIC_ADD(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].gkField.z));
#endif #endif
} }
} }
...@@ -706,32 +706,32 @@ KERNEL void computeFixedField( ...@@ -706,32 +706,32 @@ KERNEL void computeFixedField(
// Write results. // Write results.
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&fieldBuffers[offset], (mm_ulong) ((mm_long) (data.field.x*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset], (mm_ulong) realToFixedPoint(data.field.x));
ATOMIC_ADD(&fieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.field.y*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.field.y));
ATOMIC_ADD(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.field.z*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.field.z));
ATOMIC_ADD(&fieldPolarBuffers[offset], (mm_ulong) ((mm_long) (data.fieldPolar.x*0x100000000))); ATOMIC_ADD(&fieldPolarBuffers[offset], (mm_ulong) realToFixedPoint(data.fieldPolar.x));
ATOMIC_ADD(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.fieldPolar.y*0x100000000))); ATOMIC_ADD(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.fieldPolar.y));
ATOMIC_ADD(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.fieldPolar.z*0x100000000))); ATOMIC_ADD(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.fieldPolar.z));
#ifdef USE_GK #ifdef USE_GK
ATOMIC_ADD(&gkFieldBuffers[offset], (mm_ulong) ((mm_long) (data.gkField.x*0x100000000))); ATOMIC_ADD(&gkFieldBuffers[offset], (mm_ulong) realToFixedPoint(data.gkField.x));
ATOMIC_ADD(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.gkField.y*0x100000000))); ATOMIC_ADD(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.gkField.y));
ATOMIC_ADD(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.gkField.z*0x100000000))); ATOMIC_ADD(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.gkField.z));
#endif #endif
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
offset = atomIndices[LOCAL_ID]; offset = atomIndices[LOCAL_ID];
#else #else
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
#endif #endif
ATOMIC_ADD(&fieldBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].field.x*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].field.x));
ATOMIC_ADD(&fieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].field.y*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].field.y));
ATOMIC_ADD(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].field.z*0x100000000))); ATOMIC_ADD(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].field.z));
ATOMIC_ADD(&fieldPolarBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fieldPolar.x*0x100000000))); ATOMIC_ADD(&fieldPolarBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fieldPolar.x));
ATOMIC_ADD(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fieldPolar.y*0x100000000))); ATOMIC_ADD(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fieldPolar.y));
ATOMIC_ADD(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fieldPolar.z*0x100000000))); ATOMIC_ADD(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fieldPolar.z));
#ifdef USE_GK #ifdef USE_GK
ATOMIC_ADD(&gkFieldBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].gkField.x*0x100000000))); ATOMIC_ADD(&gkFieldBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].gkField.x));
ATOMIC_ADD(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].gkField.y*0x100000000))); ATOMIC_ADD(&gkFieldBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].gkField.y));
ATOMIC_ADD(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].gkField.z*0x100000000))); ATOMIC_ADD(&gkFieldBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].gkField.z));
#endif #endif
} }
pos++; pos++;
......
...@@ -107,27 +107,27 @@ inline DEVICE void saveAtomData(int index, AtomData data, GLOBAL mm_ulong* RESTR ...@@ -107,27 +107,27 @@ inline DEVICE void saveAtomData(int index, AtomData data, GLOBAL mm_ulong* RESTR
#endif #endif
#endif #endif
) { ) {
ATOMIC_ADD(&field[index], (mm_ulong) ((mm_long) (data.field.x*0x100000000))); ATOMIC_ADD(&field[index], (mm_ulong) realToFixedPoint(data.field.x));
ATOMIC_ADD(&field[index+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.field.y*0x100000000))); ATOMIC_ADD(&field[index+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.field.y));
ATOMIC_ADD(&field[index+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.field.z*0x100000000))); ATOMIC_ADD(&field[index+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.field.z));
ATOMIC_ADD(&fieldPolar[index], (mm_ulong) ((mm_long) (data.fieldPolar.x*0x100000000))); ATOMIC_ADD(&fieldPolar[index], (mm_ulong) realToFixedPoint(data.fieldPolar.x));
ATOMIC_ADD(&fieldPolar[index+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.fieldPolar.y*0x100000000))); ATOMIC_ADD(&fieldPolar[index+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.fieldPolar.y));
ATOMIC_ADD(&fieldPolar[index+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.fieldPolar.z*0x100000000))); ATOMIC_ADD(&fieldPolar[index+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.fieldPolar.z));
#ifdef USE_GK #ifdef USE_GK
ATOMIC_ADD(&fieldS[index], (mm_ulong) ((mm_long) (data.fieldS.x*0x100000000))); ATOMIC_ADD(&fieldS[index], (mm_ulong) realToFixedPoint(data.fieldS.x));
ATOMIC_ADD(&fieldS[index+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.fieldS.y*0x100000000))); ATOMIC_ADD(&fieldS[index+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.fieldS.y));
ATOMIC_ADD(&fieldS[index+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.fieldS.z*0x100000000))); ATOMIC_ADD(&fieldS[index+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.fieldS.z));
ATOMIC_ADD(&fieldPolarS[index], (mm_ulong) ((mm_long) (data.fieldPolarS.x*0x100000000))); ATOMIC_ADD(&fieldPolarS[index], (mm_ulong) realToFixedPoint(data.fieldPolarS.x));
ATOMIC_ADD(&fieldPolarS[index+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.fieldPolarS.y*0x100000000))); ATOMIC_ADD(&fieldPolarS[index+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.fieldPolarS.y));
ATOMIC_ADD(&fieldPolarS[index+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.fieldPolarS.z*0x100000000))); ATOMIC_ADD(&fieldPolarS[index+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.fieldPolarS.z));
#endif #endif
#ifdef EXTRAPOLATED_POLARIZATION #ifdef EXTRAPOLATED_POLARIZATION
for (int i = 0; i < 6; i++) { for (int i = 0; i < 6; i++) {
ATOMIC_ADD(&fieldGradient[6*index+i], (mm_ulong) ((mm_long) (data.fieldGradient[i]*0x100000000))); ATOMIC_ADD(&fieldGradient[6*index+i], (mm_ulong) realToFixedPoint(data.fieldGradient[i]));
ATOMIC_ADD(&fieldGradientPolar[6*index+i], (mm_ulong) ((mm_long) (data.fieldGradientPolar[i]*0x100000000))); ATOMIC_ADD(&fieldGradientPolar[6*index+i], (mm_ulong) realToFixedPoint(data.fieldGradientPolar[i]));
#ifdef USE_GK #ifdef USE_GK
ATOMIC_ADD(&fieldGradientS[6*index+i], (mm_ulong) ((mm_long) (data.fieldGradientS[i]*0x100000000))); ATOMIC_ADD(&fieldGradientS[6*index+i], (mm_ulong) realToFixedPoint(data.fieldGradientS[i]));
ATOMIC_ADD(&fieldGradientPolarS[6*index+i], (mm_ulong) ((mm_long) (data.fieldGradientPolarS[i]*0x100000000))); ATOMIC_ADD(&fieldGradientPolarS[6*index+i], (mm_ulong) realToFixedPoint(data.fieldGradientPolarS[i]));
#endif #endif
} }
#endif #endif
...@@ -995,9 +995,9 @@ KERNEL void addExtrapolatedFieldGradientToForce(GLOBAL mm_long* RESTRICT forceBu ...@@ -995,9 +995,9 @@ KERNEL void addExtrapolatedFieldGradientToForce(GLOBAL mm_long* RESTRICT forceBu
#endif #endif
} }
} }
forceBuffers[atom] += (mm_long) (fx*0x100000000); forceBuffers[atom] += realToFixedPoint(fx);
forceBuffers[atom+PADDED_NUM_ATOMS] += (mm_long) (fy*0x100000000); forceBuffers[atom+PADDED_NUM_ATOMS] += realToFixedPoint(fy);
forceBuffers[atom+PADDED_NUM_ATOMS*2] += (mm_long) (fz*0x100000000); forceBuffers[atom+PADDED_NUM_ATOMS*2] += realToFixedPoint(fz);
} }
} }
......
...@@ -275,13 +275,13 @@ KERNEL void gridSpreadFixedMultipoles(GLOBAL const real4* RESTRICT posq, GLOBAL ...@@ -275,13 +275,13 @@ KERNEL void gridSpreadFixedMultipoles(GLOBAL const real4* RESTRICT posq, GLOBAL
real add = term0*v.x + term1*v.y + term2*v.z; real add = term0*v.x + term1*v.y + term2*v.z;
#ifdef HIPPO #ifdef HIPPO
#ifdef USE_FIXED_POINT_CHARGE_SPREADING #ifdef USE_FIXED_POINT_CHARGE_SPREADING
ATOMIC_ADD(&pmeGrid[index], (mm_ulong) ((mm_long) (add*0x100000000))); ATOMIC_ADD(&pmeGrid[index], (mm_ulong) realToFixedPoint(add));
#else #else
ATOMIC_ADD(&pmeGrid[index], add); ATOMIC_ADD(&pmeGrid[index], add);
#endif #endif
#else #else
#ifdef USE_FIXED_POINT_CHARGE_SPREADING #ifdef USE_FIXED_POINT_CHARGE_SPREADING
ATOMIC_ADD(&pmeGrid[2*index], (mm_ulong) ((mm_long) (add*0x100000000))); ATOMIC_ADD(&pmeGrid[2*index], (mm_ulong) realToFixedPoint(add));
#else #else
ATOMIC_ADD(&pmeGrid[index].x, add); ATOMIC_ADD(&pmeGrid[index].x, add);
#endif #endif
...@@ -397,15 +397,15 @@ KERNEL void gridSpreadInducedDipoles(GLOBAL const real4* RESTRICT posq, GLOBAL c ...@@ -397,15 +397,15 @@ KERNEL void gridSpreadInducedDipoles(GLOBAL const real4* RESTRICT posq, GLOBAL c
real add1 = term01*v.x + term11*v.y; real add1 = term01*v.x + term11*v.y;
#ifdef HIPPO #ifdef HIPPO
#ifdef USE_FIXED_POINT_CHARGE_SPREADING #ifdef USE_FIXED_POINT_CHARGE_SPREADING
ATOMIC_ADD(&pmeGrid[index], (mm_ulong) ((mm_long) (add1*0x100000000))); ATOMIC_ADD(&pmeGrid[index], (mm_ulong) realToFixedPoint(add1));
#else #else
ATOMIC_ADD(&pmeGrid[index], add1); ATOMIC_ADD(&pmeGrid[index], add1);
#endif #endif
#else #else
real add2 = term02*v.x + term12*v.y; real add2 = term02*v.x + term12*v.y;
#ifdef USE_FIXED_POINT_CHARGE_SPREADING #ifdef USE_FIXED_POINT_CHARGE_SPREADING
ATOMIC_ADD(&pmeGrid[2*index], (mm_ulong) ((mm_long) (add1*0x100000000))); ATOMIC_ADD(&pmeGrid[2*index], (mm_ulong) realToFixedPoint(add1));
ATOMIC_ADD(&pmeGrid[2*index+1], (mm_ulong) ((mm_long) (add2*0x100000000))); ATOMIC_ADD(&pmeGrid[2*index+1], (mm_ulong) realToFixedPoint(add2));
#else #else
ATOMIC_ADD(&pmeGrid[index].x, add1); ATOMIC_ADD(&pmeGrid[index].x, add1);
ATOMIC_ADD(&pmeGrid[index].y, add2); ATOMIC_ADD(&pmeGrid[index].y, add2);
...@@ -648,9 +648,9 @@ KERNEL void computeFixedPotentialFromGrid( ...@@ -648,9 +648,9 @@ KERNEL void computeFixedPotentialFromGrid(
phi[m+NUM_ATOMS*18] = tuv012; phi[m+NUM_ATOMS*18] = tuv012;
phi[m+NUM_ATOMS*19] = tuv111; phi[m+NUM_ATOMS*19] = tuv111;
real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI; real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
mm_long fieldx = (mm_long) ((dipoleScale*labDipole[m*3]-tuv100*fracToCart[0][0]-tuv010*fracToCart[0][1]-tuv001*fracToCart[0][2])*0x100000000); mm_long fieldx = realToFixedPoint(dipoleScale*labDipole[m*3]-tuv100*fracToCart[0][0]-tuv010*fracToCart[0][1]-tuv001*fracToCart[0][2]);
mm_long fieldy = (mm_long) ((dipoleScale*labDipole[m*3+1]-tuv100*fracToCart[1][0]-tuv010*fracToCart[1][1]-tuv001*fracToCart[1][2])*0x100000000); mm_long fieldy = realToFixedPoint(dipoleScale*labDipole[m*3+1]-tuv100*fracToCart[1][0]-tuv010*fracToCart[1][1]-tuv001*fracToCart[1][2]);
mm_long fieldz = (mm_long) ((dipoleScale*labDipole[m*3+2]-tuv100*fracToCart[2][0]-tuv010*fracToCart[2][1]-tuv001*fracToCart[2][2])*0x100000000); mm_long fieldz = realToFixedPoint(dipoleScale*labDipole[m*3+2]-tuv100*fracToCart[2][0]-tuv010*fracToCart[2][1]-tuv001*fracToCart[2][2]);
fieldBuffers[m] = fieldx; fieldBuffers[m] = fieldx;
fieldBuffers[m+PADDED_NUM_ATOMS] = fieldy; fieldBuffers[m+PADDED_NUM_ATOMS] = fieldy;
fieldBuffers[m+2*PADDED_NUM_ATOMS] = fieldz; fieldBuffers[m+2*PADDED_NUM_ATOMS] = fieldz;
...@@ -999,20 +999,20 @@ KERNEL void computeFixedMultipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLO ...@@ -999,20 +999,20 @@ KERNEL void computeFixedMultipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLO
GLOBAL const real* cphi = &cphi_global[10*i]; GLOBAL const real* cphi = &cphi_global[10*i];
torqueBuffers[i] = (mm_long) (EPSILON_FACTOR*(multipole[3]*cphi[2] - multipole[2]*cphi[3] torqueBuffers[i] = realToFixedPoint(EPSILON_FACTOR*(multipole[3]*cphi[2] - multipole[2]*cphi[3]
+ 2*(multipole[6]-multipole[5])*cphi[9] + 2*(multipole[6]-multipole[5])*cphi[9]
+ multipole[8]*cphi[7] + multipole[9]*cphi[5] + multipole[8]*cphi[7] + multipole[9]*cphi[5]
- multipole[7]*cphi[8] - multipole[9]*cphi[6])*0x100000000); - multipole[7]*cphi[8] - multipole[9]*cphi[6]));
torqueBuffers[i+PADDED_NUM_ATOMS] = (mm_long) (EPSILON_FACTOR*(multipole[1]*cphi[3] - multipole[3]*cphi[1] torqueBuffers[i+PADDED_NUM_ATOMS] = realToFixedPoint(EPSILON_FACTOR*(multipole[1]*cphi[3] - multipole[3]*cphi[1]
+ 2*(multipole[4]-multipole[6])*cphi[8] + 2*(multipole[4]-multipole[6])*cphi[8]
+ multipole[7]*cphi[9] + multipole[8]*cphi[6] + multipole[7]*cphi[9] + multipole[8]*cphi[6]
- multipole[8]*cphi[4] - multipole[9]*cphi[7])*0x100000000); - multipole[8]*cphi[4] - multipole[9]*cphi[7]));
torqueBuffers[i+PADDED_NUM_ATOMS*2] = (mm_long) (EPSILON_FACTOR*(multipole[2]*cphi[1] - multipole[1]*cphi[2] torqueBuffers[i+PADDED_NUM_ATOMS*2] = realToFixedPoint(EPSILON_FACTOR*(multipole[2]*cphi[1] - multipole[1]*cphi[2]
+ 2*(multipole[5]-multipole[4])*cphi[7] + 2*(multipole[5]-multipole[4])*cphi[7]
+ multipole[7]*cphi[4] + multipole[9]*cphi[8] + multipole[7]*cphi[4] + multipole[9]*cphi[8]
- multipole[7]*cphi[5] - multipole[8]*cphi[9])*0x100000000); - multipole[7]*cphi[5] - multipole[8]*cphi[9]));
// Compute the force and energy. // Compute the force and energy.
...@@ -1036,9 +1036,9 @@ KERNEL void computeFixedMultipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLO ...@@ -1036,9 +1036,9 @@ KERNEL void computeFixedMultipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLO
f = make_real3(EPSILON_FACTOR*(f.x*fracToCart[0][0] + f.y*fracToCart[0][1] + f.z*fracToCart[0][2]), f = make_real3(EPSILON_FACTOR*(f.x*fracToCart[0][0] + f.y*fracToCart[0][1] + f.z*fracToCart[0][2]),
EPSILON_FACTOR*(f.x*fracToCart[1][0] + f.y*fracToCart[1][1] + f.z*fracToCart[1][2]), EPSILON_FACTOR*(f.x*fracToCart[1][0] + f.y*fracToCart[1][1] + f.z*fracToCart[1][2]),
EPSILON_FACTOR*(f.x*fracToCart[2][0] + f.y*fracToCart[2][1] + f.z*fracToCart[2][2])); EPSILON_FACTOR*(f.x*fracToCart[2][0] + f.y*fracToCart[2][1] + f.z*fracToCart[2][2]));
forceBuffers[i] -= (mm_ulong) ((mm_long) (f.x*0x100000000)); forceBuffers[i] -= (mm_ulong) realToFixedPoint(f.x);
forceBuffers[i+PADDED_NUM_ATOMS] -= (mm_ulong) ((mm_long) (f.y*0x100000000)); forceBuffers[i+PADDED_NUM_ATOMS] -= (mm_ulong) realToFixedPoint(f.y);
forceBuffers[i+PADDED_NUM_ATOMS*2] -= (mm_ulong) ((mm_long) (f.z*0x100000000)); forceBuffers[i+PADDED_NUM_ATOMS*2] -= (mm_ulong) realToFixedPoint(f.z);
} }
energyBuffer[GLOBAL_ID] += 0.5f*EPSILON_FACTOR*energy; energyBuffer[GLOBAL_ID] += 0.5f*EPSILON_FACTOR*energy;
} }
...@@ -1110,20 +1110,20 @@ KERNEL void computeInducedDipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLOB ...@@ -1110,20 +1110,20 @@ KERNEL void computeInducedDipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLOB
multipole[6] = -(multipole[4]+multipole[5]); multipole[6] = -(multipole[4]+multipole[5]);
GLOBAL const real* cphi = &cphi_global[10*i]; GLOBAL const real* cphi = &cphi_global[10*i];
torqueBuffers[i] += (mm_long) (scale*(multipole[3]*cphi[2] - multipole[2]*cphi[3] torqueBuffers[i] += realToFixedPoint(scale*(multipole[3]*cphi[2] - multipole[2]*cphi[3]
+ 2*(multipole[6]-multipole[5])*cphi[9] + 2*(multipole[6]-multipole[5])*cphi[9]
+ multipole[8]*cphi[7] + multipole[9]*cphi[5] + multipole[8]*cphi[7] + multipole[9]*cphi[5]
- multipole[7]*cphi[8] - multipole[9]*cphi[6])*0x100000000); - multipole[7]*cphi[8] - multipole[9]*cphi[6]));
torqueBuffers[i+PADDED_NUM_ATOMS] += (mm_long) (scale*(multipole[1]*cphi[3] - multipole[3]*cphi[1] torqueBuffers[i+PADDED_NUM_ATOMS] += realToFixedPoint(scale*(multipole[1]*cphi[3] - multipole[3]*cphi[1]
+ 2*(multipole[4]-multipole[6])*cphi[8] + 2*(multipole[4]-multipole[6])*cphi[8]
+ multipole[7]*cphi[9] + multipole[8]*cphi[6] + multipole[7]*cphi[9] + multipole[8]*cphi[6]
- multipole[8]*cphi[4] - multipole[9]*cphi[7])*0x100000000); - multipole[8]*cphi[4] - multipole[9]*cphi[7]));
torqueBuffers[i+PADDED_NUM_ATOMS*2] += (mm_long) (scale*(multipole[2]*cphi[1] - multipole[1]*cphi[2] torqueBuffers[i+PADDED_NUM_ATOMS*2] += realToFixedPoint(scale*(multipole[2]*cphi[1] - multipole[1]*cphi[2]
+ 2*(multipole[5]-multipole[4])*cphi[7] + 2*(multipole[5]-multipole[4])*cphi[7]
+ multipole[7]*cphi[4] + multipole[9]*cphi[8] + multipole[7]*cphi[4] + multipole[9]*cphi[8]
- multipole[7]*cphi[5] - multipole[8]*cphi[9])*0x100000000); - multipole[7]*cphi[5] - multipole[8]*cphi[9]));
// Compute the force and energy. // Compute the force and energy.
...@@ -1206,9 +1206,9 @@ KERNEL void computeInducedDipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLOB ...@@ -1206,9 +1206,9 @@ KERNEL void computeInducedDipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLOB
f = make_real3(scale*(f.x*fracToCart[0][0] + f.y*fracToCart[0][1] + f.z*fracToCart[0][2]), f = make_real3(scale*(f.x*fracToCart[0][0] + f.y*fracToCart[0][1] + f.z*fracToCart[0][2]),
scale*(f.x*fracToCart[1][0] + f.y*fracToCart[1][1] + f.z*fracToCart[1][2]), scale*(f.x*fracToCart[1][0] + f.y*fracToCart[1][1] + f.z*fracToCart[1][2]),
scale*(f.x*fracToCart[2][0] + f.y*fracToCart[2][1] + f.z*fracToCart[2][2])); scale*(f.x*fracToCart[2][0] + f.y*fracToCart[2][1] + f.z*fracToCart[2][2]));
forceBuffers[i] -= (mm_ulong) ((mm_long) (f.x*0x100000000)); forceBuffers[i] -= (mm_ulong) realToFixedPoint(f.x);
forceBuffers[i+PADDED_NUM_ATOMS] -= (mm_ulong) ((mm_long) (f.y*0x100000000)); forceBuffers[i+PADDED_NUM_ATOMS] -= (mm_ulong) realToFixedPoint(f.y);
forceBuffers[i+PADDED_NUM_ATOMS*2] -= (mm_ulong) ((mm_long) (f.z*0x100000000)); forceBuffers[i+PADDED_NUM_ATOMS*2] -= (mm_ulong) realToFixedPoint(f.z);
} }
#ifndef HIPPO #ifndef HIPPO
energyBuffer[GLOBAL_ID] += 0.25f*EPSILON_FACTOR*energy; energyBuffer[GLOBAL_ID] += 0.25f*EPSILON_FACTOR*energy;
...@@ -1233,9 +1233,9 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phidp, GLOBAL ...@@ -1233,9 +1233,9 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phidp, GLOBAL
SYNC_THREADS; SYNC_THREADS;
real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI; real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
for (int i = GLOBAL_ID; i < NUM_ATOMS; i += GLOBAL_SIZE) { for (int i = GLOBAL_ID; i < NUM_ATOMS; i += GLOBAL_SIZE) {
inducedField[i] -= (mm_long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[0][0] + phidp[i+NUM_ATOMS*2]*fracToCart[0][1] + phidp[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[3*i])); inducedField[i] -= realToFixedPoint(phidp[i+NUM_ATOMS]*fracToCart[0][0] + phidp[i+NUM_ATOMS*2]*fracToCart[0][1] + phidp[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[3*i]);
inducedField[i+PADDED_NUM_ATOMS] -= (mm_long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[1][0] + phidp[i+NUM_ATOMS*2]*fracToCart[1][1] + phidp[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[3*i+1])); inducedField[i+PADDED_NUM_ATOMS] -= realToFixedPoint(phidp[i+NUM_ATOMS]*fracToCart[1][0] + phidp[i+NUM_ATOMS*2]*fracToCart[1][1] + phidp[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[3*i+1]);
inducedField[i+PADDED_NUM_ATOMS*2] -= (mm_long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[2][0] + phidp[i+NUM_ATOMS*2]*fracToCart[2][1] + phidp[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[3*i+2])); inducedField[i+PADDED_NUM_ATOMS*2] -= realToFixedPoint(phidp[i+NUM_ATOMS]*fracToCart[2][0] + phidp[i+NUM_ATOMS*2]*fracToCart[2][1] + phidp[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[3*i+2]);
} }
} }
...@@ -1264,9 +1264,9 @@ KERNEL void calculateSelfEnergyAndTorque(GLOBAL mm_long* RESTRICT torqueBuffers, ...@@ -1264,9 +1264,9 @@ KERNEL void calculateSelfEnergyAndTorque(GLOBAL mm_long* RESTRICT torqueBuffers,
qii += qXX*qXX + qYY*qYY + qZZ*qZZ + 2*(qXY*qXY + qXZ*qXZ + qYZ*qYZ); qii += qXX*qXX + qYY*qYY + qZZ*qZZ + 2*(qXY*qXY + qXZ*qXZ + qYZ*qYZ);
c6ii += c6i*c6i; c6ii += c6i*c6i;
real3 torque = torqueScale*cross(dipole, induced); real3 torque = torqueScale*cross(dipole, induced);
torqueBuffers[i] += (mm_long) (torque.x*0x100000000); torqueBuffers[i] += realToFixedPoint(torque.x);
torqueBuffers[i+PADDED_NUM_ATOMS] += (mm_long) (torque.y*0x100000000); torqueBuffers[i+PADDED_NUM_ATOMS] += realToFixedPoint(torque.y);
torqueBuffers[i+PADDED_NUM_ATOMS*2] += (mm_long) (torque.z*0x100000000); torqueBuffers[i+PADDED_NUM_ATOMS*2] += realToFixedPoint(torque.z);
} }
real term = 2*EWALD_ALPHA*EWALD_ALPHA; real term = 2*EWALD_ALPHA*EWALD_ALPHA;
real fterm = -EPSILON_FACTOR*EWALD_ALPHA/SQRT_PI; real fterm = -EPSILON_FACTOR*EWALD_ALPHA/SQRT_PI;
...@@ -1296,12 +1296,12 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phid, GLOBAL r ...@@ -1296,12 +1296,12 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phid, GLOBAL r
SYNC_THREADS; SYNC_THREADS;
real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI; real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
for (int i = GLOBAL_ID; i < NUM_ATOMS; i += GLOBAL_SIZE) { for (int i = GLOBAL_ID; i < NUM_ATOMS; i += GLOBAL_SIZE) {
inducedField[i] -= (mm_long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[0][0] + phid[i+NUM_ATOMS*2]*fracToCart[0][1] + phid[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[3*i])); inducedField[i] -= realToFixedPoint(phid[i+NUM_ATOMS]*fracToCart[0][0] + phid[i+NUM_ATOMS*2]*fracToCart[0][1] + phid[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[3*i]);
inducedField[i+PADDED_NUM_ATOMS] -= (mm_long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[1][0] + phid[i+NUM_ATOMS*2]*fracToCart[1][1] + phid[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[3*i+1])); inducedField[i+PADDED_NUM_ATOMS] -= realToFixedPoint(phid[i+NUM_ATOMS]*fracToCart[1][0] + phid[i+NUM_ATOMS*2]*fracToCart[1][1] + phid[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[3*i+1]);
inducedField[i+PADDED_NUM_ATOMS*2] -= (mm_long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[2][0] + phid[i+NUM_ATOMS*2]*fracToCart[2][1] + phid[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[3*i+2])); inducedField[i+PADDED_NUM_ATOMS*2] -= realToFixedPoint(phid[i+NUM_ATOMS]*fracToCart[2][0] + phid[i+NUM_ATOMS*2]*fracToCart[2][1] + phid[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[3*i+2]);
inducedFieldPolar[i] -= (mm_long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[0][0] + phip[i+NUM_ATOMS*2]*fracToCart[0][1] + phip[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipolePolar[3*i])); inducedFieldPolar[i] -= realToFixedPoint(phip[i+NUM_ATOMS]*fracToCart[0][0] + phip[i+NUM_ATOMS*2]*fracToCart[0][1] + phip[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipolePolar[3*i]);
inducedFieldPolar[i+PADDED_NUM_ATOMS] -= (mm_long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[1][0] + phip[i+NUM_ATOMS*2]*fracToCart[1][1] + phip[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipolePolar[3*i+1])); inducedFieldPolar[i+PADDED_NUM_ATOMS] -= realToFixedPoint(phip[i+NUM_ATOMS]*fracToCart[1][0] + phip[i+NUM_ATOMS*2]*fracToCart[1][1] + phip[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipolePolar[3*i+1]);
inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= (mm_long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[2][0] + phip[i+NUM_ATOMS*2]*fracToCart[2][1] + phip[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipolePolar[3*i+2])); inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= realToFixedPoint(phip[i+NUM_ATOMS]*fracToCart[2][0] + phip[i+NUM_ATOMS*2]*fracToCart[2][1] + phip[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipolePolar[3*i+2]);
#ifdef EXTRAPOLATED_POLARIZATION #ifdef EXTRAPOLATED_POLARIZATION
// Compute and store the field gradients for later use. // Compute and store the field gradients for later use.
...@@ -1321,12 +1321,12 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phid, GLOBAL r ...@@ -1321,12 +1321,12 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phid, GLOBAL r
Eyz += fracToCart[1][k] * EmatD[k][l] * fracToCart[2][l]; Eyz += fracToCart[1][k] * EmatD[k][l] * fracToCart[2][l];
} }
} }
ATOMIC_ADD(&fieldGradient[6*i+0], (mm_ulong) ((mm_long) (-Exx*0x100000000))); ATOMIC_ADD(&fieldGradient[6*i+0], (mm_ulong) realToFixedPoint(-Exx));
ATOMIC_ADD(&fieldGradient[6*i+1], (mm_ulong) ((mm_long) (-Eyy*0x100000000))); ATOMIC_ADD(&fieldGradient[6*i+1], (mm_ulong) realToFixedPoint(-Eyy));
ATOMIC_ADD(&fieldGradient[6*i+2], (mm_ulong) ((mm_long) (-Ezz*0x100000000))); ATOMIC_ADD(&fieldGradient[6*i+2], (mm_ulong) realToFixedPoint(-Ezz));
ATOMIC_ADD(&fieldGradient[6*i+3], (mm_ulong) ((mm_long) (-Exy*0x100000000))); ATOMIC_ADD(&fieldGradient[6*i+3], (mm_ulong) realToFixedPoint(-Exy));
ATOMIC_ADD(&fieldGradient[6*i+4], (mm_ulong) ((mm_long) (-Exz*0x100000000))); ATOMIC_ADD(&fieldGradient[6*i+4], (mm_ulong) realToFixedPoint(-Exz));
ATOMIC_ADD(&fieldGradient[6*i+5], (mm_ulong) ((mm_long) (-Eyz*0x100000000))); ATOMIC_ADD(&fieldGradient[6*i+5], (mm_ulong) realToFixedPoint(-Eyz));
real EmatP[3][3] = { real EmatP[3][3] = {
{phip[i+NUM_ATOMS*4], phip[i+NUM_ATOMS*7], phip[i+NUM_ATOMS*8]}, {phip[i+NUM_ATOMS*4], phip[i+NUM_ATOMS*7], phip[i+NUM_ATOMS*8]},
...@@ -1344,13 +1344,13 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phid, GLOBAL r ...@@ -1344,13 +1344,13 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phid, GLOBAL r
Eyz += fracToCart[1][k] * EmatP[k][l] * fracToCart[2][l]; Eyz += fracToCart[1][k] * EmatP[k][l] * fracToCart[2][l];
} }
} }
ATOMIC_ADD(&fieldGradientPolar[6*i+0], (mm_ulong) ((mm_long) (-Exx*0x100000000))); ATOMIC_ADD(&fieldGradientPolar[6*i+0], (mm_ulong) realToFixedPoint(-Exx));
ATOMIC_ADD(&fieldGradientPolar[6*i+1], (mm_ulong) ((mm_long) (-Eyy*0x100000000))); ATOMIC_ADD(&fieldGradientPolar[6*i+1], (mm_ulong) realToFixedPoint(-Eyy));
ATOMIC_ADD(&fieldGradientPolar[6*i+2], (mm_ulong) ((mm_long) (-Ezz*0x100000000))); ATOMIC_ADD(&fieldGradientPolar[6*i+2], (mm_ulong) realToFixedPoint(-Ezz));
ATOMIC_ADD(&fieldGradientPolar[6*i+3], (mm_ulong) ((mm_long) (-Exy*0x100000000))); ATOMIC_ADD(&fieldGradientPolar[6*i+3], (mm_ulong) realToFixedPoint(-Exy));
ATOMIC_ADD(&fieldGradientPolar[6*i+4], (mm_ulong) ((mm_long) (-Exz*0x100000000))); ATOMIC_ADD(&fieldGradientPolar[6*i+4], (mm_ulong) realToFixedPoint(-Exz));
ATOMIC_ADD(&fieldGradientPolar[6*i+5], (mm_ulong) ((mm_long) (-Eyz*0x100000000))); ATOMIC_ADD(&fieldGradientPolar[6*i+5], (mm_ulong) realToFixedPoint(-Eyz));
#endif #endif
} }
} }
#endif #endif
\ No newline at end of file
...@@ -509,22 +509,22 @@ KERNEL void mapTorqueToForce(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL cons ...@@ -509,22 +509,22 @@ KERNEL void mapTorqueToForce(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL cons
// Store results // Store results
ATOMIC_ADD(&forceBuffers[particles.z], (mm_ulong) ((mm_long) (forces[Z].x*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.z], (mm_ulong) realToFixedPoint(forces[Z].x));
ATOMIC_ADD(&forceBuffers[particles.z+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[Z].y*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.z+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[Z].y));
ATOMIC_ADD(&forceBuffers[particles.z+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[Z].z*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.z+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[Z].z));
if (axisType != 4) { if (axisType != 4) {
ATOMIC_ADD(&forceBuffers[particles.x], (mm_ulong) ((mm_long) (forces[X].x*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.x], (mm_ulong) realToFixedPoint(forces[X].x));
ATOMIC_ADD(&forceBuffers[particles.x+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[X].y*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.x+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[X].y));
ATOMIC_ADD(&forceBuffers[particles.x+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[X].z*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.x+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[X].z));
} }
if ((axisType == 2 || axisType == 3) && particles.y > -1) { if ((axisType == 2 || axisType == 3) && particles.y > -1) {
ATOMIC_ADD(&forceBuffers[particles.y], (mm_ulong) ((mm_long) (forces[Y].x*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.y], (mm_ulong) realToFixedPoint(forces[Y].x));
ATOMIC_ADD(&forceBuffers[particles.y+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[Y].y*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.y+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[Y].y));
ATOMIC_ADD(&forceBuffers[particles.y+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[Y].z*0x100000000))); ATOMIC_ADD(&forceBuffers[particles.y+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[Y].z));
} }
ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) ((mm_long) (forces[I].x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) realToFixedPoint(forces[I].x));
ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[I].y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[I].y));
ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forces[I].z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forces[I].z));
} }
} }
} }
...@@ -589,4 +589,4 @@ KERNEL void computePotentialAtPoints(GLOBAL const real4* RESTRICT posq, GLOBAL c ...@@ -589,4 +589,4 @@ KERNEL void computePotentialAtPoints(GLOBAL const real4* RESTRICT posq, GLOBAL c
if (point < numPoints) if (point < numPoints)
potential[point] = p*ENERGY_SCALE_FACTOR; potential[point] = p*ENERGY_SCALE_FACTOR;
} }
} }
\ No newline at end of file
...@@ -502,12 +502,12 @@ KERNEL void computeElectrostatics( ...@@ -502,12 +502,12 @@ KERNEL void computeElectrostatics(
computeSelfEnergyAndTorque(&data, &energy); computeSelfEnergyAndTorque(&data, &energy);
data.force *= -ENERGY_SCALE_FACTOR; data.force *= -ENERGY_SCALE_FACTOR;
data.torque *= ENERGY_SCALE_FACTOR; data.torque *= ENERGY_SCALE_FACTOR;
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) ((mm_long) (data.torque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) realToFixedPoint(data.torque.x));
ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.torque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.torque.y));
ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.torque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.torque.z));
SYNC_WARPS; SYNC_WARPS;
} }
else { else {
...@@ -535,19 +535,19 @@ KERNEL void computeElectrostatics( ...@@ -535,19 +535,19 @@ KERNEL void computeElectrostatics(
localData[LOCAL_ID].force *= -ENERGY_SCALE_FACTOR; localData[LOCAL_ID].force *= -ENERGY_SCALE_FACTOR;
localData[LOCAL_ID].torque *= ENERGY_SCALE_FACTOR; localData[LOCAL_ID].torque *= ENERGY_SCALE_FACTOR;
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (data.torque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(data.torque.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.torque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.torque.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.torque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.torque.z));
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.z));
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].torque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].torque.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].torque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].torque.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].torque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].torque.z));
SYNC_WARPS; SYNC_WARPS;
} }
} }
...@@ -645,23 +645,23 @@ KERNEL void computeElectrostatics( ...@@ -645,23 +645,23 @@ KERNEL void computeElectrostatics(
// Write results. // Write results.
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (data.force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(data.force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.force.z));
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (data.torque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(data.torque.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.torque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.torque.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (data.torque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(data.torque.z));
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
offset = atomIndices[LOCAL_ID]; offset = atomIndices[LOCAL_ID];
#else #else
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
#endif #endif
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].force.z));
ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].torque.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].torque.x));
ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].torque.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].torque.y));
ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].torque.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].torque.z));
} }
pos++; pos++;
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment