Unverified commit 434d7afb, authored by Anton Gorenko, committed by GitHub
Browse files

Add realToFixedPoint to all platforms (#3504)

This allows the HIP platform to use a faster float-to-int64 conversion.
parent ca80579a
...@@ -1651,9 +1651,9 @@ void CommonCalcCustomCentroidBondForceKernel::initialize(const System& system, c ...@@ -1651,9 +1651,9 @@ void CommonCalcCustomCentroidBondForceKernel::initialize(const System& system, c
// Save the forces to global memory. // Save the forces to global memory.
for (int i = 0; i < groupsPerBond; i++) { for (int i = 0; i < groupsPerBond; i++) {
compute<<"ATOMIC_ADD(&groupForce[group"<<(i+1)<<"], (mm_ulong) ((mm_long) (force"<<(i+1)<<".x*0x100000000)));\n"; compute<<"ATOMIC_ADD(&groupForce[group"<<(i+1)<<"], (mm_ulong) realToFixedPoint(force"<<(i+1)<<".x));\n";
compute<<"ATOMIC_ADD(&groupForce[group"<<(i+1)<<"+numParticleGroups], (mm_ulong) ((mm_long) (force"<<(i+1)<<".y*0x100000000)));\n"; compute<<"ATOMIC_ADD(&groupForce[group"<<(i+1)<<"+numParticleGroups], (mm_ulong) realToFixedPoint(force"<<(i+1)<<".y));\n";
compute<<"ATOMIC_ADD(&groupForce[group"<<(i+1)<<"+numParticleGroups*2], (mm_ulong) ((mm_long) (force"<<(i+1)<<".z*0x100000000)));\n"; compute<<"ATOMIC_ADD(&groupForce[group"<<(i+1)<<"+numParticleGroups*2], (mm_ulong) realToFixedPoint(force"<<(i+1)<<".z));\n";
compute<<"MEM_FENCE;\n"; compute<<"MEM_FENCE;\n";
} }
map<string, string> replacements; map<string, string> replacements;
...@@ -2975,11 +2975,11 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -2975,11 +2975,11 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
else else
tempDerivs2 << "local_" << derivName << "[tbx+tj] += temp_" << derivName << "_2;\n"; tempDerivs2 << "local_" << derivName << "[tbx+tj] += temp_" << derivName << "_2;\n";
if (useLong) { if (useLong) {
storeDeriv1 << "ATOMIC_ADD(&global_" << derivName << "[offset1], (mm_ulong) ((mm_long) (" << derivName << "*0x100000000)));\n"; storeDeriv1 << "ATOMIC_ADD(&global_" << derivName << "[offset1], (mm_ulong) realToFixedPoint(" << derivName << "));\n";
if (deviceIsCpu) if (deviceIsCpu)
storeDeriv2 << "ATOMIC_ADD(&global_" << derivName << "[offset2], (mm_ulong) ((mm_long) (local_" << derivName << "[tgx]*0x100000000)));\n"; storeDeriv2 << "ATOMIC_ADD(&global_" << derivName << "[offset2], (mm_ulong) realToFixedPoint(local_" << derivName << "[tgx]));\n";
else else
storeDeriv2 << "ATOMIC_ADD(&global_" << derivName << "[offset2], (mm_ulong) ((mm_long) (local_" << derivName << "[LOCAL_ID]*0x100000000)));\n"; storeDeriv2 << "ATOMIC_ADD(&global_" << derivName << "[offset2], (mm_ulong) realToFixedPoint(local_" << derivName << "[LOCAL_ID]));\n";
} }
else { else {
storeDeriv1 << "global_" << derivName << "[offset1] += " << derivName << ";\n"; storeDeriv1 << "global_" << derivName << "[offset1] += " << derivName << ";\n";
...@@ -3353,9 +3353,9 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3353,9 +3353,9 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n"; compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n";
} }
if (useLong) { if (useLong) {
compute << "forceBuffers[index] += (mm_long) (force.x*0x100000000);\n"; compute << "forceBuffers[index] += realToFixedPoint(force.x);\n";
compute << "forceBuffers[index+PADDED_NUM_ATOMS] += (mm_long) (force.y*0x100000000);\n"; compute << "forceBuffers[index+PADDED_NUM_ATOMS] += realToFixedPoint(force.y);\n";
compute << "forceBuffers[index+PADDED_NUM_ATOMS*2] += (mm_long) (force.z*0x100000000);\n"; compute << "forceBuffers[index+PADDED_NUM_ATOMS*2] += realToFixedPoint(force.z);\n";
} }
else else
compute << "forceBuffers[index] = forceBuffers[index]+make_real4(force.x, force.y, force.z, 0);\n"; compute << "forceBuffers[index] = forceBuffers[index]+make_real4(force.x, force.y, force.z, 0);\n";
......
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (deriv##INDEX##_1*0x100000000))); #define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_deriv##INDEX[LOCAL_ID]*0x100000000))); #define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_deriv##INDEX[LOCAL_ID]));
#else #else
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1; #define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[LOCAL_ID]; #define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[LOCAL_ID];
...@@ -162,15 +162,15 @@ KERNEL void computeN2Energy( ...@@ -162,15 +162,15 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
STORE_DERIVATIVES_1 STORE_DERIVATIVES_1
if (x != y) { if (x != y) {
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (local_force[LOCAL_ID].x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(local_force[LOCAL_ID].x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[LOCAL_ID].y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[LOCAL_ID].y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[LOCAL_ID].z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[LOCAL_ID].z));
STORE_DERIVATIVES_2 STORE_DERIVATIVES_2
} }
#else #else
...@@ -364,15 +364,15 @@ KERNEL void computeN2Energy( ...@@ -364,15 +364,15 @@ KERNEL void computeN2Energy(
unsigned int atom2 = y*TILE_SIZE + tgx; unsigned int atom2 = y*TILE_SIZE + tgx;
#endif #endif
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
unsigned int offset = atom1; unsigned int offset = atom1;
STORE_DERIVATIVES_1 STORE_DERIVATIVES_1
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) ((mm_long) (local_force[LOCAL_ID].x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(local_force[LOCAL_ID].x));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[LOCAL_ID].y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[LOCAL_ID].y));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[LOCAL_ID].z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[LOCAL_ID].z));
offset = atom2; offset = atom2;
STORE_DERIVATIVES_2 STORE_DERIVATIVES_2
} }
......
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (deriv##INDEX##_1*0x100000000))); #define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_deriv##INDEX[tgx]*0x100000000))); #define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_deriv##INDEX[tgx]));
#else #else
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1; #define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[tgx]; #define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[tgx];
...@@ -102,9 +102,9 @@ KERNEL void computeN2Energy( ...@@ -102,9 +102,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = atom1; unsigned int offset = atom1;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
STORE_DERIVATIVES_1 STORE_DERIVATIVES_1
#else #else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
...@@ -176,9 +176,9 @@ KERNEL void computeN2Energy( ...@@ -176,9 +176,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = atom1; unsigned int offset = atom1;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
STORE_DERIVATIVES_1 STORE_DERIVATIVES_1
#else #else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
...@@ -192,9 +192,9 @@ KERNEL void computeN2Energy( ...@@ -192,9 +192,9 @@ KERNEL void computeN2Energy(
for (int tgx = 0; tgx < TILE_SIZE; tgx++) { for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = y*TILE_SIZE+tgx; unsigned int offset = y*TILE_SIZE+tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (local_force[tgx].x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(local_force[tgx].x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[tgx].y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[tgx].y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[tgx].z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[tgx].z));
STORE_DERIVATIVES_2 STORE_DERIVATIVES_2
#else #else
unsigned int offset = y*TILE_SIZE+tgx + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = y*TILE_SIZE+tgx + GROUP_ID*PADDED_NUM_ATOMS;
...@@ -318,9 +318,9 @@ KERNEL void computeN2Energy( ...@@ -318,9 +318,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = atom1; unsigned int offset = atom1;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
STORE_DERIVATIVES_1 STORE_DERIVATIVES_1
#else #else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
...@@ -377,9 +377,9 @@ KERNEL void computeN2Energy( ...@@ -377,9 +377,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = atom1; unsigned int offset = atom1;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
STORE_DERIVATIVES_1 STORE_DERIVATIVES_1
#else #else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
...@@ -399,9 +399,9 @@ KERNEL void computeN2Energy( ...@@ -399,9 +399,9 @@ KERNEL void computeN2Energy(
#endif #endif
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) ((mm_long) (local_force[tgx].x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(local_force[tgx].x));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[tgx].y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[tgx].y));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[tgx].z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[tgx].z));
unsigned int offset = atom2; unsigned int offset = atom2;
STORE_DERIVATIVES_2 STORE_DERIVATIVES_2
#else #else
......
...@@ -20,9 +20,9 @@ KERNEL void computeGradientChainRuleTerms(GLOBAL const real4* RESTRICT posq, ...@@ -20,9 +20,9 @@ KERNEL void computeGradientChainRuleTerms(GLOBAL const real4* RESTRICT posq,
#endif #endif
COMPUTE_FORCES COMPUTE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
forceBuffers[index] = (mm_long) (force.x*0x100000000); forceBuffers[index] = realToFixedPoint(force.x);
forceBuffers[index+PADDED_NUM_ATOMS] = (mm_long) (force.y*0x100000000); forceBuffers[index+PADDED_NUM_ATOMS] = realToFixedPoint(force.y);
forceBuffers[index+PADDED_NUM_ATOMS*2] = (mm_long) (force.z*0x100000000); forceBuffers[index+PADDED_NUM_ATOMS*2] = realToFixedPoint(force.z);
#else #else
forceBuffers[index] = make_real4(force.x, force.y, force.z, 0); forceBuffers[index] = make_real4(force.x, force.y, force.z, 0);
#endif #endif
......
...@@ -139,11 +139,11 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig ...@@ -139,11 +139,11 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset1 = x*TILE_SIZE + tgx; unsigned int offset1 = x*TILE_SIZE + tgx;
ATOMIC_ADD(&global_value[offset1], (mm_ulong) ((mm_long) (value*0x100000000))); ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
STORE_PARAM_DERIVS1 STORE_PARAM_DERIVS1
if (x != y) { if (x != y) {
unsigned int offset2 = y*TILE_SIZE + tgx; unsigned int offset2 = y*TILE_SIZE + tgx;
ATOMIC_ADD(&global_value[offset2], (mm_ulong) ((mm_long) (local_value[LOCAL_ID]*0x100000000))); ATOMIC_ADD(&global_value[offset2], (mm_ulong) realToFixedPoint(local_value[LOCAL_ID]));
STORE_PARAM_DERIVS2 STORE_PARAM_DERIVS2
} }
#else #else
...@@ -319,11 +319,11 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig ...@@ -319,11 +319,11 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#endif #endif
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset1 = atom1; unsigned int offset1 = atom1;
ATOMIC_ADD(&global_value[offset1], (mm_ulong) ((mm_long) (value*0x100000000))); ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
STORE_PARAM_DERIVS1 STORE_PARAM_DERIVS1
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
unsigned int offset2 = atom2; unsigned int offset2 = atom2;
ATOMIC_ADD(&global_value[offset2], (mm_ulong) ((mm_long) (local_value[LOCAL_ID]*0x100000000))); ATOMIC_ADD(&global_value[offset2], (mm_ulong) realToFixedPoint(local_value[LOCAL_ID]));
STORE_PARAM_DERIVS2 STORE_PARAM_DERIVS2
} }
#else #else
......
...@@ -86,7 +86,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig ...@@ -86,7 +86,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset1 = atom1; unsigned int offset1 = atom1;
ATOMIC_ADD(&global_value[offset1], (mm_ulong) ((mm_long) (value*0x100000000))); ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
#else #else
unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset1] += value; global_value[offset1] += value;
...@@ -148,7 +148,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig ...@@ -148,7 +148,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset1 = atom1; unsigned int offset1 = atom1;
ATOMIC_ADD(&global_value[offset1], (mm_ulong) ((mm_long) (value*0x100000000))); ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
#else #else
unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset1] += value; global_value[offset1] += value;
...@@ -161,7 +161,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig ...@@ -161,7 +161,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
for (int tgx = 0; tgx < TILE_SIZE; tgx++) { for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset2 = y*TILE_SIZE+tgx; unsigned int offset2 = y*TILE_SIZE+tgx;
ATOMIC_ADD(&global_value[offset2], (mm_ulong) ((mm_long) (local_value[tgx]*0x100000000))); ATOMIC_ADD(&global_value[offset2], (mm_ulong) realToFixedPoint(local_value[tgx]));
#else #else
unsigned int offset2 = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset2 = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset2] += local_value[tgx]; global_value[offset2] += local_value[tgx];
...@@ -275,7 +275,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig ...@@ -275,7 +275,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset1 = atom1; unsigned int offset1 = atom1;
ATOMIC_ADD(&global_value[offset1], (mm_ulong) ((mm_long) (value*0x100000000))); ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
#else #else
unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset1] += value; global_value[offset1] += value;
...@@ -324,7 +324,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig ...@@ -324,7 +324,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset1 = atom1; unsigned int offset1 = atom1;
ATOMIC_ADD(&global_value[offset1], (mm_ulong) ((mm_long) (value*0x100000000))); ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
#else #else
unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset1] += value; global_value[offset1] += value;
...@@ -344,7 +344,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig ...@@ -344,7 +344,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset2 = atom2; unsigned int offset2 = atom2;
ATOMIC_ADD(&global_value[offset2], (mm_ulong) ((mm_long) (local_value[tgx]*0x100000000))); ATOMIC_ADD(&global_value[offset2], (mm_ulong) realToFixedPoint(local_value[tgx]));
#else #else
unsigned int offset2 = atom2 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset2 = atom2 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset2] += local_value[tgx]; global_value[offset2] += local_value[tgx];
......
...@@ -116,21 +116,21 @@ KERNEL void computeDonorForces( ...@@ -116,21 +116,21 @@ KERNEL void computeDonorForces(
if (donorIndex < NUM_DONORS) { if (donorIndex < NUM_DONORS) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
if (atoms.x > -1) { if (atoms.x > -1) {
ATOMIC_ADD(&force[atoms.x], (mm_ulong) ((mm_long) (f1.x*0x100000000))); ATOMIC_ADD(&force[atoms.x], (mm_ulong) realToFixedPoint(f1.x));
ATOMIC_ADD(&force[atoms.x+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f1.y*0x100000000))); ATOMIC_ADD(&force[atoms.x+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f1.y));
ATOMIC_ADD(&force[atoms.x+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f1.z*0x100000000))); ATOMIC_ADD(&force[atoms.x+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f1.z));
MEM_FENCE; MEM_FENCE;
} }
if (atoms.y > -1) { if (atoms.y > -1) {
ATOMIC_ADD(&force[atoms.y], (mm_ulong) ((mm_long) (f2.x*0x100000000))); ATOMIC_ADD(&force[atoms.y], (mm_ulong) realToFixedPoint(f2.x));
ATOMIC_ADD(&force[atoms.y+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f2.y*0x100000000))); ATOMIC_ADD(&force[atoms.y+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f2.y));
ATOMIC_ADD(&force[atoms.y+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f2.z*0x100000000))); ATOMIC_ADD(&force[atoms.y+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f2.z));
MEM_FENCE; MEM_FENCE;
} }
if (atoms.z > -1) { if (atoms.z > -1) {
ATOMIC_ADD(&force[atoms.z], (mm_ulong) ((mm_long) (f3.x*0x100000000))); ATOMIC_ADD(&force[atoms.z], (mm_ulong) realToFixedPoint(f3.x));
ATOMIC_ADD(&force[atoms.z+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f3.y*0x100000000))); ATOMIC_ADD(&force[atoms.z+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f3.y));
ATOMIC_ADD(&force[atoms.z+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f3.z*0x100000000))); ATOMIC_ADD(&force[atoms.z+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f3.z));
MEM_FENCE; MEM_FENCE;
} }
#else #else
...@@ -233,21 +233,21 @@ KERNEL void computeAcceptorForces( ...@@ -233,21 +233,21 @@ KERNEL void computeAcceptorForces(
if (acceptorIndex < NUM_ACCEPTORS) { if (acceptorIndex < NUM_ACCEPTORS) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
if (atoms.x > -1) { if (atoms.x > -1) {
ATOMIC_ADD(&force[atoms.x], (mm_ulong) ((mm_long) (f1.x*0x100000000))); ATOMIC_ADD(&force[atoms.x], (mm_ulong) realToFixedPoint(f1.x));
ATOMIC_ADD(&force[atoms.x+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f1.y*0x100000000))); ATOMIC_ADD(&force[atoms.x+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f1.y));
ATOMIC_ADD(&force[atoms.x+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f1.z*0x100000000))); ATOMIC_ADD(&force[atoms.x+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f1.z));
MEM_FENCE; MEM_FENCE;
} }
if (atoms.y > -1) { if (atoms.y > -1) {
ATOMIC_ADD(&force[atoms.y], (mm_ulong) ((mm_long) (f2.x*0x100000000))); ATOMIC_ADD(&force[atoms.y], (mm_ulong) realToFixedPoint(f2.x));
ATOMIC_ADD(&force[atoms.y+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f2.y*0x100000000))); ATOMIC_ADD(&force[atoms.y+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f2.y));
ATOMIC_ADD(&force[atoms.y+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f2.z*0x100000000))); ATOMIC_ADD(&force[atoms.y+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f2.z));
MEM_FENCE; MEM_FENCE;
} }
if (atoms.z > -1) { if (atoms.z > -1) {
ATOMIC_ADD(&force[atoms.z], (mm_ulong) ((mm_long) (f3.x*0x100000000))); ATOMIC_ADD(&force[atoms.z], (mm_ulong) realToFixedPoint(f3.x));
ATOMIC_ADD(&force[atoms.z+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f3.y*0x100000000))); ATOMIC_ADD(&force[atoms.z+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f3.y));
ATOMIC_ADD(&force[atoms.z+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f3.z*0x100000000))); ATOMIC_ADD(&force[atoms.z+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f3.z));
MEM_FENCE; MEM_FENCE;
} }
#else #else
......
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
* Record the force on an atom to global memory. * Record the force on an atom to global memory.
*/ */
inline DEVICE void storeForce(int atom, real3 force, GLOBAL mm_ulong* RESTRICT forceBuffers) { inline DEVICE void storeForce(int atom, real3 force, GLOBAL mm_ulong* RESTRICT forceBuffers) {
ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
} }
/** /**
......
...@@ -137,13 +137,13 @@ KERNEL void computeInteractionGroups( ...@@ -137,13 +137,13 @@ KERNEL void computeInteractionGroups(
} }
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
if (exclusions != 0) { if (exclusions != 0) {
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
} }
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fx*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fx));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fy*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fy));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fz*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fz));
SYNC_WARPS; SYNC_WARPS;
#else #else
writeForces(forceBuffers, localData, atom2); writeForces(forceBuffers, localData, atom2);
......
...@@ -102,9 +102,9 @@ KERNEL void calculateEwaldForces(GLOBAL mm_long* RESTRICT forceBuffers, GLOBAL c ...@@ -102,9 +102,9 @@ KERNEL void calculateEwaldForces(GLOBAL mm_long* RESTRICT forceBuffers, GLOBAL c
// Record the force on the atom. // Record the force on the atom.
forceBuffers[atom] += (mm_long) (force.x*0x100000000); forceBuffers[atom] += realToFixedPoint(force.x);
forceBuffers[atom+PADDED_NUM_ATOMS] += (mm_long) (force.y*0x100000000); forceBuffers[atom+PADDED_NUM_ATOMS] += realToFixedPoint(force.y);
forceBuffers[atom+2*PADDED_NUM_ATOMS] += (mm_long) (force.z*0x100000000); forceBuffers[atom+2*PADDED_NUM_ATOMS] += realToFixedPoint(force.z);
atom += GLOBAL_SIZE; atom += GLOBAL_SIZE;
} }
} }
...@@ -384,19 +384,19 @@ KERNEL void computeForce( ...@@ -384,19 +384,19 @@ KERNEL void computeForce(
real sigma = data1.sig.x+data2.sig.x; real sigma = data1.sig.x+data2.sig.x;
real epsilon = data1.eps.x*data2.eps.x; real epsilon = data1.eps.x*data2.eps.x;
computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy); computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
ATOMIC_ADD(&forceBuffers[index2], (mm_ulong) ((mm_long) (force2.x*0x100000000))); ATOMIC_ADD(&forceBuffers[index2], (mm_ulong) realToFixedPoint(force2.x));
ATOMIC_ADD(&forceBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force2.y*0x100000000))); ATOMIC_ADD(&forceBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force2.y));
ATOMIC_ADD(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force2.z*0x100000000))); ATOMIC_ADD(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force2.z));
ATOMIC_ADD(&torqueBuffers[index2], (mm_ulong) ((mm_long) (torque2.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[index2], (mm_ulong) realToFixedPoint(torque2.x));
ATOMIC_ADD(&torqueBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque2.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque2.y));
ATOMIC_ADD(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque2.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque2.z));
} }
ATOMIC_ADD(&forceBuffers[index1], (mm_ulong) ((mm_long) (force1.x*0x100000000))); ATOMIC_ADD(&forceBuffers[index1], (mm_ulong) realToFixedPoint(force1.x));
ATOMIC_ADD(&forceBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force1.y*0x100000000))); ATOMIC_ADD(&forceBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force1.y));
ATOMIC_ADD(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force1.z*0x100000000))); ATOMIC_ADD(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force1.z));
ATOMIC_ADD(&torqueBuffers[index1], (mm_ulong) ((mm_long) (torque1.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[index1], (mm_ulong) realToFixedPoint(torque1.x));
ATOMIC_ADD(&torqueBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque1.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque1.y));
ATOMIC_ADD(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque1.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque1.z));
} }
#else #else
for (int atom1 = GLOBAL_ID; atom1 < numAtoms; atom1 += GLOBAL_SIZE) { for (int atom1 = GLOBAL_ID; atom1 < numAtoms; atom1 += GLOBAL_SIZE) {
...@@ -432,19 +432,19 @@ KERNEL void computeForce( ...@@ -432,19 +432,19 @@ KERNEL void computeForce(
real sigma = data1.sig.x+data2.sig.x; real sigma = data1.sig.x+data2.sig.x;
real epsilon = data1.eps.x*data2.eps.x; real epsilon = data1.eps.x*data2.eps.x;
computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy); computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
ATOMIC_ADD(&forceBuffers[index2], (mm_ulong) ((mm_long) (force2.x*0x100000000))); ATOMIC_ADD(&forceBuffers[index2], (mm_ulong) realToFixedPoint(force2.x));
ATOMIC_ADD(&forceBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force2.y*0x100000000))); ATOMIC_ADD(&forceBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force2.y));
ATOMIC_ADD(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force2.z*0x100000000))); ATOMIC_ADD(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force2.z));
ATOMIC_ADD(&torqueBuffers[index2], (mm_ulong) ((mm_long) (torque2.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[index2], (mm_ulong) realToFixedPoint(torque2.x));
ATOMIC_ADD(&torqueBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque2.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque2.y));
ATOMIC_ADD(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque2.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque2.z));
} }
ATOMIC_ADD(&forceBuffers[index1], (mm_ulong) ((mm_long) (force1.x*0x100000000))); ATOMIC_ADD(&forceBuffers[index1], (mm_ulong) realToFixedPoint(force1.x));
ATOMIC_ADD(&forceBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force1.y*0x100000000))); ATOMIC_ADD(&forceBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force1.y));
ATOMIC_ADD(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force1.z*0x100000000))); ATOMIC_ADD(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force1.z));
ATOMIC_ADD(&torqueBuffers[index1], (mm_ulong) ((mm_long) (torque1.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[index1], (mm_ulong) realToFixedPoint(torque1.x));
ATOMIC_ADD(&torqueBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque1.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque1.y));
ATOMIC_ADD(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque1.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque1.z));
} }
#endif #endif
...@@ -466,18 +466,18 @@ KERNEL void computeForce( ...@@ -466,18 +466,18 @@ KERNEL void computeForce(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
computeOneInteraction(&data1, &data2, params.x, params.y, delta, r2, &force1, &force2, &torque1, &torque2, &energy); computeOneInteraction(&data1, &data2, params.x, params.y, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
ATOMIC_ADD(&forceBuffers[index1], (mm_ulong) ((mm_long) (force1.x*0x100000000))); ATOMIC_ADD(&forceBuffers[index1], (mm_ulong) realToFixedPoint(force1.x));
ATOMIC_ADD(&forceBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force1.y*0x100000000))); ATOMIC_ADD(&forceBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force1.y));
ATOMIC_ADD(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force1.z*0x100000000))); ATOMIC_ADD(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force1.z));
ATOMIC_ADD(&forceBuffers[index2], (mm_ulong) ((mm_long) (force2.x*0x100000000))); ATOMIC_ADD(&forceBuffers[index2], (mm_ulong) realToFixedPoint(force2.x));
ATOMIC_ADD(&forceBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force2.y*0x100000000))); ATOMIC_ADD(&forceBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force2.y));
ATOMIC_ADD(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force2.z*0x100000000))); ATOMIC_ADD(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force2.z));
ATOMIC_ADD(&torqueBuffers[index1], (mm_ulong) ((mm_long) (torque1.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[index1], (mm_ulong) realToFixedPoint(torque1.x));
ATOMIC_ADD(&torqueBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque1.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque1.y));
ATOMIC_ADD(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque1.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque1.z));
ATOMIC_ADD(&torqueBuffers[index2], (mm_ulong) ((mm_long) (torque2.x*0x100000000))); ATOMIC_ADD(&torqueBuffers[index2], (mm_ulong) realToFixedPoint(torque2.x));
ATOMIC_ADD(&torqueBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque2.y*0x100000000))); ATOMIC_ADD(&torqueBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque2.y));
ATOMIC_ADD(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque2.z*0x100000000))); ATOMIC_ADD(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque2.z));
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
...@@ -522,16 +522,16 @@ KERNEL void applyTorques( ...@@ -522,16 +522,16 @@ KERNEL void applyTorques(
yforce += f; yforce += f;
force -= f; force -= f;
} }
ATOMIC_ADD(&forceBuffers[originalIndex], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[originalIndex], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[originalIndex+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[originalIndex+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[originalIndex+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[originalIndex+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
ATOMIC_ADD(&forceBuffers[axisParticles.x], (mm_ulong) ((mm_long) (xforce.x*0x100000000))); ATOMIC_ADD(&forceBuffers[axisParticles.x], (mm_ulong) realToFixedPoint(xforce.x));
ATOMIC_ADD(&forceBuffers[axisParticles.x+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (xforce.y*0x100000000))); ATOMIC_ADD(&forceBuffers[axisParticles.x+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(xforce.y));
ATOMIC_ADD(&forceBuffers[axisParticles.x+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (xforce.z*0x100000000))); ATOMIC_ADD(&forceBuffers[axisParticles.x+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(xforce.z));
if (axisParticles.y != -1) { if (axisParticles.y != -1) {
ATOMIC_ADD(&forceBuffers[axisParticles.y], (mm_ulong) ((mm_long) (yforce.x*0x100000000))); ATOMIC_ADD(&forceBuffers[axisParticles.y], (mm_ulong) realToFixedPoint(yforce.x));
ATOMIC_ADD(&forceBuffers[axisParticles.y+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (yforce.y*0x100000000))); ATOMIC_ADD(&forceBuffers[axisParticles.y+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(yforce.y));
ATOMIC_ADD(&forceBuffers[axisParticles.y+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (yforce.z*0x100000000))); ATOMIC_ADD(&forceBuffers[axisParticles.y+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(yforce.z));
} }
} }
} }
......
...@@ -148,10 +148,10 @@ KERNEL void computeBornSum( ...@@ -148,10 +148,10 @@ KERNEL void computeBornSum(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&global_bornSum[offset], (mm_ulong) ((mm_long) (bornSum*0x100000000))); ATOMIC_ADD(&global_bornSum[offset], (mm_ulong) realToFixedPoint(bornSum));
if (x != y) { if (x != y) {
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&global_bornSum[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].bornSum*0x100000000))); ATOMIC_ADD(&global_bornSum[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].bornSum));
} }
#else #else
unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS; unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS;
...@@ -352,9 +352,9 @@ KERNEL void computeBornSum( ...@@ -352,9 +352,9 @@ KERNEL void computeBornSum(
unsigned int atom2 = y*TILE_SIZE + tgx; unsigned int atom2 = y*TILE_SIZE + tgx;
#endif #endif
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&global_bornSum[atom1], (mm_ulong) ((mm_long) (bornSum*0x100000000))); ATOMIC_ADD(&global_bornSum[atom1], (mm_ulong) realToFixedPoint(bornSum));
if (atom2 < PADDED_NUM_ATOMS) if (atom2 < PADDED_NUM_ATOMS)
ATOMIC_ADD(&global_bornSum[atom2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].bornSum*0x100000000))); ATOMIC_ADD(&global_bornSum[atom2], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].bornSum));
#else #else
unsigned int offset1 = atom1 + warp*PADDED_NUM_ATOMS; unsigned int offset1 = atom1 + warp*PADDED_NUM_ATOMS;
unsigned int offset2 = atom2 + warp*PADDED_NUM_ATOMS; unsigned int offset2 = atom2 + warp*PADDED_NUM_ATOMS;
...@@ -534,16 +534,16 @@ KERNEL void computeGBSAForce1( ...@@ -534,16 +534,16 @@ KERNEL void computeGBSAForce1(
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
ATOMIC_ADD(&global_bornForce[offset], (mm_ulong) ((mm_long) (force.w*0x100000000))); ATOMIC_ADD(&global_bornForce[offset], (mm_ulong) realToFixedPoint(force.w));
if (x != y) { if (x != y) {
offset = y*TILE_SIZE + tgx; offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fx*0x100000000))); ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fx));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fy*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fy));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fz*0x100000000))); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fz));
ATOMIC_ADD(&global_bornForce[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fw*0x100000000))); ATOMIC_ADD(&global_bornForce[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fw));
} }
#else #else
unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS; unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS;
...@@ -758,15 +758,15 @@ KERNEL void computeGBSAForce1( ...@@ -758,15 +758,15 @@ KERNEL void computeGBSAForce1(
unsigned int atom2 = y*TILE_SIZE + tgx; unsigned int atom2 = y*TILE_SIZE + tgx;
#endif #endif
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
ATOMIC_ADD(&global_bornForce[atom1], (mm_ulong) ((mm_long) (force.w*0x100000000))); ATOMIC_ADD(&global_bornForce[atom1], (mm_ulong) realToFixedPoint(force.w));
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fx*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fx));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fy*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fy));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fz*0x100000000))); ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fz));
ATOMIC_ADD(&global_bornForce[atom2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fw*0x100000000))); ATOMIC_ADD(&global_bornForce[atom2], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fw));
} }
#else #else
unsigned int offset1 = atom1 + warp*PADDED_NUM_ATOMS; unsigned int offset1 = atom1 + warp*PADDED_NUM_ATOMS;
......
...@@ -74,7 +74,7 @@ KERNEL void reduceBornForce( ...@@ -74,7 +74,7 @@ KERNEL void reduceBornForce(
energy += saTerm; energy += saTerm;
force *= bornRadius*bornRadius*obcChain[index]; force *= bornRadius*bornRadius*obcChain[index];
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
bornForce[index] = (mm_long) (force*0x100000000); bornForce[index] = realToFixedPoint(force);
#else #else
bornForce[index] = force; bornForce[index] = force;
#endif #endif
......
...@@ -88,7 +88,7 @@ KERNEL void computeBornSum( ...@@ -88,7 +88,7 @@ KERNEL void computeBornSum(
// Write results. // Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&global_bornSum[atom1], (mm_long) (bornSum*0x100000000)); ATOMIC_ADD(&global_bornSum[atom1], realToFixedPoint(bornSum));
#else #else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
global_bornSum[offset] += bornSum; global_bornSum[offset] += bornSum;
...@@ -150,7 +150,7 @@ KERNEL void computeBornSum( ...@@ -150,7 +150,7 @@ KERNEL void computeBornSum(
// Write results for atom1. // Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&global_bornSum[atom1], (mm_long) (bornSum*0x100000000)); ATOMIC_ADD(&global_bornSum[atom1], realToFixedPoint(bornSum));
#else #else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
global_bornSum[offset] += bornSum; global_bornSum[offset] += bornSum;
...@@ -162,7 +162,7 @@ KERNEL void computeBornSum( ...@@ -162,7 +162,7 @@ KERNEL void computeBornSum(
for (int tgx = 0; tgx < TILE_SIZE; tgx++) { for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = y*TILE_SIZE + tgx; unsigned int offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&global_bornSum[offset], (mm_long) (localData[tgx].bornSum*0x100000000)); ATOMIC_ADD(&global_bornSum[offset], realToFixedPoint(localData[tgx].bornSum));
#else #else
unsigned int offset = y*TILE_SIZE+tgx + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = y*TILE_SIZE+tgx + GROUP_ID*PADDED_NUM_ATOMS;
global_bornSum[offset] += localData[tgx].bornSum; global_bornSum[offset] += localData[tgx].bornSum;
...@@ -297,7 +297,7 @@ KERNEL void computeBornSum( ...@@ -297,7 +297,7 @@ KERNEL void computeBornSum(
// Write results for atom1. // Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&global_bornSum[atom1], (mm_long) (bornSum*0x100000000)); ATOMIC_ADD(&global_bornSum[atom1], realToFixedPoint(bornSum));
#else #else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
global_bornSum[offset] += bornSum; global_bornSum[offset] += bornSum;
...@@ -360,7 +360,7 @@ KERNEL void computeBornSum( ...@@ -360,7 +360,7 @@ KERNEL void computeBornSum(
// Write results for atom1. // Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&global_bornSum[atom1], (mm_long) (bornSum*0x100000000)); ATOMIC_ADD(&global_bornSum[atom1], realToFixedPoint(bornSum));
#else #else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
global_bornSum[offset] += bornSum; global_bornSum[offset] += bornSum;
...@@ -378,7 +378,7 @@ KERNEL void computeBornSum( ...@@ -378,7 +378,7 @@ KERNEL void computeBornSum(
#endif #endif
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&global_bornSum[atom2], (mm_long) (localData[tgx].bornSum*0x100000000)); ATOMIC_ADD(&global_bornSum[atom2], realToFixedPoint(localData[tgx].bornSum));
#else #else
unsigned int offset = atom2 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom2 + GROUP_ID*PADDED_NUM_ATOMS;
global_bornSum[offset] += localData[tgx].bornSum; global_bornSum[offset] += localData[tgx].bornSum;
...@@ -491,10 +491,10 @@ KERNEL void computeGBSAForce1( ...@@ -491,10 +491,10 @@ KERNEL void computeGBSAForce1(
// Write results. // Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_long) (force.x*0x100000000)); ATOMIC_ADD(&forceBuffers[atom1], realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_long) (force.y*0x100000000)); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_long) (force.z*0x100000000)); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
ATOMIC_ADD(&global_bornForce[atom1], (mm_long) (force.w*0x100000000)); ATOMIC_ADD(&global_bornForce[atom1], realToFixedPoint(force.w));
#else #else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
forceBuffers[offset] += make_real4(force.x, force.y, force.z, 0); forceBuffers[offset] += make_real4(force.x, force.y, force.z, 0);
...@@ -562,10 +562,10 @@ KERNEL void computeGBSAForce1( ...@@ -562,10 +562,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1. // Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_long) (force.x*0x100000000)); ATOMIC_ADD(&forceBuffers[atom1], realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_long) (force.y*0x100000000)); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_long) (force.z*0x100000000)); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
ATOMIC_ADD(&global_bornForce[atom1], (mm_long) (force.w*0x100000000)); ATOMIC_ADD(&global_bornForce[atom1], realToFixedPoint(force.w));
#else #else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
forceBuffers[offset] += make_real4(force.x, force.y, force.z, 0); forceBuffers[offset] += make_real4(force.x, force.y, force.z, 0);
...@@ -578,10 +578,10 @@ KERNEL void computeGBSAForce1( ...@@ -578,10 +578,10 @@ KERNEL void computeGBSAForce1(
for (int tgx = 0; tgx < TILE_SIZE; tgx++) { for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = y*TILE_SIZE + tgx; unsigned int offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_long) (localData[tgx].fx*0x100000000)); ATOMIC_ADD(&forceBuffers[offset], realToFixedPoint(localData[tgx].fx));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_long) (localData[tgx].fy*0x100000000)); ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fy));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_long) (localData[tgx].fz*0x100000000)); ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fz));
ATOMIC_ADD(&global_bornForce[offset], (mm_long) (localData[tgx].fw*0x100000000)); ATOMIC_ADD(&global_bornForce[offset], realToFixedPoint(localData[tgx].fw));
#else #else
unsigned int offset = y*TILE_SIZE+tgx + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = y*TILE_SIZE+tgx + GROUP_ID*PADDED_NUM_ATOMS;
real4 f = forceBuffers[offset]; real4 f = forceBuffers[offset];
...@@ -723,10 +723,10 @@ KERNEL void computeGBSAForce1( ...@@ -723,10 +723,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1. // Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_long) (force.x*0x100000000)); ATOMIC_ADD(&forceBuffers[atom1], realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_long) (force.y*0x100000000)); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_long) (force.z*0x100000000)); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
ATOMIC_ADD(&global_bornForce[atom1], (mm_long) (force.w*0x100000000)); ATOMIC_ADD(&global_bornForce[atom1], realToFixedPoint(force.w));
#else #else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
forceBuffers[offset] += make_real4(force.x, force.y, force.z, 0); forceBuffers[offset] += make_real4(force.x, force.y, force.z, 0);
...@@ -791,10 +791,10 @@ KERNEL void computeGBSAForce1( ...@@ -791,10 +791,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1. // Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_long) (force.x*0x100000000)); ATOMIC_ADD(&forceBuffers[atom1], realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_long) (force.y*0x100000000)); ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_long) (force.z*0x100000000)); ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
ATOMIC_ADD(&global_bornForce[atom1], (mm_long) (force.w*0x100000000)); ATOMIC_ADD(&global_bornForce[atom1], realToFixedPoint(force.w));
#else #else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
forceBuffers[offset] += make_real4(force.x, force.y, force.z, 0); forceBuffers[offset] += make_real4(force.x, force.y, force.z, 0);
...@@ -813,10 +813,10 @@ KERNEL void computeGBSAForce1( ...@@ -813,10 +813,10 @@ KERNEL void computeGBSAForce1(
#endif #endif
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom2], (mm_long) (localData[tgx].fx*0x100000000)); ATOMIC_ADD(&forceBuffers[atom2], realToFixedPoint(localData[tgx].fx));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_long) (localData[tgx].fy*0x100000000)); ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fy));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_long) (localData[tgx].fz*0x100000000)); ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fz));
ATOMIC_ADD(&global_bornForce[atom2], (mm_long) (localData[tgx].fw*0x100000000)); ATOMIC_ADD(&global_bornForce[atom2], realToFixedPoint(localData[tgx].fw));
#else #else
unsigned int offset = atom2 + GROUP_ID*PADDED_NUM_ATOMS; unsigned int offset = atom2 + GROUP_ID*PADDED_NUM_ATOMS;
real4 f = forceBuffers[offset]; real4 f = forceBuffers[offset];
......
...@@ -896,13 +896,13 @@ inline DEVICE real3 loadForce(int index, GLOBAL const mm_long* RESTRICT force) { ...@@ -896,13 +896,13 @@ inline DEVICE real3 loadForce(int index, GLOBAL const mm_long* RESTRICT force) {
inline DEVICE void addForce(int index, GLOBAL mm_long* RESTRICT force, real3 value) { inline DEVICE void addForce(int index, GLOBAL mm_long* RESTRICT force, real3 value) {
GLOBAL mm_ulong* f = (GLOBAL mm_ulong*) force; GLOBAL mm_ulong* f = (GLOBAL mm_ulong*) force;
#ifdef HAS_OVERLAPPING_VSITES #ifdef HAS_OVERLAPPING_VSITES
ATOMIC_ADD(&f[index], (mm_ulong) ((mm_long) (value.x*0x100000000))); ATOMIC_ADD(&f[index], (mm_ulong) realToFixedPoint(value.x));
ATOMIC_ADD(&f[index+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (value.y*0x100000000))); ATOMIC_ADD(&f[index+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(value.y));
ATOMIC_ADD(&f[index+PADDED_NUM_ATOMS*2], (mm_ulong) ((mm_long) (value.z*0x100000000))); ATOMIC_ADD(&f[index+PADDED_NUM_ATOMS*2], (mm_ulong) realToFixedPoint(value.z));
#else #else
f[index] += (mm_ulong) ((mm_long) (value.x*0x100000000)); f[index] += (mm_ulong) realToFixedPoint(value.x);
f[index+PADDED_NUM_ATOMS] += (mm_ulong) ((mm_long) (value.y*0x100000000)); f[index+PADDED_NUM_ATOMS] += (mm_ulong) realToFixedPoint(value.y);
f[index+PADDED_NUM_ATOMS*2] += (mm_ulong) ((mm_long) (value.z*0x100000000)); f[index+PADDED_NUM_ATOMS*2] += (mm_ulong) realToFixedPoint(value.z);
#endif #endif
} }
......
...@@ -154,7 +154,7 @@ KERNEL void gridSpreadCharge(GLOBAL const real4* RESTRICT posq, ...@@ -154,7 +154,7 @@ KERNEL void gridSpreadCharge(GLOBAL const real4* RESTRICT posq,
int index = ybase + zindexTable[zindex]; int index = ybase + zindexTable[zindex];
real add = dxdy*data[iz].z; real add = dxdy*data[iz].z;
#ifdef USE_FIXED_POINT_CHARGE_SPREADING #ifdef USE_FIXED_POINT_CHARGE_SPREADING
ATOMIC_ADD(&pmeGrid[index], (mm_ulong) ((mm_long) (add*0x100000000))); ATOMIC_ADD(&pmeGrid[index], (mm_ulong) realToFixedPoint(add));
#else #else
ATOMIC_ADD(&pmeGrid[index], add); ATOMIC_ADD(&pmeGrid[index], add);
#endif #endif
...@@ -593,13 +593,13 @@ KERNEL void gridInterpolateForce(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ul ...@@ -593,13 +593,13 @@ KERNEL void gridInterpolateForce(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ul
real forceY = -q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y); real forceY = -q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y);
real forceZ = -q*(force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z); real forceZ = -q*(force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z);
#ifdef USE_PME_STREAM #ifdef USE_PME_STREAM
ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) ((mm_long) (forceX*0x100000000))); ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) realToFixedPoint(forceX));
ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forceY*0x100000000))); ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forceY));
ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forceZ*0x100000000))); ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forceZ));
#else #else
forceBuffers[atom] += (mm_ulong) ((mm_long) (forceX*0x100000000)); forceBuffers[atom] += (mm_ulong) realToFixedPoint(forceX);
forceBuffers[atom+PADDED_NUM_ATOMS] += (mm_ulong) ((mm_long) (forceY*0x100000000)); forceBuffers[atom+PADDED_NUM_ATOMS] += (mm_ulong) realToFixedPoint(forceY);
forceBuffers[atom+2*PADDED_NUM_ATOMS] += (mm_ulong) ((mm_long) (forceZ*0x100000000)); forceBuffers[atom+2*PADDED_NUM_ATOMS] += (mm_ulong) realToFixedPoint(forceZ);
#endif #endif
} }
} }
...@@ -607,9 +607,9 @@ KERNEL void gridInterpolateForce(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ul ...@@ -607,9 +607,9 @@ KERNEL void gridInterpolateForce(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ul
KERNEL void addForces(GLOBAL const real4* RESTRICT forces, GLOBAL mm_long* RESTRICT forceBuffers) { KERNEL void addForces(GLOBAL const real4* RESTRICT forces, GLOBAL mm_long* RESTRICT forceBuffers) {
for (int atom = GLOBAL_ID; atom < NUM_ATOMS; atom += GLOBAL_SIZE) { for (int atom = GLOBAL_ID; atom < NUM_ATOMS; atom += GLOBAL_SIZE) {
real4 f = forces[atom]; real4 f = forces[atom];
forceBuffers[atom] += (mm_long) (f.x*0x100000000); forceBuffers[atom] += realToFixedPoint(f.x);
forceBuffers[atom+PADDED_NUM_ATOMS] += (mm_long) (f.y*0x100000000); forceBuffers[atom+PADDED_NUM_ATOMS] += realToFixedPoint(f.y);
forceBuffers[atom+2*PADDED_NUM_ATOMS] += (mm_long) (f.z*0x100000000); forceBuffers[atom+2*PADDED_NUM_ATOMS] += realToFixedPoint(f.z);
} }
} }
......
...@@ -90,8 +90,8 @@ KERNEL void computeRMSDForces(int numParticles, int paddedNumAtoms, GLOBAL const ...@@ -90,8 +90,8 @@ KERNEL void computeRMSDForces(int numParticles, int paddedNumAtoms, GLOBAL const
buffer[1]*refPos.x + buffer[4]*refPos.y + buffer[7]*refPos.z, buffer[1]*refPos.x + buffer[4]*refPos.y + buffer[7]*refPos.z,
buffer[2]*refPos.x + buffer[5]*refPos.y + buffer[8]*refPos.z); buffer[2]*refPos.x + buffer[5]*refPos.y + buffer[8]*refPos.z);
real3 force = (rotatedRef-pos)*scale; real3 force = (rotatedRef-pos)*scale;
forceBuffers[index] += (mm_long) (force.x*0x100000000); forceBuffers[index] += realToFixedPoint(force.x);
forceBuffers[index+paddedNumAtoms] += (mm_long) (force.y*0x100000000); forceBuffers[index+paddedNumAtoms] += realToFixedPoint(force.y);
forceBuffers[index+2*paddedNumAtoms] += (mm_long) (force.z*0x100000000); forceBuffers[index+2*paddedNumAtoms] += realToFixedPoint(force.z);
} }
} }
...@@ -166,9 +166,9 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int ...@@ -166,9 +166,9 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
} }
s<<computeForce<<"\n"; s<<computeForce<<"\n";
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".x*0x100000000)));\n"; s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"], static_cast<unsigned long long>(realToFixedPoint(force"<<(i+1)<<".x)));\n";
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".y*0x100000000)));\n"; s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force"<<(i+1)<<".y)));\n";
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS*2], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".z*0x100000000)));\n"; s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS*2], static_cast<unsigned long long>(realToFixedPoint(force"<<(i+1)<<".z)));\n";
s<<" __threadfence_block();\n"; s<<" __threadfence_block();\n";
} }
s<<"}\n"; s<<"}\n";
......
...@@ -24,3 +24,7 @@ typedef unsigned long long mm_ulong; ...@@ -24,3 +24,7 @@ typedef unsigned long long mm_ulong;
#define SUPPORTS_64_BIT_ATOMICS 1 #define SUPPORTS_64_BIT_ATOMICS 1
#define SUPPORTS_DOUBLE_PRECISION 1 #define SUPPORTS_DOUBLE_PRECISION 1
/**
 * Convert a real value to a signed 64-bit fixed-point number.
 *
 * The input is multiplied by 0x100000000 (2^32) and the product is converted
 * to long long with static_cast.
 * NOTE(review): `real` is declared outside this view; assuming it is a
 * floating-point type, the conversion discards the fractional part of the
 * scaled value - confirm against the typedef.
 *
 * @param x  the value to convert
 * @return   x scaled by 2^32, as a long long
 */
__device__ inline long long realToFixedPoint(real x) {
    const real scaled = x * 0x100000000;
    return static_cast<long long>(scaled);
}
...@@ -46,11 +46,11 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) { ...@@ -46,11 +46,11 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
*/ */
__device__ void saveSingleForce(int atom, real3 force, unsigned long long* forceBuffers) { __device__ void saveSingleForce(int atom, real3 force, unsigned long long* forceBuffers) {
if (force.x != 0) if (force.x != 0)
atomicAdd(&forceBuffers[atom], static_cast<unsigned long long>((long long) (force.x*0x100000000))); atomicAdd(&forceBuffers[atom], static_cast<unsigned long long>(realToFixedPoint(force.x)));
if (force.y != 0) if (force.y != 0)
atomicAdd(&forceBuffers[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000))); atomicAdd(&forceBuffers[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.y)));
if (force.z != 0) if (force.z != 0)
atomicAdd(&forceBuffers[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000))); atomicAdd(&forceBuffers[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.z)));
} }
/** /**
...@@ -310,22 +310,22 @@ extern "C" __global__ void computeNonbonded( ...@@ -310,22 +310,22 @@ extern "C" __global__ void computeNonbonded(
// write results for off diagonal tiles // write results for off diagonal tiles
#ifdef INCLUDE_FORCES #ifdef INCLUDE_FORCES
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (shflForce.x*0x100000000))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>(realToFixedPoint(shflForce.x)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.y*0x100000000))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.y)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.z*0x100000000))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.z)));
#else #else
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>(realToFixedPoint(localData[threadIdx.x].fx)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(localData[threadIdx.x].fy)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(localData[threadIdx.x].fz)));
#endif #endif
#endif #endif
} }
// Write results for on and off diagonal tiles // Write results for on and off diagonal tiles
#ifdef INCLUDE_FORCES #ifdef INCLUDE_FORCES
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (force.x*0x100000000))); atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>(realToFixedPoint(force.x)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.y)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.z)));
#endif #endif
} }
...@@ -581,9 +581,9 @@ extern "C" __global__ void computeNonbonded( ...@@ -581,9 +581,9 @@ extern "C" __global__ void computeNonbonded(
// Write results. // Write results.
#ifdef INCLUDE_FORCES #ifdef INCLUDE_FORCES
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (force.x*0x100000000))); atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>(realToFixedPoint(force.x)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000))); atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.y)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000))); atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.z)));
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int atom2 = atomIndices[threadIdx.x]; unsigned int atom2 = atomIndices[threadIdx.x];
#else #else
...@@ -591,13 +591,13 @@ extern "C" __global__ void computeNonbonded( ...@@ -591,13 +591,13 @@ extern "C" __global__ void computeNonbonded(
#endif #endif
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (shflForce.x*0x100000000))); atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>(realToFixedPoint(shflForce.x)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.y*0x100000000))); atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.y)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.z*0x100000000))); atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.z)));
#else #else
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000))); atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>(realToFixedPoint(localData[threadIdx.x].fx)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000))); atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(localData[threadIdx.x].fy)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000))); atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(localData[threadIdx.x].fz)));
#endif #endif
} }
#endif #endif
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment