Unverified Commit 434d7afb authored by Anton Gorenko's avatar Anton Gorenko Committed by GitHub
Browse files

Add realToFixedPoint to all platforms (#3504)

It allows to use a faster float-to-int64 in the HIP platform.
parent ca80579a
......@@ -1651,9 +1651,9 @@ void CommonCalcCustomCentroidBondForceKernel::initialize(const System& system, c
// Save the forces to global memory.
for (int i = 0; i < groupsPerBond; i++) {
compute<<"ATOMIC_ADD(&groupForce[group"<<(i+1)<<"], (mm_ulong) ((mm_long) (force"<<(i+1)<<".x*0x100000000)));\n";
compute<<"ATOMIC_ADD(&groupForce[group"<<(i+1)<<"+numParticleGroups], (mm_ulong) ((mm_long) (force"<<(i+1)<<".y*0x100000000)));\n";
compute<<"ATOMIC_ADD(&groupForce[group"<<(i+1)<<"+numParticleGroups*2], (mm_ulong) ((mm_long) (force"<<(i+1)<<".z*0x100000000)));\n";
compute<<"ATOMIC_ADD(&groupForce[group"<<(i+1)<<"], (mm_ulong) realToFixedPoint(force"<<(i+1)<<".x));\n";
compute<<"ATOMIC_ADD(&groupForce[group"<<(i+1)<<"+numParticleGroups], (mm_ulong) realToFixedPoint(force"<<(i+1)<<".y));\n";
compute<<"ATOMIC_ADD(&groupForce[group"<<(i+1)<<"+numParticleGroups*2], (mm_ulong) realToFixedPoint(force"<<(i+1)<<".z));\n";
compute<<"MEM_FENCE;\n";
}
map<string, string> replacements;
......@@ -2975,11 +2975,11 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
else
tempDerivs2 << "local_" << derivName << "[tbx+tj] += temp_" << derivName << "_2;\n";
if (useLong) {
storeDeriv1 << "ATOMIC_ADD(&global_" << derivName << "[offset1], (mm_ulong) ((mm_long) (" << derivName << "*0x100000000)));\n";
storeDeriv1 << "ATOMIC_ADD(&global_" << derivName << "[offset1], (mm_ulong) realToFixedPoint(" << derivName << "));\n";
if (deviceIsCpu)
storeDeriv2 << "ATOMIC_ADD(&global_" << derivName << "[offset2], (mm_ulong) ((mm_long) (local_" << derivName << "[tgx]*0x100000000)));\n";
storeDeriv2 << "ATOMIC_ADD(&global_" << derivName << "[offset2], (mm_ulong) realToFixedPoint(local_" << derivName << "[tgx]));\n";
else
storeDeriv2 << "ATOMIC_ADD(&global_" << derivName << "[offset2], (mm_ulong) ((mm_long) (local_" << derivName << "[LOCAL_ID]*0x100000000)));\n";
storeDeriv2 << "ATOMIC_ADD(&global_" << derivName << "[offset2], (mm_ulong) realToFixedPoint(local_" << derivName << "[LOCAL_ID]));\n";
}
else {
storeDeriv1 << "global_" << derivName << "[offset1] += " << derivName << ";\n";
......@@ -3353,9 +3353,9 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n";
}
if (useLong) {
compute << "forceBuffers[index] += (mm_long) (force.x*0x100000000);\n";
compute << "forceBuffers[index+PADDED_NUM_ATOMS] += (mm_long) (force.y*0x100000000);\n";
compute << "forceBuffers[index+PADDED_NUM_ATOMS*2] += (mm_long) (force.z*0x100000000);\n";
compute << "forceBuffers[index] += realToFixedPoint(force.x);\n";
compute << "forceBuffers[index+PADDED_NUM_ATOMS] += realToFixedPoint(force.y);\n";
compute << "forceBuffers[index+PADDED_NUM_ATOMS*2] += realToFixedPoint(force.z);\n";
}
else
compute << "forceBuffers[index] = forceBuffers[index]+make_real4(force.x, force.y, force.z, 0);\n";
......
#ifdef SUPPORTS_64_BIT_ATOMICS
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (deriv##INDEX##_1*0x100000000)));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_deriv##INDEX[LOCAL_ID]*0x100000000)));
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_deriv##INDEX[LOCAL_ID]));
#else
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[LOCAL_ID];
......@@ -162,15 +162,15 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
STORE_DERIVATIVES_1
if (x != y) {
offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (local_force[LOCAL_ID].x*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[LOCAL_ID].y*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[LOCAL_ID].z*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(local_force[LOCAL_ID].x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[LOCAL_ID].y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[LOCAL_ID].z));
STORE_DERIVATIVES_2
}
#else
......@@ -364,15 +364,15 @@ KERNEL void computeN2Energy(
unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (force.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
unsigned int offset = atom1;
STORE_DERIVATIVES_1
if (atom2 < PADDED_NUM_ATOMS) {
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) ((mm_long) (local_force[LOCAL_ID].x*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[LOCAL_ID].y*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[LOCAL_ID].z*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(local_force[LOCAL_ID].x));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[LOCAL_ID].y));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[LOCAL_ID].z));
offset = atom2;
STORE_DERIVATIVES_2
}
......
#ifdef SUPPORTS_64_BIT_ATOMICS
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (deriv##INDEX##_1*0x100000000)));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_deriv##INDEX[tgx]*0x100000000)));
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_deriv##INDEX[tgx]));
#else
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[tgx];
......@@ -102,9 +102,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = atom1;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
STORE_DERIVATIVES_1
#else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
......@@ -176,9 +176,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = atom1;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
STORE_DERIVATIVES_1
#else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
......@@ -192,9 +192,9 @@ KERNEL void computeN2Energy(
for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = y*TILE_SIZE+tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (local_force[tgx].x*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[tgx].y*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[tgx].z*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(local_force[tgx].x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[tgx].y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[tgx].z));
STORE_DERIVATIVES_2
#else
unsigned int offset = y*TILE_SIZE+tgx + GROUP_ID*PADDED_NUM_ATOMS;
......@@ -318,9 +318,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = atom1;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
STORE_DERIVATIVES_1
#else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
......@@ -377,9 +377,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = atom1;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
STORE_DERIVATIVES_1
#else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
......@@ -399,9 +399,9 @@ KERNEL void computeN2Energy(
#endif
if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) ((mm_long) (local_force[tgx].x*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[tgx].y*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (local_force[tgx].z*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(local_force[tgx].x));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[tgx].y));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[tgx].z));
unsigned int offset = atom2;
STORE_DERIVATIVES_2
#else
......
......@@ -20,9 +20,9 @@ KERNEL void computeGradientChainRuleTerms(GLOBAL const real4* RESTRICT posq,
#endif
COMPUTE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS
forceBuffers[index] = (mm_long) (force.x*0x100000000);
forceBuffers[index+PADDED_NUM_ATOMS] = (mm_long) (force.y*0x100000000);
forceBuffers[index+PADDED_NUM_ATOMS*2] = (mm_long) (force.z*0x100000000);
forceBuffers[index] = realToFixedPoint(force.x);
forceBuffers[index+PADDED_NUM_ATOMS] = realToFixedPoint(force.y);
forceBuffers[index+PADDED_NUM_ATOMS*2] = realToFixedPoint(force.z);
#else
forceBuffers[index] = make_real4(force.x, force.y, force.z, 0);
#endif
......
......@@ -139,11 +139,11 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset1 = x*TILE_SIZE + tgx;
ATOMIC_ADD(&global_value[offset1], (mm_ulong) ((mm_long) (value*0x100000000)));
ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
STORE_PARAM_DERIVS1
if (x != y) {
unsigned int offset2 = y*TILE_SIZE + tgx;
ATOMIC_ADD(&global_value[offset2], (mm_ulong) ((mm_long) (local_value[LOCAL_ID]*0x100000000)));
ATOMIC_ADD(&global_value[offset2], (mm_ulong) realToFixedPoint(local_value[LOCAL_ID]));
STORE_PARAM_DERIVS2
}
#else
......@@ -319,11 +319,11 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset1 = atom1;
ATOMIC_ADD(&global_value[offset1], (mm_ulong) ((mm_long) (value*0x100000000)));
ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
STORE_PARAM_DERIVS1
if (atom2 < PADDED_NUM_ATOMS) {
unsigned int offset2 = atom2;
ATOMIC_ADD(&global_value[offset2], (mm_ulong) ((mm_long) (local_value[LOCAL_ID]*0x100000000)));
ATOMIC_ADD(&global_value[offset2], (mm_ulong) realToFixedPoint(local_value[LOCAL_ID]));
STORE_PARAM_DERIVS2
}
#else
......
......@@ -86,7 +86,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset1 = atom1;
ATOMIC_ADD(&global_value[offset1], (mm_ulong) ((mm_long) (value*0x100000000)));
ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
#else
unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset1] += value;
......@@ -148,7 +148,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset1 = atom1;
ATOMIC_ADD(&global_value[offset1], (mm_ulong) ((mm_long) (value*0x100000000)));
ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
#else
unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset1] += value;
......@@ -161,7 +161,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset2 = y*TILE_SIZE+tgx;
ATOMIC_ADD(&global_value[offset2], (mm_ulong) ((mm_long) (local_value[tgx]*0x100000000)));
ATOMIC_ADD(&global_value[offset2], (mm_ulong) realToFixedPoint(local_value[tgx]));
#else
unsigned int offset2 = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset2] += local_value[tgx];
......@@ -275,7 +275,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset1 = atom1;
ATOMIC_ADD(&global_value[offset1], (mm_ulong) ((mm_long) (value*0x100000000)));
ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
#else
unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset1] += value;
......@@ -324,7 +324,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset1 = atom1;
ATOMIC_ADD(&global_value[offset1], (mm_ulong) ((mm_long) (value*0x100000000)));
ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
#else
unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset1] += value;
......@@ -344,7 +344,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset2 = atom2;
ATOMIC_ADD(&global_value[offset2], (mm_ulong) ((mm_long) (local_value[tgx]*0x100000000)));
ATOMIC_ADD(&global_value[offset2], (mm_ulong) realToFixedPoint(local_value[tgx]));
#else
unsigned int offset2 = atom2 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset2] += local_value[tgx];
......
......@@ -116,21 +116,21 @@ KERNEL void computeDonorForces(
if (donorIndex < NUM_DONORS) {
#ifdef SUPPORTS_64_BIT_ATOMICS
if (atoms.x > -1) {
ATOMIC_ADD(&force[atoms.x], (mm_ulong) ((mm_long) (f1.x*0x100000000)));
ATOMIC_ADD(&force[atoms.x+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f1.y*0x100000000)));
ATOMIC_ADD(&force[atoms.x+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f1.z*0x100000000)));
ATOMIC_ADD(&force[atoms.x], (mm_ulong) realToFixedPoint(f1.x));
ATOMIC_ADD(&force[atoms.x+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f1.y));
ATOMIC_ADD(&force[atoms.x+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f1.z));
MEM_FENCE;
}
if (atoms.y > -1) {
ATOMIC_ADD(&force[atoms.y], (mm_ulong) ((mm_long) (f2.x*0x100000000)));
ATOMIC_ADD(&force[atoms.y+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f2.y*0x100000000)));
ATOMIC_ADD(&force[atoms.y+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f2.z*0x100000000)));
ATOMIC_ADD(&force[atoms.y], (mm_ulong) realToFixedPoint(f2.x));
ATOMIC_ADD(&force[atoms.y+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f2.y));
ATOMIC_ADD(&force[atoms.y+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f2.z));
MEM_FENCE;
}
if (atoms.z > -1) {
ATOMIC_ADD(&force[atoms.z], (mm_ulong) ((mm_long) (f3.x*0x100000000)));
ATOMIC_ADD(&force[atoms.z+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f3.y*0x100000000)));
ATOMIC_ADD(&force[atoms.z+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f3.z*0x100000000)));
ATOMIC_ADD(&force[atoms.z], (mm_ulong) realToFixedPoint(f3.x));
ATOMIC_ADD(&force[atoms.z+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f3.y));
ATOMIC_ADD(&force[atoms.z+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f3.z));
MEM_FENCE;
}
#else
......@@ -233,21 +233,21 @@ KERNEL void computeAcceptorForces(
if (acceptorIndex < NUM_ACCEPTORS) {
#ifdef SUPPORTS_64_BIT_ATOMICS
if (atoms.x > -1) {
ATOMIC_ADD(&force[atoms.x], (mm_ulong) ((mm_long) (f1.x*0x100000000)));
ATOMIC_ADD(&force[atoms.x+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f1.y*0x100000000)));
ATOMIC_ADD(&force[atoms.x+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f1.z*0x100000000)));
ATOMIC_ADD(&force[atoms.x], (mm_ulong) realToFixedPoint(f1.x));
ATOMIC_ADD(&force[atoms.x+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f1.y));
ATOMIC_ADD(&force[atoms.x+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f1.z));
MEM_FENCE;
}
if (atoms.y > -1) {
ATOMIC_ADD(&force[atoms.y], (mm_ulong) ((mm_long) (f2.x*0x100000000)));
ATOMIC_ADD(&force[atoms.y+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f2.y*0x100000000)));
ATOMIC_ADD(&force[atoms.y+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f2.z*0x100000000)));
ATOMIC_ADD(&force[atoms.y], (mm_ulong) realToFixedPoint(f2.x));
ATOMIC_ADD(&force[atoms.y+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f2.y));
ATOMIC_ADD(&force[atoms.y+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f2.z));
MEM_FENCE;
}
if (atoms.z > -1) {
ATOMIC_ADD(&force[atoms.z], (mm_ulong) ((mm_long) (f3.x*0x100000000)));
ATOMIC_ADD(&force[atoms.z+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f3.y*0x100000000)));
ATOMIC_ADD(&force[atoms.z+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f3.z*0x100000000)));
ATOMIC_ADD(&force[atoms.z], (mm_ulong) realToFixedPoint(f3.x));
ATOMIC_ADD(&force[atoms.z+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f3.y));
ATOMIC_ADD(&force[atoms.z+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(f3.z));
MEM_FENCE;
}
#else
......
......@@ -2,9 +2,9 @@
* Record the force on an atom to global memory.
*/
inline DEVICE void storeForce(int atom, real3 force, GLOBAL mm_ulong* RESTRICT forceBuffers) {
ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) ((mm_long) (force.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
}
/**
......
......@@ -137,13 +137,13 @@ KERNEL void computeInteractionGroups(
}
#ifdef SUPPORTS_64_BIT_ATOMICS
if (exclusions != 0) {
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (force.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
}
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fx*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fy*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fz*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fx));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fy));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fz));
SYNC_WARPS;
#else
writeForces(forceBuffers, localData, atom2);
......
......@@ -102,9 +102,9 @@ KERNEL void calculateEwaldForces(GLOBAL mm_long* RESTRICT forceBuffers, GLOBAL c
// Record the force on the atom.
forceBuffers[atom] += (mm_long) (force.x*0x100000000);
forceBuffers[atom+PADDED_NUM_ATOMS] += (mm_long) (force.y*0x100000000);
forceBuffers[atom+2*PADDED_NUM_ATOMS] += (mm_long) (force.z*0x100000000);
forceBuffers[atom] += realToFixedPoint(force.x);
forceBuffers[atom+PADDED_NUM_ATOMS] += realToFixedPoint(force.y);
forceBuffers[atom+2*PADDED_NUM_ATOMS] += realToFixedPoint(force.z);
atom += GLOBAL_SIZE;
}
}
......@@ -384,19 +384,19 @@ KERNEL void computeForce(
real sigma = data1.sig.x+data2.sig.x;
real epsilon = data1.eps.x*data2.eps.x;
computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
ATOMIC_ADD(&forceBuffers[index2], (mm_ulong) ((mm_long) (force2.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force2.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force2.z*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index2], (mm_ulong) ((mm_long) (torque2.x*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque2.y*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque2.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[index2], (mm_ulong) realToFixedPoint(force2.x));
ATOMIC_ADD(&forceBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force2.y));
ATOMIC_ADD(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force2.z));
ATOMIC_ADD(&torqueBuffers[index2], (mm_ulong) realToFixedPoint(torque2.x));
ATOMIC_ADD(&torqueBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque2.y));
ATOMIC_ADD(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque2.z));
}
ATOMIC_ADD(&forceBuffers[index1], (mm_ulong) ((mm_long) (force1.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force1.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force1.z*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index1], (mm_ulong) ((mm_long) (torque1.x*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque1.y*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque1.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[index1], (mm_ulong) realToFixedPoint(force1.x));
ATOMIC_ADD(&forceBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force1.y));
ATOMIC_ADD(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force1.z));
ATOMIC_ADD(&torqueBuffers[index1], (mm_ulong) realToFixedPoint(torque1.x));
ATOMIC_ADD(&torqueBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque1.y));
ATOMIC_ADD(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque1.z));
}
#else
for (int atom1 = GLOBAL_ID; atom1 < numAtoms; atom1 += GLOBAL_SIZE) {
......@@ -432,19 +432,19 @@ KERNEL void computeForce(
real sigma = data1.sig.x+data2.sig.x;
real epsilon = data1.eps.x*data2.eps.x;
computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
ATOMIC_ADD(&forceBuffers[index2], (mm_ulong) ((mm_long) (force2.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force2.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force2.z*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index2], (mm_ulong) ((mm_long) (torque2.x*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque2.y*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque2.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[index2], (mm_ulong) realToFixedPoint(force2.x));
ATOMIC_ADD(&forceBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force2.y));
ATOMIC_ADD(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force2.z));
ATOMIC_ADD(&torqueBuffers[index2], (mm_ulong) realToFixedPoint(torque2.x));
ATOMIC_ADD(&torqueBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque2.y));
ATOMIC_ADD(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque2.z));
}
ATOMIC_ADD(&forceBuffers[index1], (mm_ulong) ((mm_long) (force1.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force1.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force1.z*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index1], (mm_ulong) ((mm_long) (torque1.x*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque1.y*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque1.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[index1], (mm_ulong) realToFixedPoint(force1.x));
ATOMIC_ADD(&forceBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force1.y));
ATOMIC_ADD(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force1.z));
ATOMIC_ADD(&torqueBuffers[index1], (mm_ulong) realToFixedPoint(torque1.x));
ATOMIC_ADD(&torqueBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque1.y));
ATOMIC_ADD(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque1.z));
}
#endif
......@@ -466,18 +466,18 @@ KERNEL void computeForce(
if (r2 < CUTOFF_SQUARED) {
#endif
computeOneInteraction(&data1, &data2, params.x, params.y, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
ATOMIC_ADD(&forceBuffers[index1], (mm_ulong) ((mm_long) (force1.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force1.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force1.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[index2], (mm_ulong) ((mm_long) (force2.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force2.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force2.z*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index1], (mm_ulong) ((mm_long) (torque1.x*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque1.y*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque1.z*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index2], (mm_ulong) ((mm_long) (torque2.x*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque2.y*0x100000000)));
ATOMIC_ADD(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (torque2.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[index1], (mm_ulong) realToFixedPoint(force1.x));
ATOMIC_ADD(&forceBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force1.y));
ATOMIC_ADD(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force1.z));
ATOMIC_ADD(&forceBuffers[index2], (mm_ulong) realToFixedPoint(force2.x));
ATOMIC_ADD(&forceBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force2.y));
ATOMIC_ADD(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force2.z));
ATOMIC_ADD(&torqueBuffers[index1], (mm_ulong) realToFixedPoint(torque1.x));
ATOMIC_ADD(&torqueBuffers[index1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque1.y));
ATOMIC_ADD(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque1.z));
ATOMIC_ADD(&torqueBuffers[index2], (mm_ulong) realToFixedPoint(torque2.x));
ATOMIC_ADD(&torqueBuffers[index2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque2.y));
ATOMIC_ADD(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(torque2.z));
#ifdef USE_CUTOFF
}
#endif
......@@ -522,16 +522,16 @@ KERNEL void applyTorques(
yforce += f;
force -= f;
}
ATOMIC_ADD(&forceBuffers[originalIndex], (mm_ulong) ((mm_long) (force.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[originalIndex+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[originalIndex+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[axisParticles.x], (mm_ulong) ((mm_long) (xforce.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[axisParticles.x+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (xforce.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[axisParticles.x+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (xforce.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[originalIndex], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[originalIndex+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[originalIndex+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
ATOMIC_ADD(&forceBuffers[axisParticles.x], (mm_ulong) realToFixedPoint(xforce.x));
ATOMIC_ADD(&forceBuffers[axisParticles.x+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(xforce.y));
ATOMIC_ADD(&forceBuffers[axisParticles.x+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(xforce.z));
if (axisParticles.y != -1) {
ATOMIC_ADD(&forceBuffers[axisParticles.y], (mm_ulong) ((mm_long) (yforce.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[axisParticles.y+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (yforce.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[axisParticles.y+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (yforce.z*0x100000000)));
ATOMIC_ADD(&forceBuffers[axisParticles.y], (mm_ulong) realToFixedPoint(yforce.x));
ATOMIC_ADD(&forceBuffers[axisParticles.y+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(yforce.y));
ATOMIC_ADD(&forceBuffers[axisParticles.y+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(yforce.z));
}
}
}
......
......@@ -148,10 +148,10 @@ KERNEL void computeBornSum(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&global_bornSum[offset], (mm_ulong) ((mm_long) (bornSum*0x100000000)));
ATOMIC_ADD(&global_bornSum[offset], (mm_ulong) realToFixedPoint(bornSum));
if (x != y) {
offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&global_bornSum[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].bornSum*0x100000000)));
ATOMIC_ADD(&global_bornSum[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].bornSum));
}
#else
unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS;
......@@ -352,9 +352,9 @@ KERNEL void computeBornSum(
unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&global_bornSum[atom1], (mm_ulong) ((mm_long) (bornSum*0x100000000)));
ATOMIC_ADD(&global_bornSum[atom1], (mm_ulong) realToFixedPoint(bornSum));
if (atom2 < PADDED_NUM_ATOMS)
ATOMIC_ADD(&global_bornSum[atom2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].bornSum*0x100000000)));
ATOMIC_ADD(&global_bornSum[atom2], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].bornSum));
#else
unsigned int offset1 = atom1 + warp*PADDED_NUM_ATOMS;
unsigned int offset2 = atom2 + warp*PADDED_NUM_ATOMS;
......@@ -534,16 +534,16 @@ KERNEL void computeGBSAForce1(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (force.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000)));
ATOMIC_ADD(&global_bornForce[offset], (mm_ulong) ((mm_long) (force.w*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
ATOMIC_ADD(&global_bornForce[offset], (mm_ulong) realToFixedPoint(force.w));
if (x != y) {
offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fx*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fy*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fz*0x100000000)));
ATOMIC_ADD(&global_bornForce[offset], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fw*0x100000000)));
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fx));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fy));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fz));
ATOMIC_ADD(&global_bornForce[offset], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fw));
}
#else
unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS;
......@@ -758,15 +758,15 @@ KERNEL void computeGBSAForce1(
unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (force.x*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000)));
ATOMIC_ADD(&global_bornForce[atom1], (mm_ulong) ((mm_long) (force.w*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
ATOMIC_ADD(&global_bornForce[atom1], (mm_ulong) realToFixedPoint(force.w));
if (atom2 < PADDED_NUM_ATOMS) {
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fx*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fy*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fz*0x100000000)));
ATOMIC_ADD(&global_bornForce[atom2], (mm_ulong) ((mm_long) (localData[LOCAL_ID].fw*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fx));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fy));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fz));
ATOMIC_ADD(&global_bornForce[atom2], (mm_ulong) realToFixedPoint(localData[LOCAL_ID].fw));
}
#else
unsigned int offset1 = atom1 + warp*PADDED_NUM_ATOMS;
......
......@@ -74,10 +74,10 @@ KERNEL void reduceBornForce(
energy += saTerm;
force *= bornRadius*bornRadius*obcChain[index];
#ifdef SUPPORTS_64_BIT_ATOMICS
bornForce[index] = (mm_long) (force*0x100000000);
bornForce[index] = realToFixedPoint(force);
#else
bornForce[index] = force;
#endif
}
energyBuffer[GLOBAL_ID] += energy/-6;
}
\ No newline at end of file
}
......@@ -88,7 +88,7 @@ KERNEL void computeBornSum(
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&global_bornSum[atom1], (mm_long) (bornSum*0x100000000));
ATOMIC_ADD(&global_bornSum[atom1], realToFixedPoint(bornSum));
#else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
global_bornSum[offset] += bornSum;
......@@ -150,7 +150,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&global_bornSum[atom1], (mm_long) (bornSum*0x100000000));
ATOMIC_ADD(&global_bornSum[atom1], realToFixedPoint(bornSum));
#else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
global_bornSum[offset] += bornSum;
......@@ -162,7 +162,7 @@ KERNEL void computeBornSum(
for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&global_bornSum[offset], (mm_long) (localData[tgx].bornSum*0x100000000));
ATOMIC_ADD(&global_bornSum[offset], realToFixedPoint(localData[tgx].bornSum));
#else
unsigned int offset = y*TILE_SIZE+tgx + GROUP_ID*PADDED_NUM_ATOMS;
global_bornSum[offset] += localData[tgx].bornSum;
......@@ -297,7 +297,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&global_bornSum[atom1], (mm_long) (bornSum*0x100000000));
ATOMIC_ADD(&global_bornSum[atom1], realToFixedPoint(bornSum));
#else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
global_bornSum[offset] += bornSum;
......@@ -360,7 +360,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&global_bornSum[atom1], (mm_long) (bornSum*0x100000000));
ATOMIC_ADD(&global_bornSum[atom1], realToFixedPoint(bornSum));
#else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
global_bornSum[offset] += bornSum;
......@@ -378,7 +378,7 @@ KERNEL void computeBornSum(
#endif
if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&global_bornSum[atom2], (mm_long) (localData[tgx].bornSum*0x100000000));
ATOMIC_ADD(&global_bornSum[atom2], realToFixedPoint(localData[tgx].bornSum));
#else
unsigned int offset = atom2 + GROUP_ID*PADDED_NUM_ATOMS;
global_bornSum[offset] += localData[tgx].bornSum;
......@@ -491,10 +491,10 @@ KERNEL void computeGBSAForce1(
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_long) (force.x*0x100000000));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_long) (force.y*0x100000000));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_long) (force.z*0x100000000));
ATOMIC_ADD(&global_bornForce[atom1], (mm_long) (force.w*0x100000000));
ATOMIC_ADD(&forceBuffers[atom1], realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
ATOMIC_ADD(&global_bornForce[atom1], realToFixedPoint(force.w));
#else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
forceBuffers[offset] += make_real4(force.x, force.y, force.z, 0);
......@@ -562,10 +562,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_long) (force.x*0x100000000));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_long) (force.y*0x100000000));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_long) (force.z*0x100000000));
ATOMIC_ADD(&global_bornForce[atom1], (mm_long) (force.w*0x100000000));
ATOMIC_ADD(&forceBuffers[atom1], realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
ATOMIC_ADD(&global_bornForce[atom1], realToFixedPoint(force.w));
#else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
forceBuffers[offset] += make_real4(force.x, force.y, force.z, 0);
......@@ -578,10 +578,10 @@ KERNEL void computeGBSAForce1(
for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_long) (localData[tgx].fx*0x100000000));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_long) (localData[tgx].fy*0x100000000));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_long) (localData[tgx].fz*0x100000000));
ATOMIC_ADD(&global_bornForce[offset], (mm_long) (localData[tgx].fw*0x100000000));
ATOMIC_ADD(&forceBuffers[offset], realToFixedPoint(localData[tgx].fx));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fy));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fz));
ATOMIC_ADD(&global_bornForce[offset], realToFixedPoint(localData[tgx].fw));
#else
unsigned int offset = y*TILE_SIZE+tgx + GROUP_ID*PADDED_NUM_ATOMS;
real4 f = forceBuffers[offset];
......@@ -723,10 +723,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_long) (force.x*0x100000000));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_long) (force.y*0x100000000));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_long) (force.z*0x100000000));
ATOMIC_ADD(&global_bornForce[atom1], (mm_long) (force.w*0x100000000));
ATOMIC_ADD(&forceBuffers[atom1], realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
ATOMIC_ADD(&global_bornForce[atom1], realToFixedPoint(force.w));
#else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
forceBuffers[offset] += make_real4(force.x, force.y, force.z, 0);
......@@ -791,10 +791,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_long) (force.x*0x100000000));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_long) (force.y*0x100000000));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_long) (force.z*0x100000000));
ATOMIC_ADD(&global_bornForce[atom1], (mm_long) (force.w*0x100000000));
ATOMIC_ADD(&forceBuffers[atom1], realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], realToFixedPoint(force.z));
ATOMIC_ADD(&global_bornForce[atom1], realToFixedPoint(force.w));
#else
unsigned int offset = atom1 + GROUP_ID*PADDED_NUM_ATOMS;
forceBuffers[offset] += make_real4(force.x, force.y, force.z, 0);
......@@ -813,10 +813,10 @@ KERNEL void computeGBSAForce1(
#endif
if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom2], (mm_long) (localData[tgx].fx*0x100000000));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_long) (localData[tgx].fy*0x100000000));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_long) (localData[tgx].fz*0x100000000));
ATOMIC_ADD(&global_bornForce[atom2], (mm_long) (localData[tgx].fw*0x100000000));
ATOMIC_ADD(&forceBuffers[atom2], realToFixedPoint(localData[tgx].fx));
ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fy));
ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], realToFixedPoint(localData[tgx].fz));
ATOMIC_ADD(&global_bornForce[atom2], realToFixedPoint(localData[tgx].fw));
#else
unsigned int offset = atom2 + GROUP_ID*PADDED_NUM_ATOMS;
real4 f = forceBuffers[offset];
......
......@@ -896,13 +896,13 @@ inline DEVICE real3 loadForce(int index, GLOBAL const mm_long* RESTRICT force) {
inline DEVICE void addForce(int index, GLOBAL mm_long* RESTRICT force, real3 value) {
GLOBAL mm_ulong* f = (GLOBAL mm_ulong*) force;
#ifdef HAS_OVERLAPPING_VSITES
ATOMIC_ADD(&f[index], (mm_ulong) ((mm_long) (value.x*0x100000000)));
ATOMIC_ADD(&f[index+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (value.y*0x100000000)));
ATOMIC_ADD(&f[index+PADDED_NUM_ATOMS*2], (mm_ulong) ((mm_long) (value.z*0x100000000)));
ATOMIC_ADD(&f[index], (mm_ulong) realToFixedPoint(value.x));
ATOMIC_ADD(&f[index+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(value.y));
ATOMIC_ADD(&f[index+PADDED_NUM_ATOMS*2], (mm_ulong) realToFixedPoint(value.z));
#else
f[index] += (mm_ulong) ((mm_long) (value.x*0x100000000));
f[index+PADDED_NUM_ATOMS] += (mm_ulong) ((mm_long) (value.y*0x100000000));
f[index+PADDED_NUM_ATOMS*2] += (mm_ulong) ((mm_long) (value.z*0x100000000));
f[index] += (mm_ulong) realToFixedPoint(value.x);
f[index+PADDED_NUM_ATOMS] += (mm_ulong) realToFixedPoint(value.y);
f[index+PADDED_NUM_ATOMS*2] += (mm_ulong) realToFixedPoint(value.z);
#endif
}
......
......@@ -154,7 +154,7 @@ KERNEL void gridSpreadCharge(GLOBAL const real4* RESTRICT posq,
int index = ybase + zindexTable[zindex];
real add = dxdy*data[iz].z;
#ifdef USE_FIXED_POINT_CHARGE_SPREADING
ATOMIC_ADD(&pmeGrid[index], (mm_ulong) ((mm_long) (add*0x100000000)));
ATOMIC_ADD(&pmeGrid[index], (mm_ulong) realToFixedPoint(add));
#else
ATOMIC_ADD(&pmeGrid[index], add);
#endif
......@@ -593,13 +593,13 @@ KERNEL void gridInterpolateForce(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ul
real forceY = -q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y);
real forceZ = -q*(force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z);
#ifdef USE_PME_STREAM
ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) ((mm_long) (forceX*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forceY*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (forceZ*0x100000000)));
ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) realToFixedPoint(forceX));
ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forceY));
ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(forceZ));
#else
forceBuffers[atom] += (mm_ulong) ((mm_long) (forceX*0x100000000));
forceBuffers[atom+PADDED_NUM_ATOMS] += (mm_ulong) ((mm_long) (forceY*0x100000000));
forceBuffers[atom+2*PADDED_NUM_ATOMS] += (mm_ulong) ((mm_long) (forceZ*0x100000000));
forceBuffers[atom] += (mm_ulong) realToFixedPoint(forceX);
forceBuffers[atom+PADDED_NUM_ATOMS] += (mm_ulong) realToFixedPoint(forceY);
forceBuffers[atom+2*PADDED_NUM_ATOMS] += (mm_ulong) realToFixedPoint(forceZ);
#endif
}
}
......@@ -607,9 +607,9 @@ KERNEL void gridInterpolateForce(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ul
KERNEL void addForces(GLOBAL const real4* RESTRICT forces, GLOBAL mm_long* RESTRICT forceBuffers) {
for (int atom = GLOBAL_ID; atom < NUM_ATOMS; atom += GLOBAL_SIZE) {
real4 f = forces[atom];
forceBuffers[atom] += (mm_long) (f.x*0x100000000);
forceBuffers[atom+PADDED_NUM_ATOMS] += (mm_long) (f.y*0x100000000);
forceBuffers[atom+2*PADDED_NUM_ATOMS] += (mm_long) (f.z*0x100000000);
forceBuffers[atom] += realToFixedPoint(f.x);
forceBuffers[atom+PADDED_NUM_ATOMS] += realToFixedPoint(f.y);
forceBuffers[atom+2*PADDED_NUM_ATOMS] += realToFixedPoint(f.z);
}
}
......
......@@ -90,8 +90,8 @@ KERNEL void computeRMSDForces(int numParticles, int paddedNumAtoms, GLOBAL const
buffer[1]*refPos.x + buffer[4]*refPos.y + buffer[7]*refPos.z,
buffer[2]*refPos.x + buffer[5]*refPos.y + buffer[8]*refPos.z);
real3 force = (rotatedRef-pos)*scale;
forceBuffers[index] += (mm_long) (force.x*0x100000000);
forceBuffers[index+paddedNumAtoms] += (mm_long) (force.y*0x100000000);
forceBuffers[index+2*paddedNumAtoms] += (mm_long) (force.z*0x100000000);
forceBuffers[index] += realToFixedPoint(force.x);
forceBuffers[index+paddedNumAtoms] += realToFixedPoint(force.y);
forceBuffers[index+2*paddedNumAtoms] += realToFixedPoint(force.z);
}
}
......@@ -166,9 +166,9 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
}
s<<computeForce<<"\n";
for (int i = 0; i < numAtoms; i++) {
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".x*0x100000000)));\n";
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".y*0x100000000)));\n";
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS*2], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".z*0x100000000)));\n";
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"], static_cast<unsigned long long>(realToFixedPoint(force"<<(i+1)<<".x)));\n";
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force"<<(i+1)<<".y)));\n";
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS*2], static_cast<unsigned long long>(realToFixedPoint(force"<<(i+1)<<".z)));\n";
s<<" __threadfence_block();\n";
}
s<<"}\n";
......
......@@ -24,3 +24,7 @@ typedef unsigned long long mm_ulong;
#define SUPPORTS_64_BIT_ATOMICS 1
#define SUPPORTS_DOUBLE_PRECISION 1
__device__ inline long long realToFixedPoint(real x) {
return static_cast<long long>(x * 0x100000000);
}
......@@ -46,11 +46,11 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
*/
__device__ void saveSingleForce(int atom, real3 force, unsigned long long* forceBuffers) {
if (force.x != 0)
atomicAdd(&forceBuffers[atom], static_cast<unsigned long long>((long long) (force.x*0x100000000)));
atomicAdd(&forceBuffers[atom], static_cast<unsigned long long>(realToFixedPoint(force.x)));
if (force.y != 0)
atomicAdd(&forceBuffers[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000)));
atomicAdd(&forceBuffers[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.y)));
if (force.z != 0)
atomicAdd(&forceBuffers[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000)));
atomicAdd(&forceBuffers[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.z)));
}
/**
......@@ -310,22 +310,22 @@ extern "C" __global__ void computeNonbonded(
// write results for off diagonal tiles
#ifdef INCLUDE_FORCES
#ifdef ENABLE_SHUFFLE
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (shflForce.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.z*0x100000000)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>(realToFixedPoint(shflForce.x)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.y)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.z)));
#else
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>(realToFixedPoint(localData[threadIdx.x].fx)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(localData[threadIdx.x].fy)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(localData[threadIdx.x].fz)));
#endif
#endif
}
// Write results for on and off diagonal tiles
#ifdef INCLUDE_FORCES
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000)));
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>(realToFixedPoint(force.x)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.y)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.z)));
#endif
}
......@@ -581,9 +581,9 @@ extern "C" __global__ void computeNonbonded(
// Write results.
#ifdef INCLUDE_FORCES
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000)));
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>(realToFixedPoint(force.x)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.y)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.z)));
#ifdef USE_CUTOFF
unsigned int atom2 = atomIndices[threadIdx.x];
#else
......@@ -591,13 +591,13 @@ extern "C" __global__ void computeNonbonded(
#endif
if (atom2 < PADDED_NUM_ATOMS) {
#ifdef ENABLE_SHUFFLE
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (shflForce.x*0x100000000)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.y*0x100000000)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.z*0x100000000)));
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>(realToFixedPoint(shflForce.x)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.y)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.z)));
#else
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>(realToFixedPoint(localData[threadIdx.x].fx)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(localData[threadIdx.x].fy)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(localData[threadIdx.x].fz)));
#endif
}
#endif
......@@ -652,4 +652,4 @@ extern "C" __global__ void computeNonbonded(
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
#endif
SAVE_DERIVATIVES
}
\ No newline at end of file
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment