Commit 1cb85ba3 authored by Peter Eastman's avatar Peter Eastman
Browse files

Minor cleanup to simplify code and eliminate unnecessary memory access

parent a5640f95
...@@ -590,14 +590,15 @@ inline __device__ void loadAtomData4(AtomData4& data, int atom, const real4* __r ...@@ -590,14 +590,15 @@ inline __device__ void loadAtomData4(AtomData4& data, int atom, const real4* __r
data.thole = temp.y; data.thole = temp.y;
} }
__device__ real computeDScaleFactor(unsigned int polarizationGroup) { __device__ real computeDScaleFactor(unsigned int polarizationGroup, int index) {
return (polarizationGroup & 1 ? 0 : 1); return (polarizationGroup & 1<<index ? 0 : 1);
} }
__device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGroup) { __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGroup, int index) {
bool x = (covalent.x & 1); int mask = 1<<index;
bool y = (covalent.y & 1); bool x = (covalent.x & mask);
bool p = (polarizationGroup & 1); bool y = (covalent.y & mask);
bool p = (polarizationGroup & mask);
return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f)); return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f));
} }
...@@ -679,15 +680,12 @@ extern "C" __global__ void computeEDiffForce( ...@@ -679,15 +680,12 @@ extern "C" __global__ void computeEDiffForce(
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce; real3 tempForce;
real tempEnergy; real tempEnergy;
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup, j);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup, j);
computeOneEDiffInteractionF1(data, localData[tbx+j], d, p, tempEnergy, tempForce); computeOneEDiffInteractionF1(data, localData[tbx+j], d, p, tempEnergy, tempForce);
energy += 0.25f*tempEnergy; energy += 0.25f*tempEnergy;
data.force += tempForce; data.force += tempForce;
} }
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
...@@ -698,20 +696,15 @@ extern "C" __global__ void computeEDiffForce( ...@@ -698,20 +696,15 @@ extern "C" __global__ void computeEDiffForce(
// Compute torques. // Compute torques.
data.force = make_real3(0); data.force = make_real3(0);
covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j; int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempTorque; real3 tempTorque;
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup, j);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup, j);
computeOneEDiffInteractionT1(data, localData[tbx+j], d, p, tempTorque); computeOneEDiffInteractionT1(data, localData[tbx+j], d, p, tempTorque);
data.force += tempTorque; data.force += tempTorque;
} }
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
...@@ -726,9 +719,6 @@ extern "C" __global__ void computeEDiffForce( ...@@ -726,9 +719,6 @@ extern "C" __global__ void computeEDiffForce(
localData[threadIdx.x].force = make_real3(0); localData[threadIdx.x].force = make_real3(0);
uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0)); uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0); unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx));
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
// Compute forces. // Compute forces.
...@@ -738,16 +728,13 @@ extern "C" __global__ void computeEDiffForce( ...@@ -738,16 +728,13 @@ extern "C" __global__ void computeEDiffForce(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce; real3 tempForce;
real tempEnergy; real tempEnergy;
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup, tj);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup, tj);
computeOneEDiffInteractionF1(data, localData[tbx+tj], d, p, tempEnergy, tempForce); computeOneEDiffInteractionF1(data, localData[tbx+tj], d, p, tempEnergy, tempForce);
energy += 0.5f*tempEnergy; energy += 0.5f*tempEnergy;
data.force += tempForce; data.force += tempForce;
localData[tbx+tj].force -= tempForce; localData[tbx+tj].force -= tempForce;
} }
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
...@@ -762,44 +749,36 @@ extern "C" __global__ void computeEDiffForce( ...@@ -762,44 +749,36 @@ extern "C" __global__ void computeEDiffForce(
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
} }
// Compute torques. // Compute torques.
data.force = make_real3(0); data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0); localData[threadIdx.x].force = make_real3(0);
covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0)); for (j = 0; j < TILE_SIZE; j++) {
polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0); int atom2 = y*TILE_SIZE+tj;
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx)); if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx)); real3 tempTorque;
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx)); float d = computeDScaleFactor(polarizationGroup, tj);
for (j = 0; j < TILE_SIZE; j++) { float p = computePScaleFactor(covalent, polarizationGroup, tj);
int atom2 = y*TILE_SIZE+tj; computeOneEDiffInteractionT1(data, localData[tbx+tj], d, p, tempTorque);
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { data.force += tempTorque;
real3 tempTorque; computeOneEDiffInteractionT3(data, localData[tbx+tj], d, p, tempTorque);
float d = computeDScaleFactor(polarizationGroup); localData[tbx+tj].force += tempTorque;
float p = computePScaleFactor(covalent, polarizationGroup);
computeOneEDiffInteractionT1(data, localData[tbx+tj], d, p, tempTorque);
data.force += tempTorque;
computeOneEDiffInteractionT3(data, localData[tbx+tj], d, p, tempTorque);
localData[tbx+tj].force += tempTorque;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
} }
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&torqueBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
} }
} }
pos++; pos++;
......
...@@ -35,20 +35,22 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res ...@@ -35,20 +35,22 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data.thole = temp.y; data.thole = temp.y;
} }
__device__ real computeDScaleFactor(unsigned int polarizationGroup) { __device__ real computeDScaleFactor(unsigned int polarizationGroup, int index) {
return (polarizationGroup & 1 ? 0 : 1); return (polarizationGroup & 1<<index ? 0 : 1);
} }
__device__ float computeMScaleFactor(uint2 covalent) { __device__ float computeMScaleFactor(uint2 covalent, int index) {
bool x = (covalent.x & 1); int mask = 1<<index;
bool y = (covalent.y & 1); bool x = (covalent.x & mask);
bool y = (covalent.y & mask);
return (x ? (y ? 0.0f : 0.4f) : (y ? 0.8f : 1.0f)); return (x ? (y ? 0.0f : 0.4f) : (y ? 0.8f : 1.0f));
} }
__device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGroup) { __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGroup, int index) {
bool x = (covalent.x & 1); int mask = 1<<index;
bool y = (covalent.y & 1); bool x = (covalent.x & mask);
bool p = (polarizationGroup & 1); bool y = (covalent.y & mask);
bool p = (polarizationGroup & mask);
return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f)); return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f));
} }
...@@ -147,16 +149,13 @@ extern "C" __global__ void computeElectrostatics( ...@@ -147,16 +149,13 @@ extern "C" __global__ void computeElectrostatics(
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce; real3 tempForce;
real tempEnergy; real tempEnergy;
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup, j);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup, j);
float m = computeMScaleFactor(covalent); float m = computeMScaleFactor(covalent, j);
computeOneInteractionF1(data, localData[tbx+j], d, p, m, tempEnergy, tempForce); computeOneInteractionF1(data, localData[tbx+j], d, p, m, tempEnergy, tempForce);
data.force += tempForce; data.force += tempForce;
energy += 0.5f*tempEnergy; energy += 0.5f*tempEnergy;
} }
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
...@@ -166,21 +165,16 @@ extern "C" __global__ void computeElectrostatics( ...@@ -166,21 +165,16 @@ extern "C" __global__ void computeElectrostatics(
// Compute torques. // Compute torques.
data.force = make_real3(0); data.force = make_real3(0);
covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j; int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce; real3 tempForce;
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup, j);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup, j);
float m = computeMScaleFactor(covalent); float m = computeMScaleFactor(covalent, j);
computeOneInteractionT1(data, localData[tbx+j], d, p, m, tempForce); computeOneInteractionT1(data, localData[tbx+j], d, p, m, tempForce);
data.force += tempForce; data.force += tempForce;
} }
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
...@@ -324,9 +318,6 @@ extern "C" __global__ void computeElectrostatics( ...@@ -324,9 +318,6 @@ extern "C" __global__ void computeElectrostatics(
uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0)); uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0); unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx));
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
// Compute forces. // Compute forces.
...@@ -336,17 +327,14 @@ extern "C" __global__ void computeElectrostatics( ...@@ -336,17 +327,14 @@ extern "C" __global__ void computeElectrostatics(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce; real3 tempForce;
real tempEnergy; real tempEnergy;
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup, tj);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup, tj);
float m = computeMScaleFactor(covalent); float m = computeMScaleFactor(covalent, tj);
computeOneInteractionF1(data, localData[tbx+tj], d, p, m, tempEnergy, tempForce); computeOneInteractionF1(data, localData[tbx+tj], d, p, m, tempEnergy, tempForce);
data.force += tempForce; data.force += tempForce;
localData[tbx+tj].force -= tempForce; localData[tbx+tj].force -= tempForce;
energy += tempEnergy; energy += tempEnergy;
} }
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
...@@ -366,26 +354,18 @@ extern "C" __global__ void computeElectrostatics( ...@@ -366,26 +354,18 @@ extern "C" __global__ void computeElectrostatics(
data.force = make_real3(0); data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0); localData[threadIdx.x].force = make_real3(0);
covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx));
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj; int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce; real3 tempForce;
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup, tj);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup, tj);
float m = computeMScaleFactor(covalent); float m = computeMScaleFactor(covalent, tj);
computeOneInteractionT1(data, localData[tbx+tj], d, p, m, tempForce); computeOneInteractionT1(data, localData[tbx+tj], d, p, m, tempForce);
data.force += tempForce; data.force += tempForce;
computeOneInteractionT3(data, localData[tbx+tj], d, p, m, tempForce); computeOneInteractionT3(data, localData[tbx+tj], d, p, m, tempForce);
localData[tbx+tj].force += tempForce; localData[tbx+tj].force += tempForce;
} }
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
} }
data.force *= ENERGY_SCALE_FACTOR; data.force *= ENERGY_SCALE_FACTOR;
......
...@@ -381,14 +381,15 @@ __device__ void computeOneGkInteraction(AtomData& atom1, AtomData& atom2, real3 ...@@ -381,14 +381,15 @@ __device__ void computeOneGkInteraction(AtomData& atom1, AtomData& atom2, real3
} }
#endif #endif
__device__ real computeDScaleFactor(unsigned int polarizationGroup) { __device__ real computeDScaleFactor(unsigned int polarizationGroup, int index) {
return (polarizationGroup & 1 ? 0 : 1); return (polarizationGroup & 1<<index ? 0 : 1);
} }
__device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGroup) { __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGroup, int index) {
bool x = (covalent.x & 1); int mask = 1<<index;
bool y = (covalent.y & 1); bool x = (covalent.x & mask);
bool p = (polarizationGroup & 1); bool y = (covalent.y & mask);
bool p = (polarizationGroup & mask);
return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f)); return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f));
} }
...@@ -499,8 +500,8 @@ extern "C" __global__ void computeFixedField( ...@@ -499,8 +500,8 @@ extern "C" __global__ void computeFixedField(
int atom2 = y*TILE_SIZE+j; int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 fields[4]; real3 fields[4];
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup, j);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup, j);
computeOneInteraction(data, localData[tbx+j], delta, d, p, fields); computeOneInteraction(data, localData[tbx+j], delta, d, p, fields);
data.field += fields[0]; data.field += fields[0];
data.fieldPolar += fields[1]; data.fieldPolar += fields[1];
...@@ -512,9 +513,6 @@ extern "C" __global__ void computeFixedField( ...@@ -512,9 +513,6 @@ extern "C" __global__ void computeFixedField(
data.gkField += fields[0]; data.gkField += fields[0];
} }
#endif #endif
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
} }
} }
else { else {
...@@ -604,9 +602,6 @@ extern "C" __global__ void computeFixedField( ...@@ -604,9 +602,6 @@ extern "C" __global__ void computeFixedField(
uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0)); uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0); unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx));
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
unsigned int tj = tgx; unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
real3 delta = trimTo3(localData[tbx+tj].posq-data.posq); real3 delta = trimTo3(localData[tbx+tj].posq-data.posq);
...@@ -618,8 +613,8 @@ extern "C" __global__ void computeFixedField( ...@@ -618,8 +613,8 @@ extern "C" __global__ void computeFixedField(
int atom2 = y*TILE_SIZE+tj; int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 fields[4]; real3 fields[4];
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup, tj);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup, tj);
computeOneInteraction(data, localData[tbx+tj], delta, d, p, fields); computeOneInteraction(data, localData[tbx+tj], delta, d, p, fields);
data.field += fields[0]; data.field += fields[0];
data.fieldPolar += fields[1]; data.fieldPolar += fields[1];
...@@ -631,9 +626,6 @@ extern "C" __global__ void computeFixedField( ...@@ -631,9 +626,6 @@ extern "C" __global__ void computeFixedField(
localData[tbx+tj].gkField += fields[1]; localData[tbx+tj].gkField += fields[1];
#endif #endif
} }
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
} }
} }
......
...@@ -41,20 +41,22 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res ...@@ -41,20 +41,22 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data.thole = temp.y; data.thole = temp.y;
} }
__device__ real computeDScaleFactor(unsigned int polarizationGroup) { __device__ real computeDScaleFactor(unsigned int polarizationGroup, int index) {
return (polarizationGroup & 1 ? 0 : 1); return (polarizationGroup & 1<<index ? 0 : 1);
} }
__device__ float computeMScaleFactor(uint2 covalent) { __device__ float computeMScaleFactor(uint2 covalent, int index) {
bool x = (covalent.x & 1); int mask = 1<<index;
bool y = (covalent.y & 1); bool x = (covalent.x & mask);
bool y = (covalent.y & mask);
return (x ? (y ? 0.0f : 0.4f) : (y ? 0.8f : 1.0f)); return (x ? (y ? 0.0f : 0.4f) : (y ? 0.8f : 1.0f));
} }
__device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGroup) { __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGroup, int index) {
bool x = (covalent.x & 1); int mask = 1<<index;
bool y = (covalent.y & 1); bool x = (covalent.x & mask);
bool p = (polarizationGroup & 1); bool y = (covalent.y & mask);
bool p = (polarizationGroup & mask);
return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f)); return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f));
} }
...@@ -270,14 +272,11 @@ extern "C" __global__ void computeElectrostatics( ...@@ -270,14 +272,11 @@ extern "C" __global__ void computeElectrostatics(
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j; int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup, j);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup, j);
float m = computeMScaleFactor(covalent); float m = computeMScaleFactor(covalent, j);
computeOneInteraction(data, localData[tbx+j], hasExclusions, d, p, m, 0.5f, energy, periodicBoxSize, invPeriodicBoxSize); computeOneInteraction(data, localData[tbx+j], hasExclusions, d, p, m, 0.5f, energy, periodicBoxSize, invPeriodicBoxSize);
} }
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
} }
if (atom1 < NUM_ATOMS) if (atom1 < NUM_ATOMS)
computeSelfEnergyAndTorque(data, energy); computeSelfEnergyAndTorque(data, energy);
...@@ -391,9 +390,6 @@ extern "C" __global__ void computeElectrostatics( ...@@ -391,9 +390,6 @@ extern "C" __global__ void computeElectrostatics(
uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0)); uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0); unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx));
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
// Compute forces. // Compute forces.
...@@ -401,14 +397,11 @@ extern "C" __global__ void computeElectrostatics( ...@@ -401,14 +397,11 @@ extern "C" __global__ void computeElectrostatics(
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj; int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
float d = computeDScaleFactor(polarizationGroup); float d = computeDScaleFactor(polarizationGroup, tj);
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup, tj);
float m = computeMScaleFactor(covalent); float m = computeMScaleFactor(covalent, tj);
computeOneInteraction(data, localData[tbx+tj], hasExclusions, d, p, m, 1, energy, periodicBoxSize, invPeriodicBoxSize); computeOneInteraction(data, localData[tbx+tj], hasExclusions, d, p, m, 1, energy, periodicBoxSize, invPeriodicBoxSize);
} }
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
} }
data.force *= -ENERGY_SCALE_FACTOR; data.force *= -ENERGY_SCALE_FACTOR;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment