Commit 31ed4381 authored by Peter Eastman's avatar Peter Eastman
Browse files

Minor simplification to nonbonded kernel

parent 0dbb8a47
...@@ -325,23 +325,23 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -325,23 +325,23 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
stringstream loadLocal1; stringstream loadLocal1;
for (int i = 0; i < (int) params.size(); i++) { for (int i = 0; i < (int) params.size(); i++) {
if (params[i].getNumComponents() == 1) { if (params[i].getNumComponents() == 1) {
loadLocal1<<"localData[get_local_id(0)]."<<params[i].getName()<<" = "<<params[i].getName()<<"1;\n"; loadLocal1<<"atom1Data->"<<params[i].getName()<<" = "<<params[i].getName()<<"1;\n";
} }
else { else {
for (int j = 0; j < params[i].getNumComponents(); ++j) for (int j = 0; j < params[i].getNumComponents(); ++j)
loadLocal1<<"localData[get_local_id(0)]."<<params[i].getName()<<"_"<<suffixes[j]<<" = "<<params[i].getName()<<"1."<<suffixes[j]<<";\n"; loadLocal1<<"atom1Data->"<<params[i].getName()<<"_"<<suffixes[j]<<" = "<<params[i].getName()<<"1."<<suffixes[j]<<";\n";
} }
} }
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str(); replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
stringstream loadLocal2; stringstream loadLocal2;
for (int i = 0; i < (int) params.size(); i++) { for (int i = 0; i < (int) params.size(); i++) {
if (params[i].getNumComponents() == 1) { if (params[i].getNumComponents() == 1) {
loadLocal2<<"localData[get_local_id(0)]."<<params[i].getName()<<" = global_"<<params[i].getName()<<"[j];\n"; loadLocal2<<"atom1Data->"<<params[i].getName()<<" = global_"<<params[i].getName()<<"[j];\n";
} }
else { else {
loadLocal2<<params[i].getType()<<" temp_"<<params[i].getName()<<" = global_"<<params[i].getName()<<"[j];\n"; loadLocal2<<params[i].getType()<<" temp_"<<params[i].getName()<<" = global_"<<params[i].getName()<<"[j];\n";
for (int j = 0; j < params[i].getNumComponents(); ++j) for (int j = 0; j < params[i].getNumComponents(); ++j)
loadLocal2<<"localData[get_local_id(0)]."<<params[i].getName()<<"_"<<suffixes[j]<<" = temp_"<<params[i].getName()<<"."<<suffixes[j]<<";\n"; loadLocal2<<"atom1Data->"<<params[i].getName()<<"_"<<suffixes[j]<<" = temp_"<<params[i].getName()<<"."<<suffixes[j]<<";\n";
} }
} }
replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str(); replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
...@@ -358,14 +358,14 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -358,14 +358,14 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
stringstream load2j; stringstream load2j;
for (int i = 0; i < (int) params.size(); i++) { for (int i = 0; i < (int) params.size(); i++) {
if (params[i].getNumComponents() == 1) { if (params[i].getNumComponents() == 1) {
load2j<<params[i].getType()<<" "<<params[i].getName()<<"2 = localData[atom2]."<<params[i].getName()<<";\n"; load2j<<params[i].getType()<<" "<<params[i].getName()<<"2 = atom2Data->"<<params[i].getName()<<";\n";
} }
else { else {
load2j<<params[i].getType()<<" "<<params[i].getName()<<"2 = ("<<params[i].getType()<<") ("; load2j<<params[i].getType()<<" "<<params[i].getName()<<"2 = ("<<params[i].getType()<<") (";
for (int j = 0; j < params[i].getNumComponents(); ++j) { for (int j = 0; j < params[i].getNumComponents(); ++j) {
if (j > 0) if (j > 0)
load2j<<", "; load2j<<", ";
load2j<<"localData[atom2]."<<params[i].getName()<<"_"<<suffixes[j]; load2j<<"atom2Data->"<<params[i].getName()<<"_"<<suffixes[j];
} }
load2j<<");\n"; load2j<<");\n";
} }
...@@ -387,11 +387,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -387,11 +387,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["INV_PERIODIC_BOX_SIZE_Y"] = OpenCLExpressionUtilities::doubleToString(1.0/periodicBoxSize.y); defines["INV_PERIODIC_BOX_SIZE_Y"] = OpenCLExpressionUtilities::doubleToString(1.0/periodicBoxSize.y);
defines["INV_PERIODIC_BOX_SIZE_Z"] = OpenCLExpressionUtilities::doubleToString(1.0/periodicBoxSize.z); defines["INV_PERIODIC_BOX_SIZE_Z"] = OpenCLExpressionUtilities::doubleToString(1.0/periodicBoxSize.z);
defines["CUTOFF_SQUARED"] = OpenCLExpressionUtilities::doubleToString(cutoff*cutoff); defines["CUTOFF_SQUARED"] = OpenCLExpressionUtilities::doubleToString(cutoff*cutoff);
stringstream natom, padded; defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getNumAtoms());
natom << context.getNumAtoms(); defines["PADDED_NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getPaddedNumAtoms());
padded << context.getPaddedNumAtoms();
defines["NUM_ATOMS"] = natom.str();
defines["PADDED_NUM_ATOMS"] = padded.str();
string file = (context.getSIMDWidth() == 32 ? OpenCLKernelSources::nonbonded_nvidia : OpenCLKernelSources::nonbonded_default); string file = (context.getSIMDWidth() == 32 ? OpenCLKernelSources::nonbonded_nvidia : OpenCLKernelSources::nonbonded_default);
cl::Program program = context.createProgram(context.replaceStrings(file, replacements), defines); cl::Program program = context.createProgram(context.replaceStrings(file, replacements), defines);
cl::Kernel kernel(program, "computeNonbonded"); cl::Kernel kernel(program, "computeNonbonded");
......
...@@ -40,14 +40,15 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -40,14 +40,15 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
unsigned int atom1 = x + tgx; unsigned int atom1 = x + tgx;
float4 force = 0.0f; float4 force = 0.0f;
float4 posq1 = posq[atom1]; float4 posq1 = posq[atom1];
__local AtomData* atom1Data = &localData[get_local_id(0)];
LOAD_ATOM1_PARAMETERS LOAD_ATOM1_PARAMETERS
if (x == y) { if (x == y) {
// This tile is on the diagonal. // This tile is on the diagonal.
localData[get_local_id(0)].x = posq1.x; atom1Data->x = posq1.x;
localData[get_local_id(0)].y = posq1.y; atom1Data->y = posq1.y;
localData[get_local_id(0)].z = posq1.z; atom1Data->z = posq1.z;
localData[get_local_id(0)].q = posq1.w; atom1Data->q = posq1.w;
LOAD_LOCAL_PARAMETERS_FROM_1 LOAD_LOCAL_PARAMETERS_FROM_1
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
unsigned int xi = x/TILE_SIZE; unsigned int xi = x/TILE_SIZE;
...@@ -59,8 +60,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -59,8 +60,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
int atom2 = baseLocalAtom+j; __local AtomData* atom2Data = &localData[baseLocalAtom+j];
float4 posq2 = (float4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); float4 posq2 = (float4) (atom2Data->x, atom2Data->y, atom2Data->z, atom2Data->q);
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
...@@ -71,7 +72,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -71,7 +72,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float invR = RSQRT(r2); float invR = RSQRT(r2);
float r = RECIP(invR); float r = RECIP(invR);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+j; int atom2 = y+baseLocalAtom+j;
float dEdR = 0.0f; float dEdR = 0.0f;
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
...@@ -101,15 +102,15 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -101,15 +102,15 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if (lasty != y && get_local_id(0) < TILE_SIZE) { if (lasty != y && get_local_id(0) < TILE_SIZE) {
unsigned int j = y + tgx; unsigned int j = y + tgx;
float4 tempPosq = posq[j]; float4 tempPosq = posq[j];
localData[get_local_id(0)].x = tempPosq.x; atom1Data->x = tempPosq.x;
localData[get_local_id(0)].y = tempPosq.y; atom1Data->y = tempPosq.y;
localData[get_local_id(0)].z = tempPosq.z; atom1Data->z = tempPosq.z;
localData[get_local_id(0)].q = tempPosq.w; atom1Data->q = tempPosq.w;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
} }
localData[get_local_id(0)].fx = 0.0f; atom1Data->fx = 0.0f;
localData[get_local_id(0)].fy = 0.0f; atom1Data->fy = 0.0f;
localData[get_local_id(0)].fz = 0.0f; atom1Data->fz = 0.0f;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
// Compute the full set of interactions in this tile. // Compute the full set of interactions in this tile.
...@@ -127,8 +128,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -127,8 +128,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
int atom2 = baseLocalAtom+tj; __local AtomData* atom2Data = &localData[baseLocalAtom+j];
float4 posq2 = (float4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); float4 posq2 = (float4) (atom2Data->x, atom2Data->y, atom2Data->z, atom2Data->q);
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
...@@ -139,7 +140,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -139,7 +140,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float invR = RSQRT(r2); float invR = RSQRT(r2);
float r = RECIP(invR); float r = RECIP(invR);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+baseLocalAtom+tj; int atom2 = y+baseLocalAtom+tj;
float dEdR = 0.0f; float dEdR = 0.0f;
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
...@@ -168,7 +169,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -168,7 +169,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
unsigned int offset2 = y + tgx + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset2 = y + tgx + get_group_id(0)*PADDED_NUM_ATOMS;
#endif #endif
forceBuffers[offset1].xyz = forceBuffers[offset1].xyz+force.xyz+tempBuffer[get_local_id(0)+TILE_SIZE].xyz; forceBuffers[offset1].xyz = forceBuffers[offset1].xyz+force.xyz+tempBuffer[get_local_id(0)+TILE_SIZE].xyz;
float4 sum = (float4) (localData[get_local_id(0)].fx+localData[get_local_id(0)+TILE_SIZE].fx, localData[get_local_id(0)].fy+localData[get_local_id(0)+TILE_SIZE].fy, localData[get_local_id(0)].fz+localData[get_local_id(0)+TILE_SIZE].fz, 0.0f); float4 sum = (float4) (atom1Data->fx+localData[get_local_id(0)+TILE_SIZE].fx, atom1Data->fy+localData[get_local_id(0)+TILE_SIZE].fy, atom1Data->fz+localData[get_local_id(0)+TILE_SIZE].fz, 0.0f);
forceBuffers[offset2].xyz = forceBuffers[offset2].xyz+sum.xyz; forceBuffers[offset2].xyz = forceBuffers[offset2].xyz+sum.xyz;
} }
lasty = y; lasty = y;
......
...@@ -41,14 +41,15 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -41,14 +41,15 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
unsigned int atom1 = x + tgx; unsigned int atom1 = x + tgx;
float4 force = 0.0f; float4 force = 0.0f;
float4 posq1 = posq[atom1]; float4 posq1 = posq[atom1];
__local AtomData* atom1Data = &localData[get_local_id(0)];
LOAD_ATOM1_PARAMETERS LOAD_ATOM1_PARAMETERS
if (x == y) { if (x == y) {
// This tile is on the diagonal. // This tile is on the diagonal.
localData[get_local_id(0)].x = posq1.x; atom1Data->x = posq1.x;
localData[get_local_id(0)].y = posq1.y; atom1Data->y = posq1.y;
localData[get_local_id(0)].z = posq1.z; atom1Data->z = posq1.z;
localData[get_local_id(0)].q = posq1.w; atom1Data->q = posq1.w;
LOAD_LOCAL_PARAMETERS_FROM_1 LOAD_LOCAL_PARAMETERS_FROM_1
unsigned int xi = x/TILE_SIZE; unsigned int xi = x/TILE_SIZE;
unsigned int tile = xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*(xi+1)/2; unsigned int tile = xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*(xi+1)/2;
...@@ -59,8 +60,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -59,8 +60,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
int atom2 = tbx+j; __local AtomData* atom2Data = &localData[tbx+j];
float4 posq2 = (float4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); float4 posq2 = (float4) (atom2Data->x, atom2Data->y, atom2Data->z, atom2Data->q);
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
...@@ -71,7 +72,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -71,7 +72,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float r = sqrt(r2); float r = sqrt(r2);
float invR = 1.0f/r; float invR = 1.0f/r;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+j; int atom2 = y+j;
float dEdR = 0.0f; float dEdR = 0.0f;
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
...@@ -95,15 +96,15 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -95,15 +96,15 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if (lasty != y) { if (lasty != y) {
unsigned int j = y + tgx; unsigned int j = y + tgx;
float4 tempPosq = posq[j]; float4 tempPosq = posq[j];
localData[get_local_id(0)].x = tempPosq.x; atom1Data->x = tempPosq.x;
localData[get_local_id(0)].y = tempPosq.y; atom1Data->y = tempPosq.y;
localData[get_local_id(0)].z = tempPosq.z; atom1Data->z = tempPosq.z;
localData[get_local_id(0)].q = tempPosq.w; atom1Data->q = tempPosq.w;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
} }
localData[get_local_id(0)].fx = 0.0f; atom1Data->fx = 0.0f;
localData[get_local_id(0)].fy = 0.0f; atom1Data->fy = 0.0f;
localData[get_local_id(0)].fz = 0.0f; atom1Data->fz = 0.0f;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int flags = interactionFlags[pos]; unsigned int flags = interactionFlags[pos];
if (!hasExclusions && flags != 0xFFFFFFFF) { if (!hasExclusions && flags != 0xFFFFFFFF) {
...@@ -116,8 +117,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -116,8 +117,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) { if ((flags&(1<<j)) != 0) {
bool isExcluded = false; bool isExcluded = false;
int atom2 = tbx+j; __local AtomData* atom2Data = &localData[tbx+j];
float4 posq2 = (float4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); float4 posq2 = (float4) (atom2Data->x, atom2Data->y, atom2Data->z, atom2Data->q);
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
...@@ -128,7 +129,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -128,7 +129,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float invR = RSQRT(r2); float invR = RSQRT(r2);
float r = RECIP(invR); float r = RECIP(invR);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+j; int atom2 = y+j;
float dEdR = 0.0f; float dEdR = 0.0f;
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
...@@ -148,9 +149,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -148,9 +149,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if (tgx % 16 == 0) if (tgx % 16 == 0)
tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+8].xyz; tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+8].xyz;
if (tgx == 0) { if (tgx == 0) {
localData[tbx+j].fx += tempBuffer[get_local_id(0)].x + tempBuffer[get_local_id(0)+16].x; atom2Data->fx += tempBuffer[get_local_id(0)].x + tempBuffer[get_local_id(0)+16].x;
localData[tbx+j].fy += tempBuffer[get_local_id(0)].y + tempBuffer[get_local_id(0)+16].y; atom2Data->fy += tempBuffer[get_local_id(0)].y + tempBuffer[get_local_id(0)+16].y;
localData[tbx+j].fz += tempBuffer[get_local_id(0)].z + tempBuffer[get_local_id(0)+16].z; atom2Data->fz += tempBuffer[get_local_id(0)].z + tempBuffer[get_local_id(0)+16].z;
} }
} }
} }
...@@ -173,8 +174,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -173,8 +174,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
int atom2 = tbx+tj; __local AtomData* atom2Data = &localData[tbx+tj];
float4 posq2 = (float4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q); float4 posq2 = (float4) (atom2Data->x, atom2Data->y, atom2Data->z, atom2Data->q);
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f); float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X; delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
...@@ -185,16 +186,16 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -185,16 +186,16 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float invR = RSQRT(r2); float invR = RSQRT(r2);
float r = RECIP(invR); float r = RECIP(invR);
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y+tj; int atom2 = y+tj;
float dEdR = 0.0f; float dEdR = 0.0f;
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
energy += tempEnergy; energy += tempEnergy;
delta.xyz *= dEdR; delta.xyz *= dEdR;
force.xyz -= delta.xyz; force.xyz -= delta.xyz;
localData[tbx+tj].fx += delta.x; atom2Data->fx += delta.x;
localData[tbx+tj].fy += delta.y; atom2Data->fy += delta.y;
localData[tbx+tj].fz += delta.z; atom2Data->fz += delta.z;
excl >>= 1; excl >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
} }
...@@ -209,7 +210,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -209,7 +210,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
unsigned int offset2 = y + tgx + warp*PADDED_NUM_ATOMS; unsigned int offset2 = y + tgx + warp*PADDED_NUM_ATOMS;
#endif #endif
forceBuffers[offset1].xyz += force.xyz; forceBuffers[offset1].xyz += force.xyz;
forceBuffers[offset2] += (float4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f); forceBuffers[offset2] += (float4) (atom1Data->fx, atom1Data->fy, atom1Data->fz, 0.0f);
lasty = y; lasty = y;
} }
pos++; pos++;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment