Commit db8a55b3 authored by Peter Eastman's avatar Peter Eastman
Browse files

Eliminated local memory bank conflicts

parent 72a8bb80
...@@ -460,7 +460,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -460,7 +460,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel.setArg<cl::Buffer>(index++, exclusionIndices->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, exclusionIndices->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, exclusionRowIndices->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, exclusionRowIndices->getDeviceBuffer());
kernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize*localDataSize : OpenCLContext::ThreadBlockSize*localDataSize), NULL); kernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize*localDataSize : OpenCLContext::ThreadBlockSize*localDataSize), NULL);
kernel.setArg(index++, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL); kernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
if (useCutoff) { if (useCutoff) {
kernel.setArg<cl::Buffer>(index++, interactingTiles->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, interactingTiles->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, interactionCount->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, interactionCount->getDeviceBuffer());
......
...@@ -13,7 +13,7 @@ typedef struct { ...@@ -13,7 +13,7 @@ typedef struct {
__kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1))) __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions, void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions,
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __local AtomData* localData, __local float4* tempBuffer, __global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __local AtomData* localData, __local float* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags __global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags
#else #else
...@@ -180,29 +180,46 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe ...@@ -180,29 +180,46 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
energy += tempEnergy; energy += tempEnergy;
int bufferIndex = 3*get_local_id(0);
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
delta.xyz *= dEdR; delta.xyz *= dEdR;
force.xyz -= delta.xyz; force.xyz -= delta.xyz;
tempBuffer[get_local_id(0)] = delta; tempBuffer[bufferIndex] = delta.x;
tempBuffer[bufferIndex+1] = delta.y;
tempBuffer[bufferIndex+2] = delta.z;
#else #else
force.xyz -= dEdR1.xyz; force.xyz -= dEdR1.xyz;
tempBuffer[get_local_id(0)] = dEdR2; tempBuffer[bufferIndex] = dEdR2.x;
tempBuffer[bufferIndex+1] = dEdR2.y;
tempBuffer[bufferIndex+2] = dEdR2.z;
#endif #endif
// Sum the forces on atom2. // Sum the forces on atom2.
if (tgx % 2 == 0) if (tgx % 2 == 0) {
tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+1].xyz; tempBuffer[bufferIndex] += tempBuffer[bufferIndex+3];
if (tgx % 4 == 0) tempBuffer[bufferIndex+1] += tempBuffer[bufferIndex+4];
tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+2].xyz; tempBuffer[bufferIndex+2] += tempBuffer[bufferIndex+5];
if (tgx % 8 == 0) }
tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+4].xyz; if (tgx % 4 == 0) {
if (tgx % 16 == 0) tempBuffer[bufferIndex] += tempBuffer[bufferIndex+6];
tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+8].xyz; tempBuffer[bufferIndex+1] += tempBuffer[bufferIndex+7];
tempBuffer[bufferIndex+2] += tempBuffer[bufferIndex+8];
}
if (tgx % 8 == 0) {
tempBuffer[bufferIndex] += tempBuffer[bufferIndex+12];
tempBuffer[bufferIndex+1] += tempBuffer[bufferIndex+13];
tempBuffer[bufferIndex+2] += tempBuffer[bufferIndex+14];
}
if (tgx % 16 == 0) {
tempBuffer[bufferIndex] += tempBuffer[bufferIndex+24];
tempBuffer[bufferIndex+1] += tempBuffer[bufferIndex+25];
tempBuffer[bufferIndex+2] += tempBuffer[bufferIndex+26];
}
if (tgx == 0) { if (tgx == 0) {
localData[tbx+j].fx += tempBuffer[get_local_id(0)].x + tempBuffer[get_local_id(0)+16].x; localData[tbx+j].fx += tempBuffer[bufferIndex] + tempBuffer[bufferIndex+48];
localData[tbx+j].fy += tempBuffer[get_local_id(0)].y + tempBuffer[get_local_id(0)+16].y; localData[tbx+j].fy += tempBuffer[bufferIndex+1] + tempBuffer[bufferIndex+49];
localData[tbx+j].fz += tempBuffer[get_local_id(0)].z + tempBuffer[get_local_id(0)+16].z; localData[tbx+j].fz += tempBuffer[bufferIndex+2] + tempBuffer[bufferIndex+50];
} }
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment