Commit 7e9cf94b authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed an error on AMD's OpenCL implementation

parent e43f14bf
...@@ -56,7 +56,6 @@ void OpenCLCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context ...@@ -56,7 +56,6 @@ void OpenCLCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context
void OpenCLCalcForcesAndEnergyKernel::finishForceComputation(ContextImpl& context) { void OpenCLCalcForcesAndEnergyKernel::finishForceComputation(ContextImpl& context) {
cl.getNonbondedUtilities().computeInteractions(); cl.getNonbondedUtilities().computeInteractions();
cl.reduceBuffer(cl.getForceBuffers(), cl.getNumForceBuffers()); cl.reduceBuffer(cl.getForceBuffers(), cl.getNumForceBuffers());
cl.getNonbondedUtilities().prepareInteractions();
} }
void OpenCLCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& context) { void OpenCLCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& context) {
......
const unsigned int TileSize = 32; #define TILE_SIZE 32
/** /**
* Compute nonbonded interactions. * Compute nonbonded interactions.
...@@ -23,12 +23,12 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -23,12 +23,12 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile // Extract the coordinates of this tile
unsigned int x = tiles[pos]; unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff)*TileSize; unsigned int y = ((x >> 2) & 0x7fff)*TILE_SIZE;
bool hasExclusions = (x & 0x1); bool hasExclusions = (x & 0x1);
x = (x>>17)*TileSize; x = (x>>17)*TILE_SIZE;
unsigned int baseLocalAtom = (get_local_id(0) < TileSize ? 0 : TileSize/2); unsigned int baseLocalAtom = (get_local_id(0) < TILE_SIZE ? 0 : TILE_SIZE/2);
unsigned int tgx = get_local_id(0) & (TileSize-1); unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
unsigned int forceBufferOffset = (tgx < TileSize/2 ? 0 : TileSize); unsigned int forceBufferOffset = (tgx < TILE_SIZE/2 ? 0 : TILE_SIZE);
unsigned int atom1 = x + tgx; unsigned int atom1 = x + tgx;
float4 force = 0.0f; float4 force = 0.0f;
float4 posq1 = posq[atom1]; float4 posq1 = posq[atom1];
...@@ -39,12 +39,12 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -39,12 +39,12 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
local_posq[get_local_id(0)] = posq1; local_posq[get_local_id(0)] = posq1;
LOAD_LOCAL_PARAMETERS_FROM_1 LOAD_LOCAL_PARAMETERS_FROM_1
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
unsigned int xi = x/TileSize; unsigned int xi = x/TILE_SIZE;
unsigned int tile = xi+xi*PADDED_NUM_ATOMS/TileSize-xi*(xi+1)/2; unsigned int tile = xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*(xi+1)/2;
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
unsigned int excl = exclusions[exclusionIndices[tile]+tgx] >> baseLocalAtom; unsigned int excl = exclusions[exclusionIndices[tile]+tgx] >> baseLocalAtom;
#endif #endif
for (unsigned int j = 0; j < TileSize/2; j++) { for (unsigned int j = 0; j < TILE_SIZE/2; j++) {
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
...@@ -72,48 +72,41 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -72,48 +72,41 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
// Sum the forces and write results. // Sum the forces and write results.
if (get_local_id(0) >= TileSize) if (get_local_id(0) >= TILE_SIZE)
tempBuffer[get_local_id(0)] = force; tempBuffer[get_local_id(0)] = force;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < TileSize) { if (get_local_id(0) < TILE_SIZE) {
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset = x + tgx + (x/TileSize)*PADDED_NUM_ATOMS; unsigned int offset = x + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
#else #else
unsigned int offset = x + tgx + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset = x + tgx + get_group_id(0)*PADDED_NUM_ATOMS;
#endif #endif
forceBuffers[offset].xyz += force.xyz+tempBuffer[get_local_id(0)+TileSize].xyz; forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz+tempBuffer[get_local_id(0)+TILE_SIZE].xyz;
} }
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
if (lasty != y && get_local_id(0) < TileSize) { if (lasty != y && get_local_id(0) < TILE_SIZE) {
unsigned int j = y + tgx; unsigned int j = y + tgx;
local_posq[get_local_id(0)] = posq[j]; local_posq[get_local_id(0)] = posq[j];
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
} }
local_force[get_local_id(0)] = 0.0f; local_force[get_local_id(0)] = 0.0f;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
#ifdef USE_CUTOFF
unsigned int flags = interactionFlags[pos];
if (!hasExclusions && flags == 0) {
// No interactions in this tile.
}
else
#endif
{
// Compute the full set of interactions in this tile. // Compute the full set of interactions in this tile.
unsigned int xi = x/TileSize; unsigned int xi = x/TILE_SIZE;
unsigned int yi = y/TileSize; unsigned int yi = y/TILE_SIZE;
unsigned int tile = xi+yi*PADDED_NUM_ATOMS/TileSize-yi*(yi+1)/2; unsigned int tile = xi+yi*PADDED_NUM_ATOMS/TILE_SIZE-yi*(yi+1)/2;
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
unsigned int excl = (hasExclusions ? exclusions[exclusionIndices[tile]+tgx] : 0xFFFFFFFF); unsigned int excl = (hasExclusions ? exclusions[exclusionIndices[tile]+tgx] : 0xFFFFFFFF);
excl = (excl >> tgx) | (excl << (TileSize - tgx)); excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
excl >>= baseLocalAtom; excl >>= baseLocalAtom;
#endif #endif
unsigned int tj = tgx%(TileSize/2); unsigned int tj = tgx%(TILE_SIZE/2);
for (unsigned int j = 0; j < TileSize/2; j++) { for (unsigned int j = 0; j < TILE_SIZE/2; j++) {
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
...@@ -139,25 +132,24 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -139,25 +132,24 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
local_force[baseLocalAtom+tj+forceBufferOffset].xyz += delta.xyz; local_force[baseLocalAtom+tj+forceBufferOffset].xyz += delta.xyz;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
excl >>= 1; excl >>= 1;
tj = (tj+1)%(TileSize/2); tj = (tj+1)%(TILE_SIZE/2);
}
} }
// Sum the forces and write results. // Sum the forces and write results.
if (get_local_id(0) >= TileSize) if (get_local_id(0) >= TILE_SIZE)
tempBuffer[get_local_id(0)] = force; tempBuffer[get_local_id(0)] = force;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < TileSize) { if (get_local_id(0) < TILE_SIZE) {
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset1 = x + tgx + (y/TileSize)*PADDED_NUM_ATOMS; unsigned int offset1 = x + tgx + (y/TILE_SIZE)*PADDED_NUM_ATOMS;
unsigned int offset2 = y + tgx + (x/TileSize)*PADDED_NUM_ATOMS; unsigned int offset2 = y + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
#else #else
unsigned int offset1 = x + tgx + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset1 = x + tgx + get_group_id(0)*PADDED_NUM_ATOMS;
unsigned int offset2 = y + tgx + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset2 = y + tgx + get_group_id(0)*PADDED_NUM_ATOMS;
#endif #endif
forceBuffers[offset1].xyz += force.xyz+tempBuffer[get_local_id(0)+TileSize].xyz; forceBuffers[offset1].xyz = forceBuffers[offset1].xyz+force.xyz+tempBuffer[get_local_id(0)+TILE_SIZE].xyz;
forceBuffers[offset2].xyz += local_force[get_local_id(0)].xyz+local_force[get_local_id(0)+TileSize].xyz; forceBuffers[offset2].xyz = forceBuffers[offset2].xyz+local_force[get_local_id(0)].xyz+local_force[get_local_id(0)+TILE_SIZE].xyz;
} }
lasty = y; lasty = y;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment