Commit ae466134 authored by peastman
Browse files

Improved performance of building neighbor list on AMD GPUs

parent f9fbe12a
...@@ -423,7 +423,9 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -423,7 +423,9 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
includeBlockFlags[get_local_id(0)] = includeBlock2; includeBlockFlags[get_local_id(0)] = includeBlock2;
SYNC_WARPS; SYNC_WARPS;
for (int i = 0; i < TILE_SIZE; i++) { for (int i = 0; i < TILE_SIZE; i++) {
if (includeBlockFlags[warpStart+i]) { while (i < TILE_SIZE && !includeBlockFlags[warpStart+i])
i++;
if (i < TILE_SIZE) {
unsigned short y = (unsigned short) sortedBlocks[block2Base+i].y; unsigned short y = (unsigned short) sortedBlocks[block2Base+i].y;
// Check each atom in block Y for interactions. // Check each atom in block Y for interactions.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment