"...ssh:/git@developer.sourcefind.cn:2222/tsoc/openmm.git" did not exist on "c28ae9037216fc35149cdc0167bd5c1d069e7c81"
Commit a8fe9cea authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing changes to reduce memory use for large systems

parent b0d2357c
...@@ -84,6 +84,13 @@ struct mm_float16 { ...@@ -84,6 +84,13 @@ struct mm_float16 {
s8(s8), s9(s9), s10(s10), s11(s11), s12(s12), s13(s13), s14(s14), s15(15) { s8(s8), s9(s9), s10(s10), s11(s11), s12(s12), s13(s13), s14(s14), s15(15) {
} }
}; };
struct mm_ushort2 {
cl_ushort x, y;
mm_ushort2() {
}
mm_ushort2(cl_ushort x, cl_ushort y) : x(x), y(y) {
}
};
struct mm_int2 { struct mm_int2 {
cl_int x, y; cl_int x, y;
mm_int2() { mm_int2() {
......
...@@ -200,7 +200,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -200,7 +200,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
// Create data structures for the neighbor list. // Create data structures for the neighbor list.
if (useCutoff) { if (useCutoff) {
interactingTiles = new OpenCLArray<cl_uint>(context, numTiles, "interactingTiles"); interactingTiles = new OpenCLArray<mm_ushort2>(context, numTiles, "interactingTiles");
interactionFlags = new OpenCLArray<cl_uint>(context, numTiles, "interactionFlags"); interactionFlags = new OpenCLArray<cl_uint>(context, numTiles, "interactionFlags");
interactionCount = new OpenCLArray<cl_uint>(context, 1, "interactionCount"); interactionCount = new OpenCLArray<cl_uint>(context, 1, "interactionCount");
blockCenter = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockCenter"); blockCenter = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockCenter");
...@@ -230,6 +230,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -230,6 +230,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel.setArg<cl::Buffer>(4, blockBoundingBox->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(4, blockBoundingBox->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(5, interactionCount->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(5, interactionCount->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(7, context.getPosq().getDeviceBuffer());
findInteractionsWithinBlocksKernel = cl::Kernel(interactingBlocksProgram, "findInteractionsWithinBlocks"); findInteractionsWithinBlocksKernel = cl::Kernel(interactingBlocksProgram, "findInteractionsWithinBlocks");
findInteractionsWithinBlocksKernel.setArg<cl_float>(0, (cl_float) (cutoff*cutoff)); findInteractionsWithinBlocksKernel.setArg<cl_float>(0, (cl_float) (cutoff*cutoff));
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(3, context.getPosq().getDeviceBuffer()); findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(3, context.getPosq().getDeviceBuffer());
......
...@@ -148,7 +148,7 @@ public: ...@@ -148,7 +148,7 @@ public:
/** /**
* Get the array containing tiles with interactions. * Get the array containing tiles with interactions.
*/ */
OpenCLArray<cl_uint>& getInteractingTiles() { OpenCLArray<mm_ushort2>& getInteractingTiles() {
return *interactingTiles; return *interactingTiles;
} }
/** /**
...@@ -197,7 +197,7 @@ private: ...@@ -197,7 +197,7 @@ private:
OpenCLArray<cl_uint>* exclusions; OpenCLArray<cl_uint>* exclusions;
OpenCLArray<cl_uint>* exclusionIndices; OpenCLArray<cl_uint>* exclusionIndices;
OpenCLArray<cl_uint>* exclusionRowIndices; OpenCLArray<cl_uint>* exclusionRowIndices;
OpenCLArray<cl_uint>* interactingTiles; OpenCLArray<mm_ushort2>* interactingTiles;
OpenCLArray<cl_uint>* interactionFlags; OpenCLArray<cl_uint>* interactionFlags;
OpenCLArray<cl_uint>* interactionCount; OpenCLArray<cl_uint>* interactionCount;
OpenCLArray<mm_float4>* blockCenter; OpenCLArray<mm_float4>* blockCenter;
......
...@@ -11,7 +11,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer ...@@ -11,7 +11,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
__global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, __global unsigned int* exclusionIndices, __global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, __global unsigned int* exclusionIndices,
__global unsigned int* exclusionRowIndices, __local float4* tempForceBuffer, __global unsigned int* exclusionRowIndices, __local float4* tempForceBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize __global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -30,9 +30,9 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer ...@@ -30,9 +30,9 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile // Extract the coordinates of this tile
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int x = tiles[pos]; ushort2 tileIndices = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff); unsigned int x = tileIndices.x;
x = (x>>17); unsigned int y = tileIndices.y;
#else #else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2); unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
...@@ -207,8 +207,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer ...@@ -207,8 +207,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
STORE_DERIVATIVES_1 STORE_DERIVATIVES_1
STORE_DERIVATIVES_2 STORE_DERIVATIVES_2
} }
lasty = y;
} }
lasty = y;
pos++; pos++;
} }
energyBuffer[get_global_id(0)] += energy; energyBuffer[get_global_id(0)] += energy;
......
...@@ -11,7 +11,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer ...@@ -11,7 +11,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
__global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, __global unsigned int* exclusionIndices, __global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, __global unsigned int* exclusionIndices,
__global unsigned int* exclusionRowIndices, __local float4* tempBuffer, __global unsigned int* exclusionRowIndices, __local float4* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize __global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -31,9 +31,9 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer ...@@ -31,9 +31,9 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile // Extract the coordinates of this tile
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int x = tiles[pos]; ushort2 tileIndices = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff); unsigned int x = tileIndices.x;
x = (x>>17); unsigned int y = tileIndices.y;
#else #else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2); unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
...@@ -195,8 +195,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer ...@@ -195,8 +195,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
forceBuffers[offset2].xyz += local_force[get_local_id(0)].xyz; forceBuffers[offset2].xyz += local_force[get_local_id(0)].xyz;
STORE_DERIVATIVES_1 STORE_DERIVATIVES_1
STORE_DERIVATIVES_2 STORE_DERIVATIVES_2
lasty = y;
} }
lasty = y;
pos++; pos++;
} }
energyBuffer[get_global_id(0)] += energy; energyBuffer[get_global_id(0)] += energy;
......
...@@ -9,7 +9,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global ...@@ -9,7 +9,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __global float* global_value, __local float* local_value, __global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __global float* global_value, __local float* local_value,
__local float* tempBuffer, __local float* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize __global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -27,9 +27,9 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global ...@@ -27,9 +27,9 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile // Extract the coordinates of this tile
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int x = tiles[pos]; ushort2 tileIndices = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff); unsigned int x = tileIndices.x;
x = (x>>17); unsigned int y = tileIndices.y;
#else #else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2); unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
...@@ -196,8 +196,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global ...@@ -196,8 +196,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
global_value[offset1] += value+tempBuffer[get_local_id(0)+TILE_SIZE]; global_value[offset1] += value+tempBuffer[get_local_id(0)+TILE_SIZE];
global_value[offset2] += local_value[get_local_id(0)]+local_value[get_local_id(0)+TILE_SIZE]; global_value[offset2] += local_value[get_local_id(0)]+local_value[get_local_id(0)+TILE_SIZE];
} }
lasty = y;
} }
lasty = y;
pos++; pos++;
} }
} }
...@@ -9,7 +9,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global ...@@ -9,7 +9,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __global float* global_value, __local float* local_value, __global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __global float* global_value, __local float* local_value,
__local float* tempBuffer, __local float* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize __global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -29,9 +29,9 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global ...@@ -29,9 +29,9 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile // Extract the coordinates of this tile
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int x = tiles[pos]; ushort2 tileIndices = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff); unsigned int x = tileIndices.x;
x = (x>>17); unsigned int y = tileIndices.y;
#else #else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2); unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
...@@ -233,8 +233,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global ...@@ -233,8 +233,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
#endif #endif
global_value[offset1] += value; global_value[offset1] += value;
global_value[offset2] += local_value[get_local_id(0)]; global_value[offset2] += local_value[get_local_id(0)];
lasty = y;
} }
lasty = y;
pos++; pos++;
} }
} }
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#define TILE_SIZE 32 #define TILE_SIZE 32
#define GROUP_SIZE 64 #define GROUP_SIZE 64
#define BUFFER_GROUPS 4 #define BUFFER_GROUPS 4
...@@ -44,31 +45,34 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe ...@@ -44,31 +45,34 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
* This is called by findBlocksWithInteractions(). It compacts the list of blocks and writes them * This is called by findBlocksWithInteractions(). It compacts the list of blocks and writes them
* to global memory. * to global memory.
*/ */
void storeInteractionData(__local short2* buffer, __local bool* valid, __local int* sum, __local int* sum2, __local short2* temp, __local int* baseIndex, void storeInteractionData(__local ushort2* buffer, __local int* valid, __local short* sum, __local ushort2* temp, __local int* baseIndex,
__global unsigned int* interactionCount, __global unsigned int* interactingTiles) { __global unsigned int* interactionCount, __global ushort2* interactingTiles, float cutoffSquared, float4 periodicBoxSize,
float4 invPeriodicBoxSize, __global float4* posq, __global float4* blockCenter, __global float4* blockBoundingBox) {
// The buffer is full, so we need to compact it and write out results. Start by doing a parallel prefix sum. // The buffer is full, so we need to compact it and write out results. Start by doing a parallel prefix sum.
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE) for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i] = (valid[i] ? 1 : 0); temp[i].x = (valid[i] ? 1 : 0);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
int whichBuffer = 0; int whichBuffer = 0;
for (int offset = 1; offset < BUFFER_SIZE; offset *= 2) { for (int offset = 1; offset < BUFFER_SIZE; offset *= 2) {
if (whichBuffer == 0) if (whichBuffer == 0)
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE) for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
sum2[i] = (i < offset ? sum[i] : sum[i]+sum[i-offset]); temp[i].y = (i < offset ? temp[i].x : temp[i].x+temp[i-offset].x);
else else
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE) for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i] = (i < offset ? sum2[i] : sum2[i]+sum2[i-offset]); temp[i].x = (i < offset ? temp[i].y : temp[i].y+temp[i-offset].y);
whichBuffer = 1-whichBuffer; whichBuffer = 1-whichBuffer;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
if (whichBuffer == 1) { if (whichBuffer == 0)
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i] = temp[i].x;
else
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE) for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i] = sum2[i]; sum[i] = temp[i].y;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
}
// Compact the buffer and store it to global memory. // Compact the buffer.
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE) for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
if (valid[i]) { if (valid[i]) {
...@@ -77,14 +81,74 @@ void storeInteractionData(__local short2* buffer, __local bool* valid, __local i ...@@ -77,14 +81,74 @@ void storeInteractionData(__local short2* buffer, __local bool* valid, __local i
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
int numValid = sum[BUFFER_SIZE-1]; int numValid = sum[BUFFER_SIZE-1];
// Filter the list of tiles by comparing the distance from each atom to the other bounding box.
int tile;
int index = get_local_id(0)&(TILE_SIZE-1);
int group = get_local_id(0)/TILE_SIZE;
__local int* flag = sum;
int lasty = -1;
float4 center, boxSize, pos;
for (tile = 0; tile < numValid; tile++) {
int x = temp[tile].x;
int y = temp[tile].y;
if (x == y) {
tile++;
continue;
}
if (index == 0)
flag[group] = true;
barrier(CLK_LOCAL_MEM_FENCE);
// Load an atom position and the bounding box the other block.
if (group == 0) {
center = blockCenter[x];
boxSize = blockBoundingBox[x];
if (y != lasty)
pos = posq[y*TILE_SIZE+index];
}
else {
if (y != lasty) {
center = blockCenter[y];
boxSize = blockBoundingBox[y];
}
pos = posq[x*TILE_SIZE+index];
}
lasty = y;
// Find the distance of the atom from the bounding box.
float4 delta = pos-center;
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
delta = max((float4) 0.0f, fabs(delta)-boxSize);
if (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < cutoffSquared)
flag[group] = false;
barrier(CLK_LOCAL_MEM_FENCE);
if (flag[0] || flag[1]) {
// This tile contains no interactions.
numValid--;
if (get_local_id(0) == 0) if (get_local_id(0) == 0)
*baseIndex = atom_add(interactionCount, numValid); temp[tile] = temp[numValid];
}
else
tile++;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
}
// Store it to global memory. // Store it to global memory.
if (get_local_id(0) == 0)
*baseIndex = atom_add(interactionCount, numValid);
barrier(CLK_LOCAL_MEM_FENCE);
for (int i = get_local_id(0); i < numValid; i += GROUP_SIZE) for (int i = get_local_id(0); i < numValid; i += GROUP_SIZE)
interactingTiles[*baseIndex+i] = (temp[i].x<<17)+(temp[i].y<<2); interactingTiles[*baseIndex+i] = temp[i];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
...@@ -93,12 +157,11 @@ void storeInteractionData(__local short2* buffer, __local bool* valid, __local i ...@@ -93,12 +157,11 @@ void storeInteractionData(__local short2* buffer, __local bool* valid, __local i
* mark them as non-interacting. * mark them as non-interacting.
*/ */
__kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* blockCenter, __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionCount, __global unsigned int* interactingTiles) { __global float4* blockBoundingBox, __global unsigned int* interactionCount, __global ushort2* interactingTiles, __global float4* posq) {
__local short2 buffer[BUFFER_SIZE]; __local ushort2 buffer[BUFFER_SIZE];
__local bool valid[BUFFER_SIZE]; __local int valid[BUFFER_SIZE];
__local int sum[BUFFER_SIZE]; __local short sum[BUFFER_SIZE];
__local int sum2[BUFFER_SIZE]; __local ushort2 temp[BUFFER_SIZE];
__local short2 temp[BUFFER_SIZE];
__local int bufferFull; __local int bufferFull;
__local int globalIndex; __local int globalIndex;
int valuesInBuffer = 0; int valuesInBuffer = 0;
...@@ -138,7 +201,7 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox ...@@ -138,7 +201,7 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
int bufferIndex = valuesInBuffer*GROUP_SIZE+get_local_id(0); int bufferIndex = valuesInBuffer*GROUP_SIZE+get_local_id(0);
valid[bufferIndex] = true; valid[bufferIndex] = true;
buffer[bufferIndex] = (short2) (x, y); buffer[bufferIndex] = (ushort2) (x, y);
valuesInBuffer++; valuesInBuffer++;
if (!bufferFull && valuesInBuffer == BUFFER_GROUPS) if (!bufferFull && valuesInBuffer == BUFFER_GROUPS)
bufferFull = true; bufferFull = true;
...@@ -146,21 +209,21 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox ...@@ -146,21 +209,21 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (bufferFull) { if (bufferFull) {
storeInteractionData(buffer, valid, sum, sum2, temp, &globalIndex, interactionCount, interactingTiles); storeInteractionData(buffer, valid, sum, temp, &globalIndex, interactionCount, interactingTiles, cutoffSquared, periodicBoxSize, invPeriodicBoxSize, posq, blockCenter, blockBoundingBox);
valuesInBuffer = 0; valuesInBuffer = 0;
if (get_local_id(0) == 0) if (get_local_id(0) == 0)
bufferFull = false; bufferFull = false;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
} }
storeInteractionData(buffer, valid, sum, sum2, temp, &globalIndex, interactionCount, interactingTiles); storeInteractionData(buffer, valid, sum, temp, &globalIndex, interactionCount, interactingTiles, cutoffSquared, periodicBoxSize, invPeriodicBoxSize, posq, blockCenter, blockBoundingBox);
} }
/** /**
* Compare each atom in one block to the bounding box of another block, and set * Compare each atom in one block to the bounding box of another block, and set
* flags for which ones are interacting. * flags for which ones are interacting.
*/ */
__kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* posq, __global unsigned int* tiles, __global float4* blockCenter, __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* posq, __global ushort2* tiles, __global float4* blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, __local unsigned int* flags) { __global float4* blockBoundingBox, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, __local unsigned int* flags) {
unsigned int totalWarps = get_global_size(0)/TILE_SIZE; unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
unsigned int warp = get_global_id(0)/TILE_SIZE; unsigned int warp = get_global_id(0)/TILE_SIZE;
...@@ -173,13 +236,10 @@ __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicB ...@@ -173,13 +236,10 @@ __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicB
float4 apos; float4 apos;
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile // Extract the coordinates of this tile
unsigned int x = tiles[pos]; ushort2 tileIndices = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff); unsigned int x = tileIndices.x;
bool hasExclusions = (x & 0x1); unsigned int y = tileIndices.y;
x = (x >> 17); if (x == y) {
if (x == y || hasExclusions) {
// Assume this tile will be dense.
if (index == 0) if (index == 0)
interactionFlags[pos] = 0xFFFFFFFF; interactionFlags[pos] = 0xFFFFFFFF;
} }
......
...@@ -17,7 +17,7 @@ typedef struct { ...@@ -17,7 +17,7 @@ typedef struct {
__kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1))) __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeBornSum(__global float* global_bornSum, __global float4* posq, __global float2* global_params, __local AtomData* localData, __local float* tempBuffer, void computeBornSum(__global float* global_bornSum, __global float4* posq, __global float2* global_params, __local AtomData* localData, __local float* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) { __global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
#else #else
unsigned int numTiles) { unsigned int numTiles) {
#endif #endif
...@@ -32,9 +32,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo ...@@ -32,9 +32,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile // Extract the coordinates of this tile
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int x = tiles[pos]; ushort2 tileIndices = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff); unsigned int x = tileIndices.x;
x = (x>>17); unsigned int y = tileIndices.y;
#else #else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2); unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
...@@ -181,8 +181,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo ...@@ -181,8 +181,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
global_bornSum[offset1] += bornSum+tempBuffer[get_local_id(0)+TILE_SIZE]; global_bornSum[offset1] += bornSum+tempBuffer[get_local_id(0)+TILE_SIZE];
global_bornSum[offset2] += localData[get_local_id(0)].bornSum+localData[get_local_id(0)+TILE_SIZE].bornSum; global_bornSum[offset2] += localData[get_local_id(0)].bornSum+localData[get_local_id(0)+TILE_SIZE].bornSum;
} }
lasty = y;
} }
lasty = y;
pos++; pos++;
} }
} }
...@@ -196,7 +196,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff ...@@ -196,7 +196,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
__global float4* posq, __global float* global_bornRadii, __global float4* posq, __global float* global_bornRadii,
__global float* global_bornForce, __local AtomData* localData, __local float4* tempBuffer, __global float* global_bornForce, __local AtomData* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) { __global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
#else #else
unsigned int numTiles) { unsigned int numTiles) {
#endif #endif
...@@ -211,9 +211,9 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff ...@@ -211,9 +211,9 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile // Extract the coordinates of this tile
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int x = tiles[pos]; ushort2 tileIndices = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff); unsigned int x = tileIndices.x;
x = (x>>17); unsigned int y = tileIndices.y;
#else #else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2); unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
...@@ -366,8 +366,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff ...@@ -366,8 +366,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
global_bornForce[offset1] += force.w+tempBuffer[get_local_id(0)+TILE_SIZE].w; global_bornForce[offset1] += force.w+tempBuffer[get_local_id(0)+TILE_SIZE].w;
global_bornForce[offset2] += sum.w; global_bornForce[offset2] += sum.w;
} }
lasty = y;
} }
lasty = y;
pos++; pos++;
} }
energyBuffer[get_global_id(0)] += energy; energyBuffer[get_global_id(0)] += energy;
......
...@@ -17,7 +17,7 @@ typedef struct { ...@@ -17,7 +17,7 @@ typedef struct {
__kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1))) __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeBornSum(__global float* global_bornSum, __global float4* posq, __global float2* global_params, __local AtomData* localData, __local float* tempBuffer, void computeBornSum(__global float* global_bornSum, __global float4* posq, __global float2* global_params, __local AtomData* localData, __local float* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) { __global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
#else #else
unsigned int numTiles) { unsigned int numTiles) {
#endif #endif
...@@ -34,9 +34,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo ...@@ -34,9 +34,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile // Extract the coordinates of this tile
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int x = tiles[pos]; ushort2 tileIndices = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff); unsigned int x = tileIndices.x;
x = (x>>17); unsigned int y = tileIndices.y;
#else #else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2); unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
...@@ -247,8 +247,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo ...@@ -247,8 +247,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
#endif #endif
global_bornSum[offset1] += bornSum; global_bornSum[offset1] += bornSum;
global_bornSum[offset2] += localData[get_local_id(0)].bornSum; global_bornSum[offset2] += localData[get_local_id(0)].bornSum;
lasty = y;
} }
lasty = y;
pos++; pos++;
} }
} }
...@@ -262,7 +262,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff ...@@ -262,7 +262,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
__global float4* posq, __global float* global_bornRadii, __global float4* posq, __global float* global_bornRadii,
__global float* global_bornForce, __local AtomData* localData, __local float4* tempBuffer, __global float* global_bornForce, __local AtomData* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) { __global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
#else #else
unsigned int numTiles) { unsigned int numTiles) {
#endif #endif
...@@ -279,9 +279,9 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff ...@@ -279,9 +279,9 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile // Extract the coordinates of this tile
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int x = tiles[pos]; ushort2 tileIndices = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff); unsigned int x = tileIndices.x;
x = (x>>17); unsigned int y = tileIndices.y;
#else #else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2); unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
...@@ -489,8 +489,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff ...@@ -489,8 +489,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
forceBuffers[offset2] += (float4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0); forceBuffers[offset2] += (float4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0);
global_bornForce[offset1] += force.w; global_bornForce[offset1] += force.w;
global_bornForce[offset2] += localData[get_local_id(0)].fw; global_bornForce[offset2] += localData[get_local_id(0)].fw;
lasty = y;
} }
lasty = y;
pos++; pos++;
} }
energyBuffer[get_global_id(0)] += energy; energyBuffer[get_global_id(0)] += energy;
......
...@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1))) ...@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions, void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions,
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __local AtomData* localData, __local float4* tempBuffer, __global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __local AtomData* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize __global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -33,9 +33,9 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe ...@@ -33,9 +33,9 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile // Extract the coordinates of this tile
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int x = tiles[pos]; ushort2 tileIndices = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff); unsigned int x = tileIndices.x;
x = (x>>17); unsigned int y = tileIndices.y;
#else #else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2); unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
...@@ -212,8 +212,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe ...@@ -212,8 +212,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
float4 sum = (float4) (localData[get_local_id(0)].fx+localData[get_local_id(0)+TILE_SIZE].fx, localData[get_local_id(0)].fy+localData[get_local_id(0)+TILE_SIZE].fy, localData[get_local_id(0)].fz+localData[get_local_id(0)+TILE_SIZE].fz, 0.0f); float4 sum = (float4) (localData[get_local_id(0)].fx+localData[get_local_id(0)+TILE_SIZE].fx, localData[get_local_id(0)].fy+localData[get_local_id(0)+TILE_SIZE].fy, localData[get_local_id(0)].fz+localData[get_local_id(0)+TILE_SIZE].fz, 0.0f);
forceBuffers[offset2].xyz = forceBuffers[offset2].xyz+sum.xyz; forceBuffers[offset2].xyz = forceBuffers[offset2].xyz+sum.xyz;
} }
lasty = y;
} }
lasty = y;
pos++; pos++;
} }
energyBuffer[get_global_id(0)] += energy; energyBuffer[get_global_id(0)] += energy;
......
...@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1))) ...@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions, void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions,
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __local AtomData* localData, __local float4* tempBuffer, __global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __local AtomData* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize __global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -35,9 +35,9 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe ...@@ -35,9 +35,9 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile // Extract the coordinates of this tile
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int x = tiles[pos]; ushort2 tileIndices = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff); unsigned int x = tileIndices.x;
x = (x>>17); unsigned int y = tileIndices.y;
#else #else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2); unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
...@@ -261,8 +261,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe ...@@ -261,8 +261,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
#endif #endif
forceBuffers[offset1].xyz += force.xyz; forceBuffers[offset1].xyz += force.xyz;
forceBuffers[offset2] += (float4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f); forceBuffers[offset2] += (float4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f);
lasty = y;
} }
lasty = y;
pos++; pos++;
} }
energyBuffer[get_global_id(0)] += energy; energyBuffer[get_global_id(0)] += energy;
......
...@@ -517,7 +517,7 @@ void testBlockInteractions(bool periodic) { ...@@ -517,7 +517,7 @@ void testBlockInteractions(bool periodic) {
// Verify that interactions were identified correctly. // Verify that interactions were identified correctly.
vector<cl_uint> interactionCount; vector<cl_uint> interactionCount;
vector<cl_uint> interactingTiles; vector<mm_ushort2> interactingTiles;
vector<cl_uint> interactionFlags; vector<cl_uint> interactionFlags;
nb.getInteractionCount().download(interactionCount); nb.getInteractionCount().download(interactionCount);
int numWithInteractions = interactionCount[0]; int numWithInteractions = interactionCount[0];
...@@ -528,9 +528,8 @@ void testBlockInteractions(bool periodic) { ...@@ -528,9 +528,8 @@ void testBlockInteractions(bool periodic) {
const unsigned int grid = OpenCLContext::TileSize; const unsigned int grid = OpenCLContext::TileSize;
const unsigned int dim = clcontext.getNumAtomBlocks(); const unsigned int dim = clcontext.getNumAtomBlocks();
for (int i = 0; i < numWithInteractions; i++) { for (int i = 0; i < numWithInteractions; i++) {
unsigned int tile = interactingTiles[i]; unsigned int x = interactingTiles[i].x;
unsigned int x = (tile >> 17); unsigned int y = interactingTiles[i].y;
unsigned int y = ((tile >> 2) & 0x7fff);
int index = (x > y ? x+y*dim-y*(y+1)/2 : y+x*dim-x*(x+1)/2); int index = (x > y ? x+y*dim-y*(y+1)/2 : y+x*dim-x*(x+1)/2);
hasInteractions[index] = true; hasInteractions[index] = true;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment