"platforms/vscode:/vscode.git/clone" did not exist on "d9941f475d4a2d64e7a5c8573ee143b1d9694ce4"
Commit a8fe9cea authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing changes to reduce memory use for large systems

parent b0d2357c
......@@ -84,6 +84,13 @@ struct mm_float16 {
s8(s8), s9(s9), s10(s10), s11(s11), s12(s12), s13(s13), s14(s14), s15(15) {
}
};
struct mm_ushort2 {
cl_ushort x, y;
mm_ushort2() {
}
mm_ushort2(cl_ushort x, cl_ushort y) : x(x), y(y) {
}
};
struct mm_int2 {
cl_int x, y;
mm_int2() {
......
......@@ -200,7 +200,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
// Create data structures for the neighbor list.
if (useCutoff) {
interactingTiles = new OpenCLArray<cl_uint>(context, numTiles, "interactingTiles");
interactingTiles = new OpenCLArray<mm_ushort2>(context, numTiles, "interactingTiles");
interactionFlags = new OpenCLArray<cl_uint>(context, numTiles, "interactionFlags");
interactionCount = new OpenCLArray<cl_uint>(context, 1, "interactionCount");
blockCenter = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockCenter");
......@@ -230,6 +230,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel.setArg<cl::Buffer>(4, blockBoundingBox->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(5, interactionCount->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(7, context.getPosq().getDeviceBuffer());
findInteractionsWithinBlocksKernel = cl::Kernel(interactingBlocksProgram, "findInteractionsWithinBlocks");
findInteractionsWithinBlocksKernel.setArg<cl_float>(0, (cl_float) (cutoff*cutoff));
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(3, context.getPosq().getDeviceBuffer());
......
......@@ -148,7 +148,7 @@ public:
/**
* Get the array containing tiles with interactions.
*/
OpenCLArray<cl_uint>& getInteractingTiles() {
OpenCLArray<mm_ushort2>& getInteractingTiles() {
return *interactingTiles;
}
/**
......@@ -197,7 +197,7 @@ private:
OpenCLArray<cl_uint>* exclusions;
OpenCLArray<cl_uint>* exclusionIndices;
OpenCLArray<cl_uint>* exclusionRowIndices;
OpenCLArray<cl_uint>* interactingTiles;
OpenCLArray<mm_ushort2>* interactingTiles;
OpenCLArray<cl_uint>* interactionFlags;
OpenCLArray<cl_uint>* interactionCount;
OpenCLArray<mm_float4>* blockCenter;
......
......@@ -11,7 +11,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
__global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, __global unsigned int* exclusionIndices,
__global unsigned int* exclusionRowIndices, __local float4* tempForceBuffer,
#ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
__global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
#else
unsigned int numTiles
#endif
......@@ -30,9 +30,9 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
while (pos < end) {
// Extract the coordinates of this tile
#ifdef USE_CUTOFF
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff);
x = (x>>17);
ushort2 tileIndices = tiles[pos];
unsigned int x = tileIndices.x;
unsigned int y = tileIndices.y;
#else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
......@@ -207,8 +207,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
STORE_DERIVATIVES_1
STORE_DERIVATIVES_2
}
lasty = y;
}
lasty = y;
pos++;
}
energyBuffer[get_global_id(0)] += energy;
......
......@@ -11,7 +11,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
__global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, __global unsigned int* exclusionIndices,
__global unsigned int* exclusionRowIndices, __local float4* tempBuffer,
#ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
__global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
#else
unsigned int numTiles
#endif
......@@ -31,9 +31,9 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
while (pos < end) {
// Extract the coordinates of this tile
#ifdef USE_CUTOFF
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff);
x = (x>>17);
ushort2 tileIndices = tiles[pos];
unsigned int x = tileIndices.x;
unsigned int y = tileIndices.y;
#else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
......@@ -195,8 +195,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
forceBuffers[offset2].xyz += local_force[get_local_id(0)].xyz;
STORE_DERIVATIVES_1
STORE_DERIVATIVES_2
lasty = y;
}
lasty = y;
pos++;
}
energyBuffer[get_global_id(0)] += energy;
......
......@@ -9,7 +9,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __global float* global_value, __local float* local_value,
__local float* tempBuffer,
#ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
__global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
#else
unsigned int numTiles
#endif
......@@ -27,9 +27,9 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
while (pos < end) {
// Extract the coordinates of this tile
#ifdef USE_CUTOFF
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff);
x = (x>>17);
ushort2 tileIndices = tiles[pos];
unsigned int x = tileIndices.x;
unsigned int y = tileIndices.y;
#else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
......@@ -196,8 +196,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
global_value[offset1] += value+tempBuffer[get_local_id(0)+TILE_SIZE];
global_value[offset2] += local_value[get_local_id(0)]+local_value[get_local_id(0)+TILE_SIZE];
}
lasty = y;
}
lasty = y;
pos++;
}
}
......@@ -9,7 +9,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __global float* global_value, __local float* local_value,
__local float* tempBuffer,
#ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
__global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
#else
unsigned int numTiles
#endif
......@@ -29,9 +29,9 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
while (pos < end) {
// Extract the coordinates of this tile
#ifdef USE_CUTOFF
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff);
x = (x>>17);
ushort2 tileIndices = tiles[pos];
unsigned int x = tileIndices.x;
unsigned int y = tileIndices.y;
#else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
......@@ -233,8 +233,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
#endif
global_value[offset1] += value;
global_value[offset2] += local_value[get_local_id(0)];
lasty = y;
}
lasty = y;
pos++;
}
}
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#define TILE_SIZE 32
#define GROUP_SIZE 64
#define BUFFER_GROUPS 4
......@@ -44,31 +45,34 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
* This is called by findBlocksWithInteractions(). It compacts the list of blocks and writes them
* to global memory.
*/
void storeInteractionData(__local short2* buffer, __local bool* valid, __local int* sum, __local int* sum2, __local short2* temp, __local int* baseIndex,
__global unsigned int* interactionCount, __global unsigned int* interactingTiles) {
void storeInteractionData(__local ushort2* buffer, __local int* valid, __local short* sum, __local ushort2* temp, __local int* baseIndex,
__global unsigned int* interactionCount, __global ushort2* interactingTiles, float cutoffSquared, float4 periodicBoxSize,
float4 invPeriodicBoxSize, __global float4* posq, __global float4* blockCenter, __global float4* blockBoundingBox) {
// The buffer is full, so we need to compact it and write out results. Start by doing a parallel prefix sum.
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i] = (valid[i] ? 1 : 0);
temp[i].x = (valid[i] ? 1 : 0);
barrier(CLK_LOCAL_MEM_FENCE);
int whichBuffer = 0;
for (int offset = 1; offset < BUFFER_SIZE; offset *= 2) {
if (whichBuffer == 0)
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
sum2[i] = (i < offset ? sum[i] : sum[i]+sum[i-offset]);
temp[i].y = (i < offset ? temp[i].x : temp[i].x+temp[i-offset].x);
else
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i] = (i < offset ? sum2[i] : sum2[i]+sum2[i-offset]);
temp[i].x = (i < offset ? temp[i].y : temp[i].y+temp[i-offset].y);
whichBuffer = 1-whichBuffer;
barrier(CLK_LOCAL_MEM_FENCE);
}
if (whichBuffer == 1) {
if (whichBuffer == 0)
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i] = sum2[i];
barrier(CLK_LOCAL_MEM_FENCE);
}
sum[i] = temp[i].x;
else
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i] = temp[i].y;
barrier(CLK_LOCAL_MEM_FENCE);
// Compact the buffer and store it to global memory.
// Compact the buffer.
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
if (valid[i]) {
......@@ -77,14 +81,74 @@ void storeInteractionData(__local short2* buffer, __local bool* valid, __local i
}
barrier(CLK_LOCAL_MEM_FENCE);
int numValid = sum[BUFFER_SIZE-1];
if (get_local_id(0) == 0)
*baseIndex = atom_add(interactionCount, numValid);
barrier(CLK_LOCAL_MEM_FENCE);
// Filter the list of tiles by comparing the distance from each atom to the other bounding box.
int tile;
int index = get_local_id(0)&(TILE_SIZE-1);
int group = get_local_id(0)/TILE_SIZE;
__local int* flag = sum;
int lasty = -1;
float4 center, boxSize, pos;
for (tile = 0; tile < numValid; tile++) {
int x = temp[tile].x;
int y = temp[tile].y;
if (x == y) {
tile++;
continue;
}
if (index == 0)
flag[group] = true;
barrier(CLK_LOCAL_MEM_FENCE);
// Load an atom position and the bounding box the other block.
if (group == 0) {
center = blockCenter[x];
boxSize = blockBoundingBox[x];
if (y != lasty)
pos = posq[y*TILE_SIZE+index];
}
else {
if (y != lasty) {
center = blockCenter[y];
boxSize = blockBoundingBox[y];
}
pos = posq[x*TILE_SIZE+index];
}
lasty = y;
// Find the distance of the atom from the bounding box.
float4 delta = pos-center;
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
delta = max((float4) 0.0f, fabs(delta)-boxSize);
if (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < cutoffSquared)
flag[group] = false;
barrier(CLK_LOCAL_MEM_FENCE);
if (flag[0] || flag[1]) {
// This tile contains no interactions.
numValid--;
if (get_local_id(0) == 0)
temp[tile] = temp[numValid];
}
else
tile++;
barrier(CLK_LOCAL_MEM_FENCE);
}
// Store it to global memory.
if (get_local_id(0) == 0)
*baseIndex = atom_add(interactionCount, numValid);
barrier(CLK_LOCAL_MEM_FENCE);
for (int i = get_local_id(0); i < numValid; i += GROUP_SIZE)
interactingTiles[*baseIndex+i] = (temp[i].x<<17)+(temp[i].y<<2);
interactingTiles[*baseIndex+i] = temp[i];
barrier(CLK_LOCAL_MEM_FENCE);
}
......@@ -93,12 +157,11 @@ void storeInteractionData(__local short2* buffer, __local bool* valid, __local i
* mark them as non-interacting.
*/
__kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionCount, __global unsigned int* interactingTiles) {
__local short2 buffer[BUFFER_SIZE];
__local bool valid[BUFFER_SIZE];
__local int sum[BUFFER_SIZE];
__local int sum2[BUFFER_SIZE];
__local short2 temp[BUFFER_SIZE];
__global float4* blockBoundingBox, __global unsigned int* interactionCount, __global ushort2* interactingTiles, __global float4* posq) {
__local ushort2 buffer[BUFFER_SIZE];
__local int valid[BUFFER_SIZE];
__local short sum[BUFFER_SIZE];
__local ushort2 temp[BUFFER_SIZE];
__local int bufferFull;
__local int globalIndex;
int valuesInBuffer = 0;
......@@ -138,7 +201,7 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
int bufferIndex = valuesInBuffer*GROUP_SIZE+get_local_id(0);
valid[bufferIndex] = true;
buffer[bufferIndex] = (short2) (x, y);
buffer[bufferIndex] = (ushort2) (x, y);
valuesInBuffer++;
if (!bufferFull && valuesInBuffer == BUFFER_GROUPS)
bufferFull = true;
......@@ -146,21 +209,21 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
}
barrier(CLK_LOCAL_MEM_FENCE);
if (bufferFull) {
storeInteractionData(buffer, valid, sum, sum2, temp, &globalIndex, interactionCount, interactingTiles);
storeInteractionData(buffer, valid, sum, temp, &globalIndex, interactionCount, interactingTiles, cutoffSquared, periodicBoxSize, invPeriodicBoxSize, posq, blockCenter, blockBoundingBox);
valuesInBuffer = 0;
if (get_local_id(0) == 0)
bufferFull = false;
barrier(CLK_LOCAL_MEM_FENCE);
}
}
storeInteractionData(buffer, valid, sum, sum2, temp, &globalIndex, interactionCount, interactingTiles);
storeInteractionData(buffer, valid, sum, temp, &globalIndex, interactionCount, interactingTiles, cutoffSquared, periodicBoxSize, invPeriodicBoxSize, posq, blockCenter, blockBoundingBox);
}
/**
* Compare each atom in one block to the bounding box of another block, and set
* flags for which ones are interacting.
*/
__kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* posq, __global unsigned int* tiles, __global float4* blockCenter,
__kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* posq, __global ushort2* tiles, __global float4* blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, __local unsigned int* flags) {
unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
unsigned int warp = get_global_id(0)/TILE_SIZE;
......@@ -173,13 +236,10 @@ __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicB
float4 apos;
while (pos < end) {
// Extract the coordinates of this tile
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff);
bool hasExclusions = (x & 0x1);
x = (x >> 17);
if (x == y || hasExclusions) {
// Assume this tile will be dense.
ushort2 tileIndices = tiles[pos];
unsigned int x = tileIndices.x;
unsigned int y = tileIndices.y;
if (x == y) {
if (index == 0)
interactionFlags[pos] = 0xFFFFFFFF;
}
......
......@@ -17,7 +17,7 @@ typedef struct {
__kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeBornSum(__global float* global_bornSum, __global float4* posq, __global float2* global_params, __local AtomData* localData, __local float* tempBuffer,
#ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
__global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
#else
unsigned int numTiles) {
#endif
......@@ -32,9 +32,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
while (pos < end) {
// Extract the coordinates of this tile
#ifdef USE_CUTOFF
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff);
x = (x>>17);
ushort2 tileIndices = tiles[pos];
unsigned int x = tileIndices.x;
unsigned int y = tileIndices.y;
#else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
......@@ -181,8 +181,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
global_bornSum[offset1] += bornSum+tempBuffer[get_local_id(0)+TILE_SIZE];
global_bornSum[offset2] += localData[get_local_id(0)].bornSum+localData[get_local_id(0)+TILE_SIZE].bornSum;
}
lasty = y;
}
lasty = y;
pos++;
}
}
......@@ -196,7 +196,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
__global float4* posq, __global float* global_bornRadii,
__global float* global_bornForce, __local AtomData* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
__global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
#else
unsigned int numTiles) {
#endif
......@@ -211,9 +211,9 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
while (pos < end) {
// Extract the coordinates of this tile
#ifdef USE_CUTOFF
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff);
x = (x>>17);
ushort2 tileIndices = tiles[pos];
unsigned int x = tileIndices.x;
unsigned int y = tileIndices.y;
#else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
......@@ -366,8 +366,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
global_bornForce[offset1] += force.w+tempBuffer[get_local_id(0)+TILE_SIZE].w;
global_bornForce[offset2] += sum.w;
}
lasty = y;
}
lasty = y;
pos++;
}
energyBuffer[get_global_id(0)] += energy;
......
......@@ -17,7 +17,7 @@ typedef struct {
__kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeBornSum(__global float* global_bornSum, __global float4* posq, __global float2* global_params, __local AtomData* localData, __local float* tempBuffer,
#ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
__global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
#else
unsigned int numTiles) {
#endif
......@@ -34,9 +34,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
while (pos < end) {
// Extract the coordinates of this tile
#ifdef USE_CUTOFF
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff);
x = (x>>17);
ushort2 tileIndices = tiles[pos];
unsigned int x = tileIndices.x;
unsigned int y = tileIndices.y;
#else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
......@@ -247,8 +247,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
#endif
global_bornSum[offset1] += bornSum;
global_bornSum[offset2] += localData[get_local_id(0)].bornSum;
lasty = y;
}
lasty = y;
pos++;
}
}
......@@ -262,7 +262,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
__global float4* posq, __global float* global_bornRadii,
__global float* global_bornForce, __local AtomData* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
__global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
#else
unsigned int numTiles) {
#endif
......@@ -279,9 +279,9 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
while (pos < end) {
// Extract the coordinates of this tile
#ifdef USE_CUTOFF
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff);
x = (x>>17);
ushort2 tileIndices = tiles[pos];
unsigned int x = tileIndices.x;
unsigned int y = tileIndices.y;
#else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
......@@ -489,8 +489,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
forceBuffers[offset2] += (float4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0);
global_bornForce[offset1] += force.w;
global_bornForce[offset2] += localData[get_local_id(0)].fw;
lasty = y;
}
lasty = y;
pos++;
}
energyBuffer[get_global_id(0)] += energy;
......
......@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions,
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __local AtomData* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
__global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
#else
unsigned int numTiles
#endif
......@@ -33,9 +33,9 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
while (pos < end) {
// Extract the coordinates of this tile
#ifdef USE_CUTOFF
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff);
x = (x>>17);
ushort2 tileIndices = tiles[pos];
unsigned int x = tileIndices.x;
unsigned int y = tileIndices.y;
#else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
......@@ -212,8 +212,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
float4 sum = (float4) (localData[get_local_id(0)].fx+localData[get_local_id(0)+TILE_SIZE].fx, localData[get_local_id(0)].fy+localData[get_local_id(0)+TILE_SIZE].fy, localData[get_local_id(0)].fz+localData[get_local_id(0)+TILE_SIZE].fz, 0.0f);
forceBuffers[offset2].xyz = forceBuffers[offset2].xyz+sum.xyz;
}
lasty = y;
}
lasty = y;
pos++;
}
energyBuffer[get_global_id(0)] += energy;
......
......@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions,
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __local AtomData* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF
__global unsigned int* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
__global ushort2* tiles, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize
#else
unsigned int numTiles
#endif
......@@ -35,9 +35,9 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
while (pos < end) {
// Extract the coordinates of this tile
#ifdef USE_CUTOFF
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff);
x = (x>>17);
ushort2 tileIndices = tiles[pos];
unsigned int x = tileIndices.x;
unsigned int y = tileIndices.y;
#else
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
unsigned int x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
......@@ -261,8 +261,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
#endif
forceBuffers[offset1].xyz += force.xyz;
forceBuffers[offset2] += (float4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f);
lasty = y;
}
lasty = y;
pos++;
}
energyBuffer[get_global_id(0)] += energy;
......
......@@ -517,7 +517,7 @@ void testBlockInteractions(bool periodic) {
// Verify that interactions were identified correctly.
vector<cl_uint> interactionCount;
vector<cl_uint> interactingTiles;
vector<mm_ushort2> interactingTiles;
vector<cl_uint> interactionFlags;
nb.getInteractionCount().download(interactionCount);
int numWithInteractions = interactionCount[0];
......@@ -528,9 +528,8 @@ void testBlockInteractions(bool periodic) {
const unsigned int grid = OpenCLContext::TileSize;
const unsigned int dim = clcontext.getNumAtomBlocks();
for (int i = 0; i < numWithInteractions; i++) {
unsigned int tile = interactingTiles[i];
unsigned int x = (tile >> 17);
unsigned int y = ((tile >> 2) & 0x7fff);
unsigned int x = interactingTiles[i].x;
unsigned int y = interactingTiles[i].y;
int index = (x > y ? x+y*dim-y*(y+1)/2 : y+x*dim-x*(x+1)/2);
hasInteractions[index] = true;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment