Commit 6bea0a80 authored by Peter Eastman's avatar Peter Eastman
Browse files

Workarounds for bug in AMD compiler on Mac

parent 8ae2a598
...@@ -130,6 +130,8 @@ OpenCLContext::OpenCLContext(int numParticles, int platformIndex, int deviceInde ...@@ -130,6 +130,8 @@ OpenCLContext::OpenCLContext(int numParticles, int platformIndex, int deviceInde
} }
else else
simdWidth = 1; simdWidth = 1;
if (platforms[0].getInfo<CL_PLATFORM_VENDOR>() == "Apple" && vendor == "AMD")
compilationDefines["MAC_AMD_WORKAROUND"] == "";
if (supports64BitGlobalAtomics) if (supports64BitGlobalAtomics)
compilationDefines["SUPPORTS_64_BIT_ATOMICS"] = ""; compilationDefines["SUPPORTS_64_BIT_ATOMICS"] = "";
if (supportsDoublePrecision) if (supportsDoublePrecision)
......
...@@ -100,9 +100,20 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s ...@@ -100,9 +100,20 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
// Load an atom position and the bounding box the other block. // Load an atom position and the bounding box the other block.
int box = (group == 0 ? x : y);
int atom = (group == 0 ? y : x)*TILE_SIZE+index;
#ifdef MAC_AMD_WORKAROUND
__global float* bc = (__global float*) blockCenter;
__global float* bb = (__global float*) blockBoundingBox;
__global float* ps = (__global float*) posq;
center = (float4) (bc[4*box], bc[4*box+1], bc[4*box+2], 0.0f);
boxSize = (float4) (bb[4*box], bb[4*box+1], bb[4*box+2], 0.0f);
pos = (float4) (ps[4*atom], ps[4*atom+1], ps[4*atom+2], 0.0f);
#else
center = blockCenter[(group == 0 ? x : y)]; center = blockCenter[(group == 0 ? x : y)];
boxSize = blockBoundingBox[(group == 0 ? x : y)]; boxSize = blockBoundingBox[(group == 0 ? x : y)];
pos = posq[(group == 0 ? y : x)*TILE_SIZE+index]; pos = posq[(group == 0 ? y : x)*TILE_SIZE+index];
#endif
// Find the distance of the atom from the bounding box. // Find the distance of the atom from the bounding box.
...@@ -182,14 +193,24 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox ...@@ -182,14 +193,24 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
// Find the distance between the bounding boxes of the two cells. // Find the distance between the bounding boxes of the two cells.
#ifdef MAC_AMD_WORKAROUND
__global float* bc = (__global float*) blockCenter;
__global float* bb = (__global float*) blockBoundingBox;
float4 bcx = (float4) (bc[4*x], bc[4*x+1], bc[4*x+2], 0.0f);
float4 bcy = (float4) (bc[4*y], bc[4*y+1], bc[4*y+2], 0.0f);
float4 delta = bcx-bcy;
float4 boxSizea = (float4) (bb[4*x], bb[4*x+1], bb[4*x+2], 0.0f);
float4 boxSizeb = (float4) (bb[4*y], bb[4*y+1], bb[4*y+2], 0.0f);
#else
float4 delta = blockCenter[x]-blockCenter[y]; float4 delta = blockCenter[x]-blockCenter[y];
float4 boxSizea = blockBoundingBox[x];
float4 boxSizeb = blockBoundingBox[y];
#endif
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z; delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
float4 boxSizea = blockBoundingBox[x];
float4 boxSizeb = blockBoundingBox[y];
delta.x = max(0.0f, fabs(delta.x)-boxSizea.x-boxSizeb.x); delta.x = max(0.0f, fabs(delta.x)-boxSizea.x-boxSizeb.x);
delta.y = max(0.0f, fabs(delta.y)-boxSizea.y-boxSizeb.y); delta.y = max(0.0f, fabs(delta.y)-boxSizea.y-boxSizeb.y);
delta.z = max(0.0f, fabs(delta.z)-boxSizea.z-boxSizeb.z); delta.z = max(0.0f, fabs(delta.z)-boxSizea.z-boxSizeb.z);
......
...@@ -278,10 +278,29 @@ __kernel void gridInterpolateForce(__global const float4* restrict posq, __globa ...@@ -278,10 +278,29 @@ __kernel void gridInterpolateForce(__global const float4* restrict posq, __globa
float gridvalue = pmeGrid[index].x; float gridvalue = pmeGrid[index].x;
force.x += ddata[ix].x*data[iy].y*data[iz].z*gridvalue; force.x += ddata[ix].x*data[iy].y*data[iz].z*gridvalue;
force.y += data[ix].x*ddata[iy].y*data[iz].z*gridvalue; force.y += data[ix].x*ddata[iy].y*data[iz].z*gridvalue;
#ifndef MAC_AMD_WORKAROUND
force.z += data[ix].x*data[iy].y*ddata[iz].z*gridvalue; force.z += data[ix].x*data[iy].y*ddata[iz].z*gridvalue;
#endif
} }
} }
} }
#ifdef MAC_AMD_WORKAROUND
for (int ix = 0; ix < PME_ORDER; ix++) {
int xindex = gridIndex.x+ix;
xindex -= (xindex >= GRID_SIZE_X ? GRID_SIZE_X : 0);
for (int iy = 0; iy < PME_ORDER; iy++) {
int yindex = gridIndex.y+iy;
yindex -= (yindex >= GRID_SIZE_Y ? GRID_SIZE_Y : 0);
for (int iz = 0; iz < PME_ORDER; iz++) {
int zindex = gridIndex.z+iz;
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
float gridvalue = pmeGrid[index].x;
force.z += data[ix].x*data[iy].y*ddata[iz].z*gridvalue;
}
}
}
#endif
float4 totalForce = forceBuffers[atom]; float4 totalForce = forceBuffers[atom];
float q = pos.w*EPSILON_FACTOR; float q = pos.w*EPSILON_FACTOR;
totalForce.x -= q*force.x*GRID_SIZE_X*invPeriodicBoxSize.x; totalForce.x -= q*force.x*GRID_SIZE_X*invPeriodicBoxSize.x;
......
...@@ -60,7 +60,12 @@ __kernel void assignElementsToBuckets(__global const TYPE* restrict data, int le ...@@ -60,7 +60,12 @@ __kernel void assignElementsToBuckets(__global const TYPE* restrict data, int le
float maxValue = dataRange.y; float maxValue = dataRange.y;
float bucketWidth = (maxValue-minValue)/numBuckets; float bucketWidth = (maxValue-minValue)/numBuckets;
for (int index = get_global_id(0); index < length; index += get_global_size(0)) { for (int index = get_global_id(0); index < length; index += get_global_size(0)) {
#ifdef MAC_AMD_WORKAROUND
__global int* d = (__global int*) data;
int2 element = (int2) (d[2*index], d[2*index+1]);
#else
TYPE element = data[index]; TYPE element = data[index];
#endif
float value = getValue(element); float value = getValue(element);
int bucketIndex = min((int) ((value-minValue)/bucketWidth), numBuckets-1); int bucketIndex = min((int) ((value-minValue)/bucketWidth), numBuckets-1);
offsetInBucket[index] = atom_inc(&bucketOffset[bucketIndex]); offsetInBucket[index] = atom_inc(&bucketOffset[bucketIndex]);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment