Commit 8a762772 authored by peastman's avatar peastman
Browse files

Merge pull request #1091 from rmcgibbo/osx-opencl

Fix OpenCL platform on low-end devices
parents 14bd3629 8ae2eba5
......@@ -130,7 +130,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
// This will be less than the wavefront width since it takes several
// cycles to execute the full wavefront.
// The SIMD instruction width is the VLIW instruction width (or 1 for scalar),
// this is the number of ALUs that can be executing per instruction per thread.
// this is the number of ALUs that can be executing per instruction per thread.
devices[i].getInfo<CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD>() *
devices[i].getInfo<CL_DEVICE_SIMD_WIDTH_AMD>() *
devices[i].getInfo<CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD>();
......@@ -342,9 +342,9 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines["EXP"] = "exp";
compilationDefines["LOG"] = "log";
}
// Set defines for applying periodic boundary conditions.
Vec3 boxVectors[3];
system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
boxIsTriclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 ||
......@@ -392,11 +392,11 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
}
// Create the work thread used for parallelization when running on multiple devices.
thread = new WorkThread();
// Create utilities objects.
bonded = new OpenCLBondedUtilities(*this);
nonbonded = new OpenCLNonbondedUtilities(*this);
integration = new OpenCLIntegrationUtilities(*this, system);
......@@ -512,7 +512,7 @@ string OpenCLContext::replaceStrings(const string& input, const std::map<std::st
if (index != result.npos) {
if ((index == 0 || symbolChars.find(result[index-1]) == symbolChars.end()) && (index == result.size()-size || symbolChars.find(result[index+size]) == symbolChars.end())) {
// We have found a complete symbol, not part of a longer symbol.
result.replace(index, size, iter->second);
index += iter->second.size();
}
......@@ -797,7 +797,7 @@ private:
void OpenCLContext::findMoleculeGroups() {
// The first time this is called, we need to identify all the molecules in the system.
if (moleculeGroups.size() == 0) {
// Add a ForceInfo that makes sure reordering doesn't break virtual sites.
......@@ -879,7 +879,7 @@ void OpenCLContext::findMoleculeGroups() {
if (!forces[k]->areParticlesIdentical(mol.atoms[i], mol2.atoms[i]))
identical = false;
}
// See if the constraints are identical.
for (int i = 0; i < (int) mol.constraints.size() && identical; i++) {
......@@ -960,11 +960,11 @@ void OpenCLContext::invalidateMolecules() {
}
if (valid)
return;
// The list of which molecules are identical is no longer valid. We need to restore the
// atoms to their original order, rebuild the list of identical molecules, and sort them
// again.
vector<mm_int4> newCellOffsets(numAtoms);
if (useDoublePrecision) {
vector<mm_double4> oldPosq(paddedNumAtoms);
......
......@@ -186,7 +186,7 @@ static bool compareUshort2(mm_ushort2 a, mm_ushort2 b) {
void OpenCLNonbondedUtilities::initialize(const System& system) {
if (atomExclusions.size() == 0) {
// No exclusions were specifically requested, so just mark every atom as not interacting with itself.
atomExclusions.resize(context.getNumAtoms());
for (int i = 0; i < (int) atomExclusions.size(); i++)
atomExclusions[i].push_back(i);
......@@ -199,7 +199,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
setAtomBlockRange(context.getContextIndex()/(double) numContexts, (context.getContextIndex()+1)/(double) numContexts);
// Build a list of tiles that contain exclusions.
set<pair<int, int> > tilesWithExclusions;
for (int atom1 = 0; atom1 < (int) atomExclusions.size(); ++atom1) {
int x = atom1/OpenCLContext::TileSize;
......@@ -410,7 +410,7 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
numTiles = (int) (endFraction*totalTiles)-startTileIndex;
if (useCutoff) {
// We are using a cutoff, and the kernels have already been created.
for (map<int, KernelSet>::iterator iter = groupKernels.begin(); iter != groupKernels.end(); ++iter) {
iter->second.forceKernel.setArg<cl_uint>(5, startTileIndex);
iter->second.forceKernel.setArg<cl_uint>(6, numTiles);
......@@ -491,7 +491,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(18, rebuildNeighborList->getDeviceBuffer());
if (kernels.findInteractingBlocksKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()) < groupSize) {
// The device can't handle this block size, so reduce it.
groupSize -= 32;
if (groupSize < 32)
throw OpenMMException("Failed to create findInteractingBlocks kernel");
......
......@@ -25,7 +25,7 @@ __kernel void computeNonbonded(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif
......@@ -38,7 +38,7 @@ __kernel void computeNonbonded(
__local AtomData localData[FORCE_WORK_GROUP_SIZE];
// First loop: process tiles that contain exclusions.
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
......@@ -100,7 +100,7 @@ __kernel void computeNonbonded(
}
else {
// This is an off-diagonal tile.
const unsigned int localAtomIndex = get_local_id(0);
unsigned int j = y*TILE_SIZE + tgx;
real4 tempPosq = posq[j];
......@@ -126,7 +126,7 @@ __kernel void computeNonbonded(
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
......@@ -213,7 +213,7 @@ __kernel void computeNonbonded(
bool includeTile = true;
// Extract the coordinates of this tile.
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
......@@ -245,7 +245,7 @@ __kernel void computeNonbonded(
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
}
......@@ -300,7 +300,7 @@ __kernel void computeNonbonded(
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
......@@ -352,7 +352,7 @@ __kernel void computeNonbonded(
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
......
......@@ -22,7 +22,7 @@ __kernel void computeNonbonded(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif
......@@ -31,7 +31,7 @@ __kernel void computeNonbonded(
__local AtomData localData[TILE_SIZE];
// First loop: process tiles that contain exclusions.
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+get_group_id(0)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/get_num_groups(0);
const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(get_group_id(0)+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/get_num_groups(0);
for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
......@@ -70,7 +70,7 @@ __kernel void computeNonbonded(
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
......@@ -138,7 +138,7 @@ __kernel void computeNonbonded(
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
......@@ -228,9 +228,9 @@ __kernel void computeNonbonded(
while (pos < end) {
const bool hasExclusions = false;
bool includeTile = true;
// Extract the coordinates of this tile.
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
......@@ -304,7 +304,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = dot(delta.xyz, delta.xyz);
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
real invR = RSQRT(r2);
real r = r2*invR;
unsigned int atom2 = j;
......@@ -367,7 +367,7 @@ __kernel void computeNonbonded(
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment