Commit 3946c025 authored by peastman's avatar peastman
Browse files

Merge pull request #878 from peastman/applecl

Enabled OpenCL on OS X
parents 013cf20e aacefccf
......@@ -405,11 +405,11 @@ MARK_AS_ADVANCED(CUDA_BUILD_CUBIN)
MARK_AS_ADVANCED(CUDA_BUILD_EMULATION)
FIND_PACKAGE(OpenCL QUIET)
IF(OPENCL_FOUND AND NOT APPLE)
IF(OPENCL_FOUND)
SET(OPENMM_BUILD_OPENCL_LIB ON CACHE BOOL "Build OpenMMOpenCL library")
ELSE(OPENCL_FOUND AND NOT APPLE)
ELSE(OPENCL_FOUND)
SET(OPENMM_BUILD_OPENCL_LIB OFF CACHE BOOL "Build OpenMMOpenCL library")
ENDIF(OPENCL_FOUND AND NOT APPLE)
ENDIF(OPENCL_FOUND)
IF(OPENMM_BUILD_OPENCL_LIB)
ADD_SUBDIRECTORY(platforms/opencl)
ENDIF(OPENMM_BUILD_OPENCL_LIB)
......
......@@ -106,9 +106,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
// if they supplied a valid deviceIndex, we only look through that one
if (i != deviceIndex && deviceIndex >= 0 && deviceIndex < (int) devices.size())
continue;
if (platformVendor == "Apple" && (devices[i].getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU || devices[i].getInfo<CL_DEVICE_VENDOR>() == "AMD"))
continue; // The CPU device on OS X won't work correctly, and there are serious bugs using AMD GPUs.
if (platformVendor == "Apple" && (devices[i].getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU))
continue; // The CPU device on OS X won't work correctly.
int maxSize = devices[i].getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0];
int processingElementsPerComputeUnit = 8;
if (devices[i].getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU) {
......@@ -170,6 +169,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines["WORK_GROUP_SIZE"] = intToString(ThreadBlockSize);
if (platformVendor.size() >= 5 && platformVendor.substr(0, 5) == "Intel")
defaultOptimizationOptions = "";
else if (platformVendor == "Apple")
defaultOptimizationOptions = "-cl-mad-enable -cl-no-signed-zeros";
else
defaultOptimizationOptions = "-cl-fast-relaxed-math";
supports64BitGlobalAtomics = (device.getInfo<CL_DEVICE_EXTENSIONS>().find("cl_khr_int64_base_atomics") != string::npos);
......@@ -241,8 +242,6 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
}
else
simdWidth = 1;
if (platformVendor == "Apple" && vendor == "AMD")
compilationDefines["MAC_AMD_WORKAROUND"] = "";
if (supports64BitGlobalAtomics)
compilationDefines["SUPPORTS_64_BIT_ATOMICS"] = "";
if (supportsDoublePrecision)
......
......@@ -75,6 +75,7 @@ OpenCLFFT3D::OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize
defines["PACKED_XSIZE"] = context.intToString(packedXSize);
defines["PACKED_YSIZE"] = context.intToString(packedYSize);
defines["PACKED_ZSIZE"] = context.intToString(packedZSize);
defines["M_PI"] = context.doubleToString(M_PI);
cl::Program program = context.createProgram(OpenCLKernelSources::fftR2C, defines);
packForwardKernel = cl::Kernel(program, "packForwardData");
unpackForwardKernel = cl::Kernel(program, "unpackForwardData");
......
......@@ -1628,6 +1628,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
pmeDefines["GRID_SIZE_Y"] = cl.intToString(gridSizeY);
pmeDefines["GRID_SIZE_Z"] = cl.intToString(gridSizeZ);
pmeDefines["EPSILON_FACTOR"] = cl.doubleToString(sqrt(ONE_4PI_EPS0));
pmeDefines["M_PI"] = cl.doubleToString(M_PI);
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
if (deviceIsCpu)
pmeDefines["DEVICE_IS_CPU"] = "1";
......
......@@ -109,7 +109,7 @@ bool OpenCLPlatform::supportsDoublePrecision() const {
bool OpenCLPlatform::isPlatformSupported() {
// Return false for OpenCL implementations that are known
// to be buggy (Apple OSX since 10.7.5)
// to be buggy (Apple OS X prior to 10.10).
#ifdef __APPLE__
char str[256];
......@@ -122,12 +122,10 @@ bool OpenCLPlatform::isPlatformSupported() {
if (sscanf(str, "%d.%d.%d", &major, &minor, &micro) != 3)
return false;
if ((major > 11) || (major == 11 && minor > 4) || (major == 11 && minor == 4 && micro >= 2))
// 11.4.2 is the darwin release corresponding to OSX 10.7.5, which is the
// point at which a number of serious bugs were introduced into the
// Apple OpenCL libraries, resulting in catistrophically incorrect MD simulations
// (see https://github.com/SimTk/openmm/issues/395 for example). Once a fix is released,
// this version check should be updated.
if (major < 14 || (major == 14 && minor < 3))
// 14.3.0 is the darwin release corresponding to OS X 10.10.3. Versions prior to that
// contained a number of serious bugs in the Apple OpenCL libraries.
// (See https://github.com/SimTk/openmm/issues/395 for example.)
return false;
#endif
......
......@@ -42,7 +42,6 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le
replacements["MIN_KEY"] = trait->getMinKey();
replacements["MAX_KEY"] = trait->getMaxKey();
replacements["MAX_VALUE"] = trait->getMaxValue();
replacements["VALUE_IS_INT2"] = (trait->getDataType() == std::string("int2") ? "1" : "0");
cl::Program program = context.createProgram(context.replaceStrings(OpenCLKernelSources::sort, replacements));
shortListKernel = cl::Kernel(program, "sortShortList");
computeRangeKernel = cl::Kernel(program, "computeRange");
......
......@@ -109,13 +109,7 @@ __kernel void assignElementsToBuckets(__global const DATA_TYPE* restrict data, u
float maxValue = (float) (range[1]);
float bucketWidth = (maxValue-minValue)/numBuckets;
for (uint index = get_global_id(0); index < length; index += get_global_size(0)) {
#if defined(MAC_AMD_WORKAROUND) && VALUE_IS_INT2
__global int* d = (__global int*) data;
int2 element = (int2) (d[2*index], d[2*index+1]);
float key = (float) getValue(element);
#else
float key = (float) getValue(data[index]);
#endif
uint bucketIndex = min((uint) ((key-minValue)/bucketWidth), numBuckets-1);
offsetInBucket[index] = atom_inc(&bucketOffset[bucketIndex]);
bucketOfElement[index] = bucketIndex;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment