Changes to support compilation on Snow Leopard

afbf885d · Peter Eastman · f5aebbd4 · afbf885d · afbf885d · afbf885d
Commit afbf885d authored Sep 23, 2009 by Peter Eastman
5 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -85,6 +85,11 @@ ELSE( CMAKE_SIZEOF_VOID_P EQUAL 8 )
  SET( LIB64  )
 ENDIF( CMAKE_SIZEOF_VOID_P EQUAL 8 )

+# Build 32-bit binaries on Apple, since CUDA doesn't currently work with 64-bit; append so that existing flags are kept
+IF (APPLE)
+   SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch i386")
+   SET (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch i386")
+ENDIF (APPLE)

 IF(UNIX AND NOT CMAKE_BUILD_TYPE)
    SET(CMAKE_BUILD_TYPE Debug CACHE STRING "Debug or Release build" FORCE)

--- a/platforms/opencl/src/OpenCLContext.cpp
+++ b/platforms/opencl/src/OpenCLContext.cpp
@@ -51,10 +51,10 @@ OpenCLContext::OpenCLContext(int numParticles, int platformIndex, int deviceInde
        forceBufferPerWarp = false;
        numForceBuffers = numAtomBlocks;
    }
-    posq = new OpenCLArray<cl_float4>(*this, paddedNumAtoms, "posq", true);
-    velm = new OpenCLArray<cl_float4>(*this, paddedNumAtoms, "velm", true);
-    forceBuffers = new OpenCLArray<cl_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers", false);
-    force = new OpenCLArray<cl_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force", true);
+    posq = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "posq", true);
+    velm = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "velm", true);
+    forceBuffers = new OpenCLArray<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers", false);
+    force = new OpenCLArray<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force", true);
    atomIndex = new OpenCLArray<cl_int>(*this, paddedNumAtoms, "atomIndex", true);
    for (int i = 0; i < paddedNumAtoms; ++i)
        atomIndex->set(i, i);
@@ -105,7 +105,7 @@ void OpenCLContext::clearBuffer(OpenCLArray<float>& array) {
    queue.enqueueNDRangeKernel(clearBufferKernel, cl::NullRange, cl::NDRange(numThreadBlocks*ThreadBlockSize), cl::NDRange(ThreadBlockSize));
 }

-void OpenCLContext::clearBuffer(OpenCLArray<cl_float4>& array) {
+void OpenCLContext::clearBuffer(OpenCLArray<mm_float4>& array) {
    clearBufferKernel.setArg<cl::Buffer>(0, array.getDeviceBuffer());
    clearBufferKernel.setArg<cl_int>(1, array.getSize()*4);
    queue.enqueueNDRangeKernel(clearBufferKernel, cl::NullRange, cl::NDRange(numThreadBlocks*ThreadBlockSize), cl::NDRange(ThreadBlockSize));

--- a/platforms/opencl/src/OpenCLContext.h
+++ b/platforms/opencl/src/OpenCLContext.h
@@ -28,13 +28,23 @@
 * -------------------------------------------------------------------------- */

 #define __CL_ENABLE_EXCEPTIONS
-#include <CL/cl.hpp>
+#include <cl.hpp>

 namespace OpenMM {

 template <class T>
 class OpenCLArray;

+/**
+ * We can't use cl_float4, since different OpenCL implementations currently define it in
+ * incompatible ways.  Hopefully that will be fixed in the future.  In the meantime, we
+ * define our own type to represent float4 on the host.
+ */
+
+typedef struct {
+    cl_float x, y, z, w;
+} mm_float4;
+
 /**
 * This class contains the information associated with a Context by the OpenCL Platform.
 */
@@ -60,25 +70,25 @@ public:
    /**
     * Get the array which contains the position and charge of each atom.
     */
-    OpenCLArray<cl_float4>& getPosq() {
+    OpenCLArray<mm_float4>& getPosq() {
        return *posq;
    }
    /**
     * Get the array which contains the velocity and massof each atom.
     */
-    OpenCLArray<cl_float4>& getVelm() {
+    OpenCLArray<mm_float4>& getVelm() {
        return *velm;
    }
    /**
     * Get the array which contains the force on each atom.
     */
-    OpenCLArray<cl_float4>& getForce() {
+    OpenCLArray<mm_float4>& getForce() {
        return *force;
    }
    /**
     * Get the array which contains the buffers in which forces are computed.
     */
-    OpenCLArray<cl_float4>& getForceBuffers() {
+    OpenCLArray<mm_float4>& getForceBuffers() {
        return *forceBuffers;
    }
    /**
@@ -102,7 +112,7 @@ public:
    /**
     * Set all elements of an array to 0.
     */
-    void clearBuffer(OpenCLArray<cl_float4>& array);
+    void clearBuffer(OpenCLArray<mm_float4>& array);
    int numAtoms;
    int paddedNumAtoms;
    int numAtomBlocks;
@@ -116,10 +126,10 @@ private:
    cl::CommandQueue queue;
    cl::Program utilities;
    cl::Kernel clearBufferKernel;
-    OpenCLArray<cl_float4>* posq;
-    OpenCLArray<cl_float4>* velm;
-    OpenCLArray<cl_float4>* force;
-    OpenCLArray<cl_float4>* forceBuffers;
+    OpenCLArray<mm_float4>* posq;
+    OpenCLArray<mm_float4>* velm;
+    OpenCLArray<mm_float4>* force;
+    OpenCLArray<mm_float4>* forceBuffers;
    OpenCLArray<cl_int>* atomIndex;
 };


--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
@@ -64,29 +64,29 @@ void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
 }

 void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector<Vec3>& positions) {
-    OpenCLArray<cl_float4>& posq = data.context->getPosq();
+    OpenCLArray<mm_float4>& posq = data.context->getPosq();
    posq.download();
    OpenCLArray<cl_int>& order = data.context->getAtomIndex();
    int numParticles = context.getSystem().getNumParticles();
    positions.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
-        cl_float4 pos = posq[i];
+        mm_float4 pos = posq[i];
 //        int3 offset = gpu->posCellOffsets[i];
 //        positions[order[i]] = Vec3(pos.x-offset.x*gpu->sim.periodicBoxSizeX, pos.y-offset.y*gpu->sim.periodicBoxSizeY, pos.z-offset.z*gpu->sim.periodicBoxSizeZ);
-        positions[order[i]] = Vec3(pos.f32[0], pos.f32[1], pos.f32[2]);
+        positions[order[i]] = Vec3(pos.x, pos.y, pos.z);
    }
 }

 void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const std::vector<Vec3>& positions) {
-    OpenCLArray<cl_float4>& posq = data.context->getPosq();
+    OpenCLArray<mm_float4>& posq = data.context->getPosq();
    OpenCLArray<cl_int>& order = data.context->getAtomIndex();
    int numParticles = context.getSystem().getNumParticles();
    for (int i = 0; i < numParticles; ++i) {
-        cl_float4& pos = posq[i];
+        mm_float4& pos = posq[i];
        const Vec3& p = positions[order[i]];
-        pos.f32[0] = p[0];
-        pos.f32[1] = p[1];
-        pos.f32[2] = p[2];
+        pos.x = p[0];
+        pos.y = p[1];
+        pos.z = p[2];
    }
    posq.upload();
 //    for (int i = 0; i < gpu->posCellOffsets.size(); i++)
@@ -94,40 +94,40 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const std::
 }

 void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector<Vec3>& velocities) {
-    OpenCLArray<cl_float4>& velm = data.context->getVelm();
+    OpenCLArray<mm_float4>& velm = data.context->getVelm();
    velm.download();
    OpenCLArray<cl_int>& order = data.context->getAtomIndex();
    int numParticles = context.getSystem().getNumParticles();
    velocities.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
-        cl_float4 vel = velm[i];
-        velocities[order[i]] = Vec3(vel.f32[0], vel.f32[1], vel.f32[2]);
+        mm_float4 vel = velm[i];
+        velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
    }
 }

 void OpenCLUpdateStateDataKernel::setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities) {
-    OpenCLArray<cl_float4>& velm = data.context->getVelm();
+    OpenCLArray<mm_float4>& velm = data.context->getVelm();
    OpenCLArray<cl_int>& order = data.context->getAtomIndex();
    int numParticles = context.getSystem().getNumParticles();
    for (int i = 0; i < numParticles; ++i) {
-        cl_float4& vel = velm[i];
+        mm_float4& vel = velm[i];
        const Vec3& p = velocities[order[i]];
-        vel.f32[0] = p[0];
-        vel.f32[1] = p[1];
-        vel.f32[2] = p[2];
+        vel.x = p[0];
+        vel.y = p[1];
+        vel.z = p[2];
    }
    velm.upload();
 }

 void OpenCLUpdateStateDataKernel::getForces(ContextImpl& context, std::vector<Vec3>& forces) {
-    OpenCLArray<cl_float4>& force = data.context->getForce();
+    OpenCLArray<mm_float4>& force = data.context->getForce();
    force.download();
    OpenCLArray<cl_int>& order = data.context->getAtomIndex();
    int numParticles = context.getSystem().getNumParticles();
    forces.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
-        cl_float4 f = force[i];
-        forces[order[i]] = Vec3(f.f32[0], f.f32[1], f.f32[2]);
+        mm_float4 f = force[i];
+        forces[order[i]] = Vec3(f.x, f.y, f.z);
    }
 }

@@ -852,11 +852,11 @@ double OpenCLCalcKineticEnergyKernel::execute(ContextImpl& context) {
    // We don't currently have a GPU kernel to do this, so we retrieve the velocities and calculate the energy
    // on the CPU.

-    OpenCLArray<cl_float4>& velm = data.context->getVelm();
+    OpenCLArray<mm_float4>& velm = data.context->getVelm();
    double energy = 0.0;
    for (size_t i = 0; i < masses.size(); ++i) {
-        cl_float4 v = velm[i];
-        energy += masses[i]*(v.f32[0]*v.f32[0]+v.f32[1]*v.f32[1]+v.f32[2]*v.f32[2]);
+        mm_float4 v = velm[i];
+        energy += masses[i]*(v.x*v.x+v.y*v.y+v.z*v.z);
    }
    return 0.5*energy;
 }

--- a/platforms/opencl/src/cl.hpp
+++ b/platforms/opencl/src/cl.hpp