Commit afbf885d authored by Peter Eastman's avatar Peter Eastman
Browse files

Changes to support compilation on Snow Leopard

parent f5aebbd4
...@@ -85,6 +85,11 @@ ELSE( CMAKE_SIZEOF_VOID_P EQUAL 8 ) ...@@ -85,6 +85,11 @@ ELSE( CMAKE_SIZEOF_VOID_P EQUAL 8 )
SET( LIB64 ) SET( LIB64 )
ENDIF( CMAKE_SIZEOF_VOID_P EQUAL 8 ) ENDIF( CMAKE_SIZEOF_VOID_P EQUAL 8 )
# Build 32-bit binaries on Apple platforms, since CUDA doesn't currently work
# with 64-bit builds. The -arch flag is appended to the existing flag variables
# instead of replacing them, so flags supplied by the user or set earlier in
# this file are preserved.
IF (APPLE)
    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch i386")
    SET (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch i386")
ENDIF (APPLE)
IF(UNIX AND NOT CMAKE_BUILD_TYPE) IF(UNIX AND NOT CMAKE_BUILD_TYPE)
SET(CMAKE_BUILD_TYPE Debug CACHE STRING "Debug or Release build" FORCE) SET(CMAKE_BUILD_TYPE Debug CACHE STRING "Debug or Release build" FORCE)
......
...@@ -51,10 +51,10 @@ OpenCLContext::OpenCLContext(int numParticles, int platformIndex, int deviceInde ...@@ -51,10 +51,10 @@ OpenCLContext::OpenCLContext(int numParticles, int platformIndex, int deviceInde
forceBufferPerWarp = false; forceBufferPerWarp = false;
numForceBuffers = numAtomBlocks; numForceBuffers = numAtomBlocks;
} }
posq = new OpenCLArray<cl_float4>(*this, paddedNumAtoms, "posq", true); posq = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "posq", true);
velm = new OpenCLArray<cl_float4>(*this, paddedNumAtoms, "velm", true); velm = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "velm", true);
forceBuffers = new OpenCLArray<cl_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers", false); forceBuffers = new OpenCLArray<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers", false);
force = new OpenCLArray<cl_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force", true); force = new OpenCLArray<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force", true);
atomIndex = new OpenCLArray<cl_int>(*this, paddedNumAtoms, "atomIndex", true); atomIndex = new OpenCLArray<cl_int>(*this, paddedNumAtoms, "atomIndex", true);
for (int i = 0; i < paddedNumAtoms; ++i) for (int i = 0; i < paddedNumAtoms; ++i)
atomIndex->set(i, i); atomIndex->set(i, i);
...@@ -105,7 +105,7 @@ void OpenCLContext::clearBuffer(OpenCLArray<float>& array) { ...@@ -105,7 +105,7 @@ void OpenCLContext::clearBuffer(OpenCLArray<float>& array) {
queue.enqueueNDRangeKernel(clearBufferKernel, cl::NullRange, cl::NDRange(numThreadBlocks*ThreadBlockSize), cl::NDRange(ThreadBlockSize)); queue.enqueueNDRangeKernel(clearBufferKernel, cl::NullRange, cl::NDRange(numThreadBlocks*ThreadBlockSize), cl::NDRange(ThreadBlockSize));
} }
void OpenCLContext::clearBuffer(OpenCLArray<cl_float4>& array) { void OpenCLContext::clearBuffer(OpenCLArray<mm_float4>& array) {
clearBufferKernel.setArg<cl::Buffer>(0, array.getDeviceBuffer()); clearBufferKernel.setArg<cl::Buffer>(0, array.getDeviceBuffer());
clearBufferKernel.setArg<cl_int>(1, array.getSize()*4); clearBufferKernel.setArg<cl_int>(1, array.getSize()*4);
queue.enqueueNDRangeKernel(clearBufferKernel, cl::NullRange, cl::NDRange(numThreadBlocks*ThreadBlockSize), cl::NDRange(ThreadBlockSize)); queue.enqueueNDRangeKernel(clearBufferKernel, cl::NullRange, cl::NDRange(numThreadBlocks*ThreadBlockSize), cl::NDRange(ThreadBlockSize));
......
...@@ -28,13 +28,23 @@ ...@@ -28,13 +28,23 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#define __CL_ENABLE_EXCEPTIONS #define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp> #include <cl.hpp>
namespace OpenMM { namespace OpenMM {
template <class T> template <class T>
class OpenCLArray; class OpenCLArray;
/**
* We can't use cl_float4, since different OpenCL implementations currently define it in
* incompatible ways. Hopefully that will be fixed in the future. In the mean time, we
* define our own type to represent float4 on the host.
*/
typedef struct {
cl_float x, y, z, w;
} mm_float4;
/** /**
* This class contains the information associated with a Context by the OpenCL Platform. * This class contains the information associated with a Context by the OpenCL Platform.
*/ */
...@@ -60,25 +70,25 @@ public: ...@@ -60,25 +70,25 @@ public:
/** /**
* Get the array which contains the position and charge of each atom. * Get the array which contains the position and charge of each atom.
*/ */
OpenCLArray<cl_float4>& getPosq() { OpenCLArray<mm_float4>& getPosq() {
return *posq; return *posq;
} }
/** /**
* Get the array which contains the velocity and mass of each atom. * Get the array which contains the velocity and mass of each atom.
*/ */
OpenCLArray<cl_float4>& getVelm() { OpenCLArray<mm_float4>& getVelm() {
return *velm; return *velm;
} }
/** /**
* Get the array which contains the force on each atom. * Get the array which contains the force on each atom.
*/ */
OpenCLArray<cl_float4>& getForce() { OpenCLArray<mm_float4>& getForce() {
return *force; return *force;
} }
/** /**
* Get the array which contains the buffers in which forces are computed. * Get the array which contains the buffers in which forces are computed.
*/ */
OpenCLArray<cl_float4>& getForceBuffers() { OpenCLArray<mm_float4>& getForceBuffers() {
return *forceBuffers; return *forceBuffers;
} }
/** /**
...@@ -102,7 +112,7 @@ public: ...@@ -102,7 +112,7 @@ public:
/** /**
* Set all elements of an array to 0. * Set all elements of an array to 0.
*/ */
void clearBuffer(OpenCLArray<cl_float4>& array); void clearBuffer(OpenCLArray<mm_float4>& array);
int numAtoms; int numAtoms;
int paddedNumAtoms; int paddedNumAtoms;
int numAtomBlocks; int numAtomBlocks;
...@@ -116,10 +126,10 @@ private: ...@@ -116,10 +126,10 @@ private:
cl::CommandQueue queue; cl::CommandQueue queue;
cl::Program utilities; cl::Program utilities;
cl::Kernel clearBufferKernel; cl::Kernel clearBufferKernel;
OpenCLArray<cl_float4>* posq; OpenCLArray<mm_float4>* posq;
OpenCLArray<cl_float4>* velm; OpenCLArray<mm_float4>* velm;
OpenCLArray<cl_float4>* force; OpenCLArray<mm_float4>* force;
OpenCLArray<cl_float4>* forceBuffers; OpenCLArray<mm_float4>* forceBuffers;
OpenCLArray<cl_int>* atomIndex; OpenCLArray<cl_int>* atomIndex;
}; };
......
...@@ -64,29 +64,29 @@ void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) { ...@@ -64,29 +64,29 @@ void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
} }
void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector<Vec3>& positions) { void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector<Vec3>& positions) {
OpenCLArray<cl_float4>& posq = data.context->getPosq(); OpenCLArray<mm_float4>& posq = data.context->getPosq();
posq.download(); posq.download();
OpenCLArray<cl_int>& order = data.context->getAtomIndex(); OpenCLArray<cl_int>& order = data.context->getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles); positions.resize(numParticles);
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
cl_float4 pos = posq[i]; mm_float4 pos = posq[i];
// int3 offset = gpu->posCellOffsets[i]; // int3 offset = gpu->posCellOffsets[i];
// positions[order[i]] = Vec3(pos.x-offset.x*gpu->sim.periodicBoxSizeX, pos.y-offset.y*gpu->sim.periodicBoxSizeY, pos.z-offset.z*gpu->sim.periodicBoxSizeZ); // positions[order[i]] = Vec3(pos.x-offset.x*gpu->sim.periodicBoxSizeX, pos.y-offset.y*gpu->sim.periodicBoxSizeY, pos.z-offset.z*gpu->sim.periodicBoxSizeZ);
positions[order[i]] = Vec3(pos.f32[0], pos.f32[1], pos.f32[2]); positions[order[i]] = Vec3(pos.x, pos.y, pos.z);
} }
} }
void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const std::vector<Vec3>& positions) { void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const std::vector<Vec3>& positions) {
OpenCLArray<cl_float4>& posq = data.context->getPosq(); OpenCLArray<mm_float4>& posq = data.context->getPosq();
OpenCLArray<cl_int>& order = data.context->getAtomIndex(); OpenCLArray<cl_int>& order = data.context->getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
cl_float4& pos = posq[i]; mm_float4& pos = posq[i];
const Vec3& p = positions[order[i]]; const Vec3& p = positions[order[i]];
pos.f32[0] = p[0]; pos.x = p[0];
pos.f32[1] = p[1]; pos.y = p[1];
pos.f32[2] = p[2]; pos.z = p[2];
} }
posq.upload(); posq.upload();
// for (int i = 0; i < gpu->posCellOffsets.size(); i++) // for (int i = 0; i < gpu->posCellOffsets.size(); i++)
...@@ -94,40 +94,40 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const std:: ...@@ -94,40 +94,40 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const std::
} }
void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector<Vec3>& velocities) { void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector<Vec3>& velocities) {
OpenCLArray<cl_float4>& velm = data.context->getVelm(); OpenCLArray<mm_float4>& velm = data.context->getVelm();
velm.download(); velm.download();
OpenCLArray<cl_int>& order = data.context->getAtomIndex(); OpenCLArray<cl_int>& order = data.context->getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles); velocities.resize(numParticles);
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
cl_float4 vel = velm[i]; mm_float4 vel = velm[i];
velocities[order[i]] = Vec3(vel.f32[0], vel.f32[1], vel.f32[2]); velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
} }
} }
void OpenCLUpdateStateDataKernel::setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities) { void OpenCLUpdateStateDataKernel::setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities) {
OpenCLArray<cl_float4>& velm = data.context->getVelm(); OpenCLArray<mm_float4>& velm = data.context->getVelm();
OpenCLArray<cl_int>& order = data.context->getAtomIndex(); OpenCLArray<cl_int>& order = data.context->getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
cl_float4& vel = velm[i]; mm_float4& vel = velm[i];
const Vec3& p = velocities[order[i]]; const Vec3& p = velocities[order[i]];
vel.f32[0] = p[0]; vel.x = p[0];
vel.f32[1] = p[1]; vel.y = p[1];
vel.f32[2] = p[2]; vel.z = p[2];
} }
velm.upload(); velm.upload();
} }
void OpenCLUpdateStateDataKernel::getForces(ContextImpl& context, std::vector<Vec3>& forces) { void OpenCLUpdateStateDataKernel::getForces(ContextImpl& context, std::vector<Vec3>& forces) {
OpenCLArray<cl_float4>& force = data.context->getForce(); OpenCLArray<mm_float4>& force = data.context->getForce();
force.download(); force.download();
OpenCLArray<cl_int>& order = data.context->getAtomIndex(); OpenCLArray<cl_int>& order = data.context->getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
forces.resize(numParticles); forces.resize(numParticles);
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
cl_float4 f = force[i]; mm_float4 f = force[i];
forces[order[i]] = Vec3(f.f32[0], f.f32[1], f.f32[2]); forces[order[i]] = Vec3(f.x, f.y, f.z);
} }
} }
...@@ -852,11 +852,11 @@ double OpenCLCalcKineticEnergyKernel::execute(ContextImpl& context) { ...@@ -852,11 +852,11 @@ double OpenCLCalcKineticEnergyKernel::execute(ContextImpl& context) {
// We don't currently have a GPU kernel to do this, so we retrieve the velocities and calculate the energy // We don't currently have a GPU kernel to do this, so we retrieve the velocities and calculate the energy
// on the CPU. // on the CPU.
OpenCLArray<cl_float4>& velm = data.context->getVelm(); OpenCLArray<mm_float4>& velm = data.context->getVelm();
double energy = 0.0; double energy = 0.0;
for (size_t i = 0; i < masses.size(); ++i) { for (size_t i = 0; i < masses.size(); ++i) {
cl_float4 v = velm[i]; mm_float4 v = velm[i];
energy += masses[i]*(v.f32[0]*v.f32[0]+v.f32[1]*v.f32[1]+v.f32[2]*v.f32[2]); energy += masses[i]*(v.x*v.x+v.y*v.y+v.z*v.z);
} }
return 0.5*energy; return 0.5*energy;
} }
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment