Commit afbf885d authored by Peter Eastman
Browse files

Changes to support compilation on Snow Leopard

parent f5aebbd4
......@@ -85,6 +85,11 @@ ELSE( CMAKE_SIZEOF_VOID_P EQUAL 8 )
SET( LIB64 )
ENDIF( CMAKE_SIZEOF_VOID_P EQUAL 8 )
# Build 32-bit binaries on Apple platforms, since CUDA doesn't currently work with 64-bit builds.
# The -arch option is appended to the existing flags rather than replacing them, so that any
# compiler options already supplied by the user or the toolchain are preserved.
IF (APPLE)
    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch i386")
    SET (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch i386")
ENDIF (APPLE)
IF(UNIX AND NOT CMAKE_BUILD_TYPE)
SET(CMAKE_BUILD_TYPE Debug CACHE STRING "Debug or Release build" FORCE)
......
......@@ -51,10 +51,10 @@ OpenCLContext::OpenCLContext(int numParticles, int platformIndex, int deviceInde
forceBufferPerWarp = false;
numForceBuffers = numAtomBlocks;
}
posq = new OpenCLArray<cl_float4>(*this, paddedNumAtoms, "posq", true);
velm = new OpenCLArray<cl_float4>(*this, paddedNumAtoms, "velm", true);
forceBuffers = new OpenCLArray<cl_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers", false);
force = new OpenCLArray<cl_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force", true);
posq = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "posq", true);
velm = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "velm", true);
forceBuffers = new OpenCLArray<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers", false);
force = new OpenCLArray<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force", true);
atomIndex = new OpenCLArray<cl_int>(*this, paddedNumAtoms, "atomIndex", true);
for (int i = 0; i < paddedNumAtoms; ++i)
atomIndex->set(i, i);
......@@ -105,7 +105,7 @@ void OpenCLContext::clearBuffer(OpenCLArray<float>& array) {
queue.enqueueNDRangeKernel(clearBufferKernel, cl::NullRange, cl::NDRange(numThreadBlocks*ThreadBlockSize), cl::NDRange(ThreadBlockSize));
}
void OpenCLContext::clearBuffer(OpenCLArray<cl_float4>& array) {
void OpenCLContext::clearBuffer(OpenCLArray<mm_float4>& array) {
clearBufferKernel.setArg<cl::Buffer>(0, array.getDeviceBuffer());
clearBufferKernel.setArg<cl_int>(1, array.getSize()*4);
queue.enqueueNDRangeKernel(clearBufferKernel, cl::NullRange, cl::NDRange(numThreadBlocks*ThreadBlockSize), cl::NDRange(ThreadBlockSize));
......
......@@ -28,13 +28,23 @@
* -------------------------------------------------------------------------- */
#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>
#include <cl.hpp>
namespace OpenMM {
template <class T>
class OpenCLArray;
/**
* We can't use cl_float4, since different OpenCL implementations currently define it in
* incompatible ways. Hopefully that will be fixed in the future. In the meantime, we
* define our own type to represent float4 on the host.
*/
typedef struct {
cl_float x, y, z, w;
} mm_float4;
/**
* This class contains the information associated with a Context by the OpenCL Platform.
*/
......@@ -60,25 +70,25 @@ public:
/**
 * Get the array which contains the position and charge of each atom.
 *
 * @return a reference to the array pointed to by this object's posq member
 */
OpenCLArray<cl_float4>& getPosq() {
OpenCLArray<mm_float4>& getPosq() {
return *posq;
}
/**
 * Get the array which contains the velocity and mass of each atom.
 *
 * @return a reference to the array pointed to by this object's velm member
 */
OpenCLArray<cl_float4>& getVelm() {
OpenCLArray<mm_float4>& getVelm() {
return *velm;
}
/**
 * Get the array which contains the force on each atom.
 *
 * NOTE(review): the constructor creates this array from the device buffer of
 * forceBuffers, so it appears to share storage with that array -- confirm.
 *
 * @return a reference to the array pointed to by this object's force member
 */
OpenCLArray<cl_float4>& getForce() {
OpenCLArray<mm_float4>& getForce() {
return *force;
}
/**
 * Get the array which contains the buffers in which forces are computed.
 *
 * @return a reference to the array pointed to by this object's forceBuffers member
 */
OpenCLArray<cl_float4>& getForceBuffers() {
OpenCLArray<mm_float4>& getForceBuffers() {
return *forceBuffers;
}
/**
......@@ -102,7 +112,7 @@ public:
/**
* Set all elements of an array to 0.
*/
void clearBuffer(OpenCLArray<cl_float4>& array);
void clearBuffer(OpenCLArray<mm_float4>& array);
int numAtoms;
int paddedNumAtoms;
int numAtomBlocks;
......@@ -116,10 +126,10 @@ private:
cl::CommandQueue queue;
cl::Program utilities;
cl::Kernel clearBufferKernel;
OpenCLArray<cl_float4>* posq;
OpenCLArray<cl_float4>* velm;
OpenCLArray<cl_float4>* force;
OpenCLArray<cl_float4>* forceBuffers;
OpenCLArray<mm_float4>* posq;
OpenCLArray<mm_float4>* velm;
OpenCLArray<mm_float4>* force;
OpenCLArray<mm_float4>* forceBuffers;
OpenCLArray<cl_int>* atomIndex;
};
......
......@@ -64,29 +64,29 @@ void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
}
/**
 * Copy the particle positions held by the OpenCL context into a vector of Vec3.
 *
 * @param context    used to look up the number of particles in the System
 * @param positions  output; resized to the particle count, then positions[order[i]]
 *                   is set from the x, y, z components of posq[i] (w is not read)
 */
void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector<Vec3>& positions) {
OpenCLArray<cl_float4>& posq = data.context->getPosq();
OpenCLArray<mm_float4>& posq = data.context->getPosq();
// Presumably refreshes the host-side copy from the device before it is read below -- confirm in OpenCLArray.
posq.download();
// order[i] gives the index in the output vector for the entry stored at slot i of posq.
OpenCLArray<cl_int>& order = data.context->getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
for (int i = 0; i < numParticles; ++i) {
cl_float4 pos = posq[i];
mm_float4 pos = posq[i];
// The periodic cell offset correction below is disabled; positions are returned exactly as stored.
// int3 offset = gpu->posCellOffsets[i];
// positions[order[i]] = Vec3(pos.x-offset.x*gpu->sim.periodicBoxSizeX, pos.y-offset.y*gpu->sim.periodicBoxSizeY, pos.z-offset.z*gpu->sim.periodicBoxSizeZ);
positions[order[i]] = Vec3(pos.f32[0], pos.f32[1], pos.f32[2]);
positions[order[i]] = Vec3(pos.x, pos.y, pos.z);
}
}
void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const std::vector<Vec3>& positions) {
OpenCLArray<cl_float4>& posq = data.context->getPosq();
OpenCLArray<mm_float4>& posq = data.context->getPosq();
OpenCLArray<cl_int>& order = data.context->getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
for (int i = 0; i < numParticles; ++i) {
cl_float4& pos = posq[i];
mm_float4& pos = posq[i];
const Vec3& p = positions[order[i]];
pos.f32[0] = p[0];
pos.f32[1] = p[1];
pos.f32[2] = p[2];
pos.x = p[0];
pos.y = p[1];
pos.z = p[2];
}
posq.upload();
// for (int i = 0; i < gpu->posCellOffsets.size(); i++)
......@@ -94,40 +94,40 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const std::
}
/**
 * Copy the particle velocities held by the OpenCL context into a vector of Vec3.
 *
 * @param context     used to look up the number of particles in the System
 * @param velocities  output; resized to the particle count, then velocities[order[i]]
 *                    is set from the x, y, z components of velm[i] (w is not read)
 */
void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector<Vec3>& velocities) {
OpenCLArray<cl_float4>& velm = data.context->getVelm();
OpenCLArray<mm_float4>& velm = data.context->getVelm();
// Presumably refreshes the host-side copy from the device before it is read below -- confirm in OpenCLArray.
velm.download();
// order[i] gives the index in the output vector for the entry stored at slot i of velm.
OpenCLArray<cl_int>& order = data.context->getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles);
for (int i = 0; i < numParticles; ++i) {
cl_float4 vel = velm[i];
velocities[order[i]] = Vec3(vel.f32[0], vel.f32[1], vel.f32[2]);
mm_float4 vel = velm[i];
velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
}
}
/**
 * Store new particle velocities into the OpenCL context's velm array.
 *
 * @param context     used to look up the number of particles in the System
 * @param velocities  input; element order[i] is written to the x, y, z components of
 *                    velm[i]. The w component of each entry is left unchanged.
 */
void OpenCLUpdateStateDataKernel::setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities) {
OpenCLArray<cl_float4>& velm = data.context->getVelm();
OpenCLArray<mm_float4>& velm = data.context->getVelm();
// order[i] gives the index in the input vector for the entry stored at slot i of velm.
OpenCLArray<cl_int>& order = data.context->getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
for (int i = 0; i < numParticles; ++i) {
// Taken by reference so the assignments below modify the array element in place.
cl_float4& vel = velm[i];
mm_float4& vel = velm[i];
const Vec3& p = velocities[order[i]];
vel.f32[0] = p[0];
vel.f32[1] = p[1];
vel.f32[2] = p[2];
vel.x = p[0];
vel.y = p[1];
vel.z = p[2];
}
// Presumably pushes the modified host-side values to the device -- confirm in OpenCLArray.
velm.upload();
}
/**
 * Copy the per-particle forces held by the OpenCL context into a vector of Vec3.
 *
 * @param context  used to look up the number of particles in the System
 * @param forces   output; resized to the particle count, then forces[order[i]]
 *                 is set from the x, y, z components of force[i] (w is not read)
 */
void OpenCLUpdateStateDataKernel::getForces(ContextImpl& context, std::vector<Vec3>& forces) {
OpenCLArray<cl_float4>& force = data.context->getForce();
OpenCLArray<mm_float4>& force = data.context->getForce();
// Presumably refreshes the host-side copy from the device before it is read below -- confirm in OpenCLArray.
force.download();
// order[i] gives the index in the output vector for the entry stored at slot i of force.
OpenCLArray<cl_int>& order = data.context->getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
forces.resize(numParticles);
for (int i = 0; i < numParticles; ++i) {
cl_float4 f = force[i];
forces[order[i]] = Vec3(f.f32[0], f.f32[1], f.f32[2]);
mm_float4 f = force[i];
forces[order[i]] = Vec3(f.x, f.y, f.z);
}
}
......@@ -852,11 +852,11 @@ double OpenCLCalcKineticEnergyKernel::execute(ContextImpl& context) {
// We don't currently have a GPU kernel to do this, so we retrieve the velocities and calculate the energy
// on the CPU.
OpenCLArray<cl_float4>& velm = data.context->getVelm();
OpenCLArray<mm_float4>& velm = data.context->getVelm();
double energy = 0.0;
for (size_t i = 0; i < masses.size(); ++i) {
cl_float4 v = velm[i];
energy += masses[i]*(v.f32[0]*v.f32[0]+v.f32[1]*v.f32[1]+v.f32[2]*v.f32[2]);
mm_float4 v = velm[i];
energy += masses[i]*(v.x*v.x+v.y*v.y+v.z*v.z);
}
return 0.5*energy;
}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment