Commit 6e3526b4 authored by Peter Eastman
Browse files

Use a separate host thread for each device

parent b51a1b3c
......@@ -68,7 +68,9 @@ public:
PlatformData(int numParticles, const std::string& deviceIndexProperty);
~PlatformData();
void initializeContexts(const System& system);
void syncContexts();
std::vector<OpenCLContext*> contexts;
std::vector<double> contextEnergy;
bool removeCM;
int cmMotionFrequency;
int stepCount, computeForceCount;
......
......@@ -54,7 +54,7 @@ const int OpenCLContext::TileSize = 32;
OpenCLContext::OpenCLContext(int numParticles, int deviceIndex, OpenCLPlatform::PlatformData& platformData) :
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), posq(NULL), velm(NULL),
forceBuffers(NULL), energyBuffer(NULL), atomIndex(NULL), integration(NULL), nonbonded(NULL) {
forceBuffers(NULL), energyBuffer(NULL), atomIndex(NULL), integration(NULL), nonbonded(NULL), thread(NULL) {
try {
contextIndex = platformData.contexts.size();
std::vector<cl::Platform> platforms;
......@@ -166,6 +166,10 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex, OpenCLPlatform::
compilationOptions += " -DLOG=native_log";
else
compilationOptions += " -DLOG=log";
// Create the work thread used for parallelization when running on multiple devices.
thread = new WorkThread();
}
OpenCLContext::~OpenCLContext() {
......@@ -187,6 +191,8 @@ OpenCLContext::~OpenCLContext() {
delete integration;
if (nonbonded != NULL)
delete nonbonded;
if (thread != NULL)
delete thread;
}
void OpenCLContext::initialize(const System& system) {
......@@ -625,3 +631,86 @@ void OpenCLContext::reorderAtoms() {
velm->upload();
atomIndex->upload();
}
/**
 * Bundle of references to the state that a WorkThread shares with its worker thread:
 * the task queue, the two status flags, and the mutex/condition variables guarding them.
 * An instance is allocated by the WorkThread constructor and deleted by threadBody().
 */
struct OpenCLContext::WorkThread::ThreadData {
    ThreadData(std::queue<OpenCLContext::WorkTask*>& taskQueue, bool& waitingFlag, bool& finishedFlag,
            pthread_mutex_t& lock, pthread_cond_t& taskAvailable, pthread_cond_t& queueDrained)
        : tasks(taskQueue),
          waiting(waitingFlag),
          finished(finishedFlag),
          queueLock(lock),
          waitForTaskCondition(taskAvailable),
          queueEmptyCondition(queueDrained) {
    }
    std::queue<OpenCLContext::WorkTask*>& tasks; // pending tasks, oldest first
    bool& waiting;                               // true while the worker has nothing to do
    bool& finished;                              // true once the worker has been asked to exit
    pthread_mutex_t& queueLock;                  // guards the queue and both flags
    pthread_cond_t& waitForTaskCondition;        // worker sleeps on this until a task arrives
    pthread_cond_t& queueEmptyCondition;         // signalled by the worker when it goes idle
};
/**
 * Entry point of the worker thread created by WorkThread.
 *
 * Repeatedly takes the oldest task off the queue, runs it and deletes it. When the queue
 * is empty the thread marks itself as waiting, signals queueEmptyCondition and sleeps on
 * waitForTaskCondition. The thread exits once "finished" has been set and the queue has
 * been drained.
 *
 * All accesses to the shared queue and flags are made while holding queueLock; the lock is
 * released only while a task is executing. (Previously the loop condition and the final
 * update of "waiting" touched shared state without the lock, which was a data race and could
 * lose the wakeup for a thread blocked in flush().)
 *
 * @param args  pointer to a heap allocated ThreadData; ownership passes to this function
 * @return always 0
 */
static void* threadBody(void* args) {
    OpenCLContext::WorkThread::ThreadData* data = static_cast<OpenCLContext::WorkThread::ThreadData*>(args);
    pthread_mutex_lock(&data->queueLock);
    for (;;) {
        // Sleep until there is work to do or we have been told to exit.
        while (data->tasks.empty() && !data->finished) {
            data->waiting = true;
            pthread_cond_signal(&data->queueEmptyCondition);
            pthread_cond_wait(&data->waitForTaskCondition, &data->queueLock);
        }
        if (data->tasks.empty())
            break; // finished was set and every queued task has been executed
        data->waiting = false;
        OpenCLContext::WorkTask* task = data->tasks.front();
        data->tasks.pop();

        // Run the task without holding the lock so new tasks can be queued meanwhile.
        pthread_mutex_unlock(&data->queueLock);
        task->execute();
        delete task;
        pthread_mutex_lock(&data->queueLock);
    }

    // Tell anyone blocked in flush() that no more work will be done.
    data->waiting = true;
    pthread_cond_signal(&data->queueEmptyCondition);
    pthread_mutex_unlock(&data->queueLock);
    delete data;
    return 0;
}
/**
 * Create the synchronization primitives and start the worker thread.
 * The thread starts out idle (waiting == true) and not finished.
 */
OpenCLContext::WorkThread::WorkThread() : waiting(true), finished(false) {
    // The primitives must exist before the thread starts, since threadBody() uses them immediately.
    pthread_mutex_init(&queueLock, NULL);
    pthread_cond_init(&waitForTaskCondition, NULL);
    pthread_cond_init(&queueEmptyCondition, NULL);

    // The worker takes ownership of the ThreadData object and deletes it when it exits.
    // NOTE(review): the result of pthread_create() is not checked.
    pthread_create(&thread, NULL, threadBody,
            new ThreadData(tasks, waiting, finished, queueLock, waitForTaskCondition, queueEmptyCondition));
}
/**
 * Ask the worker thread to exit, wait for it to do so, then release the
 * mutex and condition variables.
 */
OpenCLContext::WorkThread::~WorkThread() {
// Set the exit flag under the lock and wake the worker in case it is sleeping
// while waiting for a task.
pthread_mutex_lock(&queueLock);
finished = true;
pthread_cond_broadcast(&waitForTaskCondition);
pthread_mutex_unlock(&queueLock);
// threadBody() keeps looping while tasks remain in the queue, so this join
// returns only after every queued task has been executed.
pthread_join(thread, NULL);
// The worker has exited, so nothing else is using these primitives any more.
pthread_mutex_destroy(&queueLock);
pthread_cond_destroy(&waitForTaskCondition);
pthread_cond_destroy(&queueEmptyCondition);
}
/**
 * Append a task to the queue and wake the worker thread.
 *
 * @param task  the task to run; threadBody() deletes it after calling its execute() method
 */
void OpenCLContext::WorkThread::addTask(OpenCLContext::WorkTask* task) {
pthread_mutex_lock(&queueLock);
tasks.push(task);
// Clear the idle flag here, under the lock, so that a flush() issued right after
// this call blocks even if the worker has not yet picked the task up.
waiting = false;
pthread_cond_signal(&waitForTaskCondition);
pthread_mutex_unlock(&queueLock);
}
bool OpenCLContext::WorkThread::isWaiting() {
return waiting;
}
bool OpenCLContext::WorkThread::isFinished() {
return finished;
}
/**
 * Block the calling thread until the worker thread reports that it is idle.
 */
void OpenCLContext::WorkThread::flush() {
pthread_mutex_lock(&queueLock);
// Re-check the flag after every wakeup: pthread_cond_wait() may return spuriously,
// and "waiting" is cleared again whenever addTask() queues more work.
while (!waiting)
pthread_cond_wait(&queueEmptyCondition, &queueLock);
pthread_mutex_unlock(&queueLock);
}
......@@ -28,7 +28,9 @@
* -------------------------------------------------------------------------- */
#include <map>
#include <queue>
#include <string>
#include <pthread.h>
#define __CL_ENABLE_EXCEPTIONS
#ifdef _MSC_VER
// Prevent Windows from defining macros that interfere with other code.
......@@ -130,10 +132,16 @@ struct mm_int16 {
* specific to a particular device, and manages data structures and kernels for that device. When running a simulation
* in parallel on multiple devices, there is a separate OpenCLContext for each one. The list of all contexts is
* stored in the OpenCLPlatform::PlatformData.
* <p>
* In addition, a worker thread is created for each OpenCLContext. This is used for parallel computations, so that
* blocking calls to one device will not block other devices. When only a single device is being used, the worker
* thread is not used and calculations are performed on the main application thread.
*/
class OPENMM_EXPORT OpenCLContext {
public:
class WorkTask;
class WorkThread;
static const int ThreadBlockSize;
static const int TileSize;
OpenCLContext(int numParticles, int deviceIndex, OpenCLPlatform::PlatformData& platformData);
......@@ -407,6 +415,12 @@ public:
OpenCLNonbondedUtilities& getNonbondedUtilities() {
// Dereferences the member without a NULL check.
// NOTE(review): presumably only called after the utilities object has been created - confirm.
return *nonbonded;
}
/**
 * Get the thread used by this context for executing parallel computations.
 *
 * @return a reference to the WorkThread that this context creates in its constructor
 *         and deletes in its destructor
 */
WorkThread& getWorkThread() {
return *thread;
}
/**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays.
......@@ -454,6 +468,7 @@ private:
std::vector<int> autoclearBufferSizes;
OpenCLIntegrationUtilities* integration;
OpenCLNonbondedUtilities* nonbonded;
WorkThread* thread;
};
struct OpenCLContext::MoleculeGroup {
......@@ -461,6 +476,44 @@ struct OpenCLContext::MoleculeGroup {
std::vector<int> instances;
};
/**
 * This abstract class defines a task to be executed on the worker thread.
 * Subclasses implement execute() to do the actual work.
 */
class OpenCLContext::WorkTask {
public:
    /**
     * Tasks are deleted through a WorkTask pointer once they have been executed, so the
     * destructor must be virtual for the subclass destructor to run.
     */
    virtual ~WorkTask() {
    }
    /**
     * Perform the work of this task. This is invoked on the worker thread.
     */
    virtual void execute() = 0;
};
/**
 * This class owns a worker thread together with a queue of tasks for it to execute.
 * Tasks are executed one at a time, in the order in which they were added.
 */
class OpenCLContext::WorkThread {
public:
    struct ThreadData;
    /**
     * Create the queue's synchronization objects and start the worker thread.
     */
    WorkThread();
    /**
     * Tell the worker thread to exit, wait until it has done so, and release all resources.
     */
    ~WorkThread();
    /**
     * Request that a task be executed on the worker thread. The argument should have been allocated on the
     * heap with the "new" operator. After its execute() method finishes, the object will be deleted automatically.
     */
    void addTask(OpenCLContext::WorkTask* task);
    /**
     * Get whether the worker thread is idle, waiting for a task to be added.
     */
    bool isWaiting();
    /**
     * Get whether the worker thread has been told to exit. This flag is set by the destructor.
     */
    bool isFinished();
    /**
     * Block until all tasks have finished executing and the worker thread is idle.
     */
    void flush();
private:
    // The worker thread holds references to the members of this object, and the pthread
    // handles cannot be duplicated, so copying is disabled (declared but never defined).
    WorkThread(const WorkThread&);
    WorkThread& operator=(const WorkThread&);
    std::queue<OpenCLContext::WorkTask*> tasks;
    bool waiting, finished;
    pthread_mutex_t queueLock;
    pthread_cond_t waitForTaskCondition, queueEmptyCondition;
    pthread_t thread;
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLCONTEXT_H_*/
......@@ -56,6 +56,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLParallelCalcCustomTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcNonbondedForceKernel::Name())
return new OpenCLParallelCalcNonbondedForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomNonbondedForceKernel::Name())
return new OpenCLParallelCalcCustomNonbondedForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomExternalForceKernel::Name())
return new OpenCLParallelCalcCustomExternalForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomHbondForceKernel::Name())
......
......@@ -29,6 +29,48 @@
using namespace OpenMM;
using namespace std;
/**
 * Work task that starts a force/energy computation on one device: it brings the
 * device's coordinates up to date and then calls beginComputation() on that device's kernel.
 */
class OpenCLParallelCalcForcesAndEnergyKernel::BeginComputationTask : public OpenCLContext::WorkTask {
public:
    BeginComputationTask(ContextImpl& contextImpl, OpenCLContext& deviceContext, OpenCLCalcForcesAndEnergyKernel& deviceKernel,
            bool wantForce, bool wantEnergy)
        : context(contextImpl), cl(deviceContext), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy) {
    }
    void execute() {
        // Every context other than the first receives its coordinates from the
        // host buffer of the first context.
        const bool isSecondaryDevice = (cl.getContextIndex() > 0);
        if (isSecondaryDevice) {
            OpenCLContext& primary = *cl.getPlatformData().contexts[0];
            cl.getPosq().upload(primary.getPosq().getHostBuffer());
        }
        kernel.beginComputation(context, includeForce, includeEnergy);
    }
private:
    ContextImpl& context;
    OpenCLContext& cl;
    OpenCLCalcForcesAndEnergyKernel& kernel;
    bool includeForce, includeEnergy;
};
/**
 * Work task that completes a force/energy computation on one device: it calls
 * finishComputation() on that device's kernel, adds the returned energy to the
 * supplied accumulator and, if forces were requested, downloads them to the host.
 */
class OpenCLParallelCalcForcesAndEnergyKernel::FinishComputationTask : public OpenCLContext::WorkTask {
public:
    /**
     * @param context        the context being evaluated
     * @param cl             the OpenCLContext of the device this task runs for
     * @param kernel         the per-device kernel to finish
     * @param includeForce   true if forces were requested
     * @param includeEnergy  true if the energy was requested
     * @param energy         accumulator to which this device's energy is added
     */
    FinishComputationTask(ContextImpl& context, OpenCLContext& cl, OpenCLCalcForcesAndEnergyKernel& kernel,
            bool includeForce, bool includeEnergy, double& energy) : context(context), cl(cl), kernel(kernel),
            includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    void execute() {
        // Execute the kernel, then download forces.
        energy += kernel.finishComputation(context, includeForce, includeEnergy);
        if (includeForce)
            cl.getForce().download();
    }
private:
    ContextImpl& context;
    OpenCLContext& cl;
    OpenCLCalcForcesAndEnergyKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
OpenCLParallelCalcForcesAndEnergyKernel::OpenCLParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, OpenCLPlatform::PlatformData& data) :
CalcForcesAndEnergyKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
......@@ -41,23 +83,27 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
}
/**
 * Start a force/energy computation on every device.
 *
 * The coordinates are downloaded once from the first context; a BeginComputationTask is then
 * queued on each context's worker thread, which uploads the coordinates to that device (for
 * all but the first context) and starts its kernel. The per-context energy accumulators are
 * reset here. The earlier serial upload/execute path has been removed, since keeping it
 * alongside the tasks would start every kernel twice.
 *
 * @param context        the context being evaluated
 * @param includeForce   true if forces should be computed
 * @param includeEnergy  true if the energy should be computed
 */
void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy) {
    // Copy coordinates over to each device and execute the kernel.
    data.contexts[0]->getPosq().download();
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        data.contextEnergy[i] = 0.0;
        OpenCLContext& cl = *data.contexts[i];
        OpenCLContext::WorkThread& thread = cl.getWorkThread();
        thread.addTask(new BeginComputationTask(context, cl, getKernel(i), includeForce, includeEnergy));
    }
}
double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new FinishComputationTask(context, cl, getKernel(i), includeForce, includeEnergy, data.contextEnergy[i]));
}
data.syncContexts();
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).finishComputation(context, includeForce, includeEnergy);
for (int i = 0; i < (int) data.contextEnergy.size(); i++)
energy += data.contextEnergy[i];
if (includeForce) {
// Sum the forces from all devices.
......@@ -79,6 +125,22 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
return energy;
}
/**
 * Work task that executes the HarmonicBondForce kernel of one device and adds the
 * value it returns to the supplied energy accumulator.
 */
class OpenCLParallelCalcHarmonicBondForceKernel::Task : public OpenCLContext::WorkTask {
public:
    Task(ContextImpl& contextImpl, OpenCLCalcHarmonicBondForceKernel& deviceKernel, bool wantForce,
            bool wantEnergy, double& energyTotal)
        : context(contextImpl), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy), energy(energyTotal) {
    }
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
private:
    ContextImpl& context;
    OpenCLCalcHarmonicBondForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
OpenCLParallelCalcHarmonicBondForceKernel::OpenCLParallelCalcHarmonicBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcHarmonicBondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
......@@ -91,12 +153,30 @@ void OpenCLParallelCalcHarmonicBondForceKernel::initialize(const System& system,
}
/**
 * Queue this force's kernel for execution on the worker thread of every device.
 *
 * The stale serial loop (which ran every kernel on the calling thread and returned before
 * any task was queued) has been removed. Each queued Task adds its energy to
 * data.contextEnergy[i] instead of returning it.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return always 0.0; the energy is accumulated in data.contextEnergy by the queued tasks
 */
double OpenCLParallelCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        OpenCLContext& cl = *data.contexts[i];
        OpenCLContext::WorkThread& thread = cl.getWorkThread();
        thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
    }
    return 0.0;
}
/**
 * Work task that executes the CustomBondForce kernel of one device and adds the
 * value it returns to the supplied energy accumulator.
 */
class OpenCLParallelCalcCustomBondForceKernel::Task : public OpenCLContext::WorkTask {
public:
    Task(ContextImpl& contextImpl, OpenCLCalcCustomBondForceKernel& deviceKernel, bool wantForce,
            bool wantEnergy, double& energyTotal)
        : context(contextImpl), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy), energy(energyTotal) {
    }
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
private:
    ContextImpl& context;
    OpenCLCalcCustomBondForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
OpenCLParallelCalcCustomBondForceKernel::OpenCLParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCustomBondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
......@@ -109,12 +189,30 @@ void OpenCLParallelCalcCustomBondForceKernel::initialize(const System& system, c
}
/**
 * Queue this force's kernel for execution on the worker thread of every device.
 *
 * The stale serial loop (which ran every kernel on the calling thread and returned before
 * any task was queued) has been removed. Each queued Task adds its energy to
 * data.contextEnergy[i] instead of returning it.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return always 0.0; the energy is accumulated in data.contextEnergy by the queued tasks
 */
double OpenCLParallelCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        OpenCLContext& cl = *data.contexts[i];
        OpenCLContext::WorkThread& thread = cl.getWorkThread();
        thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
    }
    return 0.0;
}
/**
 * Work task that executes the HarmonicAngleForce kernel of one device and adds the
 * value it returns to the supplied energy accumulator.
 */
class OpenCLParallelCalcHarmonicAngleForceKernel::Task : public OpenCLContext::WorkTask {
public:
    Task(ContextImpl& contextImpl, OpenCLCalcHarmonicAngleForceKernel& deviceKernel, bool wantForce,
            bool wantEnergy, double& energyTotal)
        : context(contextImpl), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy), energy(energyTotal) {
    }
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
private:
    ContextImpl& context;
    OpenCLCalcHarmonicAngleForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
OpenCLParallelCalcHarmonicAngleForceKernel::OpenCLParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcHarmonicAngleForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
......@@ -127,12 +225,30 @@ void OpenCLParallelCalcHarmonicAngleForceKernel::initialize(const System& system
}
/**
 * Queue this force's kernel for execution on the worker thread of every device.
 *
 * The stale serial loop (which ran every kernel on the calling thread and returned before
 * any task was queued) has been removed. Each queued Task adds its energy to
 * data.contextEnergy[i] instead of returning it.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return always 0.0; the energy is accumulated in data.contextEnergy by the queued tasks
 */
double OpenCLParallelCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        OpenCLContext& cl = *data.contexts[i];
        OpenCLContext::WorkThread& thread = cl.getWorkThread();
        thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
    }
    return 0.0;
}
/**
 * Work task that executes the CustomAngleForce kernel of one device and adds the
 * value it returns to the supplied energy accumulator.
 */
class OpenCLParallelCalcCustomAngleForceKernel::Task : public OpenCLContext::WorkTask {
public:
    Task(ContextImpl& contextImpl, OpenCLCalcCustomAngleForceKernel& deviceKernel, bool wantForce,
            bool wantEnergy, double& energyTotal)
        : context(contextImpl), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy), energy(energyTotal) {
    }
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
private:
    ContextImpl& context;
    OpenCLCalcCustomAngleForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
OpenCLParallelCalcCustomAngleForceKernel::OpenCLParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCustomAngleForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
......@@ -145,12 +261,30 @@ void OpenCLParallelCalcCustomAngleForceKernel::initialize(const System& system,
}
/**
 * Queue this force's kernel for execution on the worker thread of every device.
 *
 * The stale serial loop (which ran every kernel on the calling thread and returned before
 * any task was queued) has been removed. Each queued Task adds its energy to
 * data.contextEnergy[i] instead of returning it.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return always 0.0; the energy is accumulated in data.contextEnergy by the queued tasks
 */
double OpenCLParallelCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        OpenCLContext& cl = *data.contexts[i];
        OpenCLContext::WorkThread& thread = cl.getWorkThread();
        thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
    }
    return 0.0;
}
/**
 * Work task that executes the PeriodicTorsionForce kernel of one device and adds the
 * value it returns to the supplied energy accumulator.
 */
class OpenCLParallelCalcPeriodicTorsionForceKernel::Task : public OpenCLContext::WorkTask {
public:
    Task(ContextImpl& contextImpl, OpenCLCalcPeriodicTorsionForceKernel& deviceKernel, bool wantForce,
            bool wantEnergy, double& energyTotal)
        : context(contextImpl), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy), energy(energyTotal) {
    }
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
private:
    ContextImpl& context;
    OpenCLCalcPeriodicTorsionForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
OpenCLParallelCalcPeriodicTorsionForceKernel::OpenCLParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcPeriodicTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
......@@ -163,12 +297,30 @@ void OpenCLParallelCalcPeriodicTorsionForceKernel::initialize(const System& syst
}
/**
 * Queue this force's kernel for execution on the worker thread of every device.
 *
 * The stale serial loop (which ran every kernel on the calling thread and returned before
 * any task was queued) has been removed. Each queued Task adds its energy to
 * data.contextEnergy[i] instead of returning it.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return always 0.0; the energy is accumulated in data.contextEnergy by the queued tasks
 */
double OpenCLParallelCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        OpenCLContext& cl = *data.contexts[i];
        OpenCLContext::WorkThread& thread = cl.getWorkThread();
        thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
    }
    return 0.0;
}
/**
 * Work task that executes the RBTorsionForce kernel of one device and adds the
 * value it returns to the supplied energy accumulator.
 */
class OpenCLParallelCalcRBTorsionForceKernel::Task : public OpenCLContext::WorkTask {
public:
    Task(ContextImpl& contextImpl, OpenCLCalcRBTorsionForceKernel& deviceKernel, bool wantForce,
            bool wantEnergy, double& energyTotal)
        : context(contextImpl), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy), energy(energyTotal) {
    }
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
private:
    ContextImpl& context;
    OpenCLCalcRBTorsionForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
OpenCLParallelCalcRBTorsionForceKernel::OpenCLParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcRBTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
......@@ -181,12 +333,30 @@ void OpenCLParallelCalcRBTorsionForceKernel::initialize(const System& system, co
}
/**
 * Queue this force's kernel for execution on the worker thread of every device.
 *
 * The stale serial loop (which ran every kernel on the calling thread and returned before
 * any task was queued) has been removed. Each queued Task adds its energy to
 * data.contextEnergy[i] instead of returning it.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return always 0.0; the energy is accumulated in data.contextEnergy by the queued tasks
 */
double OpenCLParallelCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        OpenCLContext& cl = *data.contexts[i];
        OpenCLContext::WorkThread& thread = cl.getWorkThread();
        thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
    }
    return 0.0;
}
/**
 * Work task that executes the CMAPTorsionForce kernel of one device and adds the
 * value it returns to the supplied energy accumulator.
 */
class OpenCLParallelCalcCMAPTorsionForceKernel::Task : public OpenCLContext::WorkTask {
public:
    Task(ContextImpl& contextImpl, OpenCLCalcCMAPTorsionForceKernel& deviceKernel, bool wantForce,
            bool wantEnergy, double& energyTotal)
        : context(contextImpl), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy), energy(energyTotal) {
    }
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
private:
    ContextImpl& context;
    OpenCLCalcCMAPTorsionForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
OpenCLParallelCalcCMAPTorsionForceKernel::OpenCLParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCMAPTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
......@@ -199,12 +369,30 @@ void OpenCLParallelCalcCMAPTorsionForceKernel::initialize(const System& system,
}
/**
 * Queue this force's kernel for execution on the worker thread of every device.
 *
 * The stale serial loop (which ran every kernel on the calling thread and returned before
 * any task was queued) has been removed. Each queued Task adds its energy to
 * data.contextEnergy[i] instead of returning it.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return always 0.0; the energy is accumulated in data.contextEnergy by the queued tasks
 */
double OpenCLParallelCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        OpenCLContext& cl = *data.contexts[i];
        OpenCLContext::WorkThread& thread = cl.getWorkThread();
        thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
    }
    return 0.0;
}
/**
 * Work task that executes the CustomTorsionForce kernel of one device and adds the
 * value it returns to the supplied energy accumulator.
 */
class OpenCLParallelCalcCustomTorsionForceKernel::Task : public OpenCLContext::WorkTask {
public:
    Task(ContextImpl& contextImpl, OpenCLCalcCustomTorsionForceKernel& deviceKernel, bool wantForce,
            bool wantEnergy, double& energyTotal)
        : context(contextImpl), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy), energy(energyTotal) {
    }
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
private:
    ContextImpl& context;
    OpenCLCalcCustomTorsionForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
OpenCLParallelCalcCustomTorsionForceKernel::OpenCLParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCustomTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
......@@ -217,12 +405,30 @@ void OpenCLParallelCalcCustomTorsionForceKernel::initialize(const System& system
}
/**
 * Queue this force's kernel for execution on the worker thread of every device.
 *
 * The stale serial loop (which ran every kernel on the calling thread and returned before
 * any task was queued) has been removed. Each queued Task adds its energy to
 * data.contextEnergy[i] instead of returning it.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return always 0.0; the energy is accumulated in data.contextEnergy by the queued tasks
 */
double OpenCLParallelCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        OpenCLContext& cl = *data.contexts[i];
        OpenCLContext::WorkThread& thread = cl.getWorkThread();
        thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
    }
    return 0.0;
}
/**
 * Work task that executes the NonbondedForce kernel of one device and adds the
 * value it returns to the supplied energy accumulator.
 */
class OpenCLParallelCalcNonbondedForceKernel::Task : public OpenCLContext::WorkTask {
public:
    Task(ContextImpl& contextImpl, OpenCLCalcNonbondedForceKernel& deviceKernel, bool wantForce,
            bool wantEnergy, double& energyTotal)
        : context(contextImpl), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy), energy(energyTotal) {
    }
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
private:
    ContextImpl& context;
    OpenCLCalcNonbondedForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
OpenCLParallelCalcNonbondedForceKernel::OpenCLParallelCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcNonbondedForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
......@@ -235,12 +441,66 @@ void OpenCLParallelCalcNonbondedForceKernel::initialize(const System& system, co
}
/**
 * Queue this force's kernel for execution on the worker thread of every device.
 *
 * Each queued Task adds its energy to data.contextEnergy[i] instead of returning it,
 * so the unused local energy variable has been removed.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return always 0.0; the energy is accumulated in data.contextEnergy by the queued tasks
 */
double OpenCLParallelCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        OpenCLContext& cl = *data.contexts[i];
        OpenCLContext::WorkThread& thread = cl.getWorkThread();
        thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
    }
    return 0.0;
}
/**
 * Work task that executes the CustomNonbondedForce kernel of one device and adds the
 * value it returns to the supplied energy accumulator.
 */
class OpenCLParallelCalcCustomNonbondedForceKernel::Task : public OpenCLContext::WorkTask {
public:
    Task(ContextImpl& contextImpl, OpenCLCalcCustomNonbondedForceKernel& deviceKernel, bool wantForce,
            bool wantEnergy, double& energyTotal)
        : context(contextImpl), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy), energy(energyTotal) {
    }
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
private:
    ContextImpl& context;
    OpenCLCalcCustomNonbondedForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
/**
 * Create one single-device CustomNonbondedForce kernel for every context in the platform data.
 */
OpenCLParallelCalcCustomNonbondedForceKernel::OpenCLParallelCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
        CalcCustomNonbondedForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int index = 0; index < numContexts; index++) {
        OpenCLContext& deviceContext = *data.contexts[index];
        kernels.push_back(Kernel(new OpenCLCalcCustomNonbondedForceKernel(name, platform, deviceContext, system)));
    }
}
/**
 * Initialize the kernel of every device.
 *
 * The stray "energy += ...; return energy;" lines have been removed: they belonged to a
 * different function, referenced undeclared names and returned a value from a void
 * function, so the per-device initialize() call was never reached.
 *
 * @param system  the System this kernel will be applied to
 * @param force   the CustomNonbondedForce this kernel will be used for
 */
void OpenCLParallelCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
    for (int i = 0; i < (int) kernels.size(); i++)
        getKernel(i).initialize(system, force);
}
/**
 * Queue this force's kernel for execution on the worker thread of every device.
 * Each queued Task adds its energy to data.contextEnergy[i] instead of returning it.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return always 0.0
 */
double OpenCLParallelCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int index = 0; index < numContexts; index++) {
        OpenCLContext::WorkThread& worker = data.contexts[index]->getWorkThread();
        double& energyTotal = data.contextEnergy[index];
        worker.addTask(new Task(context, getKernel(index), includeForces, includeEnergy, energyTotal));
    }
    return 0.0;
}
/**
 * Work task that executes the CustomExternalForce kernel of one device and adds the
 * value it returns to the supplied energy accumulator.
 */
class OpenCLParallelCalcCustomExternalForceKernel::Task : public OpenCLContext::WorkTask {
public:
    Task(ContextImpl& contextImpl, OpenCLCalcCustomExternalForceKernel& deviceKernel, bool wantForce,
            bool wantEnergy, double& energyTotal)
        : context(contextImpl), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy), energy(energyTotal) {
    }
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
private:
    ContextImpl& context;
    OpenCLCalcCustomExternalForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
OpenCLParallelCalcCustomExternalForceKernel::OpenCLParallelCalcCustomExternalForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCustomExternalForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
......@@ -253,12 +513,30 @@ void OpenCLParallelCalcCustomExternalForceKernel::initialize(const System& syste
}
/**
 * Queue this force's kernel for execution on the worker thread of every device.
 *
 * The stale serial loop (which ran every kernel on the calling thread and returned before
 * any task was queued) has been removed. Each queued Task adds its energy to
 * data.contextEnergy[i] instead of returning it.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return always 0.0; the energy is accumulated in data.contextEnergy by the queued tasks
 */
double OpenCLParallelCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        OpenCLContext& cl = *data.contexts[i];
        OpenCLContext::WorkThread& thread = cl.getWorkThread();
        thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
    }
    return 0.0;
}
/**
 * Work task that executes the CustomHbondForce kernel of one device and adds the
 * value it returns to the supplied energy accumulator.
 */
class OpenCLParallelCalcCustomHbondForceKernel::Task : public OpenCLContext::WorkTask {
public:
    Task(ContextImpl& contextImpl, OpenCLCalcCustomHbondForceKernel& deviceKernel, bool wantForce,
            bool wantEnergy, double& energyTotal)
        : context(contextImpl), kernel(deviceKernel),
          includeForce(wantForce), includeEnergy(wantEnergy), energy(energyTotal) {
    }
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
private:
    ContextImpl& context;
    OpenCLCalcCustomHbondForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
};
OpenCLParallelCalcCustomHbondForceKernel::OpenCLParallelCalcCustomHbondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCustomHbondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
......@@ -271,8 +549,10 @@ void OpenCLParallelCalcCustomHbondForceKernel::initialize(const System& system,
}
/**
 * Queue this force's kernel for execution on the worker thread of every device.
 *
 * The stale serial loop (which ran every kernel on the calling thread and returned before
 * any task was queued) has been removed. Each queued Task adds its energy to
 * data.contextEnergy[i] instead of returning it.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return always 0.0; the energy is accumulated in data.contextEnergy by the queued tasks
 */
double OpenCLParallelCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        OpenCLContext& cl = *data.contexts[i];
        OpenCLContext::WorkThread& thread = cl.getWorkThread();
        thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
    }
    return 0.0;
}
......@@ -72,6 +72,8 @@ public:
*/
double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy);
private:
class BeginComputationTask;
class FinishComputationTask;
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
......@@ -102,6 +104,7 @@ public:
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
class Task;
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
......@@ -132,6 +135,7 @@ public:
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
class Task;
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
......@@ -162,6 +166,7 @@ public:
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
class Task;
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
......@@ -192,6 +197,7 @@ public:
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
class Task;
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
......@@ -221,6 +227,7 @@ public:
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
class Task;
private:
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
......@@ -252,6 +259,7 @@ public:
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
class Task;
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
......@@ -282,6 +290,7 @@ public:
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
class Task;
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
......@@ -312,6 +321,7 @@ public:
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
class Task;
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
......@@ -342,6 +352,38 @@ public:
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
class Task;
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
 * This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system.
 * It holds one OpenCLCalcCustomNonbondedForceKernel per device context and forwards
 * initialize() and execute() to each of them.
 */
class OpenCLParallelCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
public:
OpenCLParallelCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
/**
 * Get the single-device kernel at the given position in the kernel list.
 *
 * @param index  position of the kernel in the list
 */
OpenCLCalcCustomNonbondedForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomNonbondedForceKernel&>(kernels[index].getImpl());
}
/**
 * Initialize the kernel.
 *
 * @param system the System this kernel will be applied to
 * @param force the CustomNonbondedForce this kernel will be used for
 */
void initialize(const System& system, const CustomNonbondedForce& force);
/**
 * Execute the kernel to calculate the forces and/or energy. The work is queued on the
 * worker thread of each device rather than being performed by this call.
 *
 * @param context the context in which to execute this kernel
 * @param includeForces true if forces should be calculated
 * @param includeEnergy true if the energy should be calculated
 * @return always 0.0; each queued task adds its energy to the PlatformData's contextEnergy entry
 */
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
// Work task that runs one device's kernel; defined in the implementation file.
class Task;
OpenCLPlatform::PlatformData& data;
// One kernel per device context.
std::vector<Kernel> kernels;
};
......@@ -372,6 +414,7 @@ public:
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
class Task;
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
......@@ -402,6 +445,7 @@ public:
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
class Task;
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
......
......@@ -114,7 +114,7 @@ OpenCLPlatform::PlatformData::PlatformData(int numParticles, const string& devic
for (int i = 0; i < (int) devices.size(); i++) {
if (devices[i].length() > 0) {
unsigned int deviceIndex;
stringstream(deviceIndexProperty) >> deviceIndex;
stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new OpenCLContext(numParticles, deviceIndex, *this));
}
}
......@@ -127,6 +127,7 @@ OpenCLPlatform::PlatformData::PlatformData(int numParticles, const string& devic
device << contexts[i]->getDeviceIndex();
}
propertyValues[OpenCLPlatform::OpenCLDeviceIndex()] = device.str();
contextEnergy.resize(contexts.size());
}
OpenCLPlatform::PlatformData::~PlatformData() {
......@@ -138,3 +139,8 @@ void OpenCLPlatform::PlatformData::initializeContexts(const System& system) {
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->initialize(system);
}
void OpenCLPlatform::PlatformData::syncContexts() {
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->getWorkThread().flush();
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment