Commit 387008ce authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to implement new CUDA platform: checkpointing, parallelization across multiple devices

parent 17ae3aae
......@@ -113,7 +113,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
deviceIndex = i;
bestSpeed = speed;
bestCompute = major;
gpuArchitecture = intToString(major)+intToString(minor);
}
}
}
......@@ -121,6 +120,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
throw OpenMMException("No compatible CUDA device is available");
CHECK_RESULT(cuDeviceGet(&device, deviceIndex));
this->deviceIndex = deviceIndex;
int major, minor;
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
gpuArchitecture = intToString(major)+intToString(minor);
defaultOptimizationOptions = "--use_fast_math";
unsigned int flags = CU_CTX_MAP_HOST;
if (useBlockingSync)
......
......@@ -26,7 +26,7 @@
#include "CudaKernelFactory.h"
#include "CudaKernels.h"
//#include "CudaParallelKernels.h"
#include "CudaParallelKernels.h"
#include "CudaPlatform.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
......@@ -35,38 +35,38 @@ using namespace OpenMM;
KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
CudaPlatform::PlatformData& data = *static_cast<CudaPlatform::PlatformData*>(context.getPlatformData());
// if (data.contexts.size() > 1) {
// // We are running in parallel on multiple devices, so we may want to create a parallel kernel.
//
// if (name == CalcForcesAndEnergyKernel::Name())
// return new CudaParallelCalcForcesAndEnergyKernel(name, platform, data);
// if (name == CalcHarmonicBondForceKernel::Name())
// return new CudaParallelCalcHarmonicBondForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomBondForceKernel::Name())
// return new CudaParallelCalcCustomBondForceKernel(name, platform, data, context.getSystem());
// if (name == CalcHarmonicAngleForceKernel::Name())
// return new CudaParallelCalcHarmonicAngleForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomAngleForceKernel::Name())
// return new CudaParallelCalcCustomAngleForceKernel(name, platform, data, context.getSystem());
// if (name == CalcPeriodicTorsionForceKernel::Name())
// return new CudaParallelCalcPeriodicTorsionForceKernel(name, platform, data, context.getSystem());
// if (name == CalcRBTorsionForceKernel::Name())
// return new CudaParallelCalcRBTorsionForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCMAPTorsionForceKernel::Name())
// return new CudaParallelCalcCMAPTorsionForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomTorsionForceKernel::Name())
// return new CudaParallelCalcCustomTorsionForceKernel(name, platform, data, context.getSystem());
// if (name == CalcNonbondedForceKernel::Name())
// return new CudaParallelCalcNonbondedForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomNonbondedForceKernel::Name())
// return new CudaParallelCalcCustomNonbondedForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomExternalForceKernel::Name())
// return new CudaParallelCalcCustomExternalForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomHbondForceKernel::Name())
// return new CudaParallelCalcCustomHbondForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomCompoundBondForceKernel::Name())
// return new CudaParallelCalcCustomCompoundBondForceKernel(name, platform, data, context.getSystem());
// }
if (data.contexts.size() > 1) {
// We are running in parallel on multiple devices, so we may want to create a parallel kernel.
if (name == CalcForcesAndEnergyKernel::Name())
return new CudaParallelCalcForcesAndEnergyKernel(name, platform, data);
if (name == CalcHarmonicBondForceKernel::Name())
return new CudaParallelCalcHarmonicBondForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomBondForceKernel::Name())
return new CudaParallelCalcCustomBondForceKernel(name, platform, data, context.getSystem());
if (name == CalcHarmonicAngleForceKernel::Name())
return new CudaParallelCalcHarmonicAngleForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomAngleForceKernel::Name())
return new CudaParallelCalcCustomAngleForceKernel(name, platform, data, context.getSystem());
if (name == CalcPeriodicTorsionForceKernel::Name())
return new CudaParallelCalcPeriodicTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcRBTorsionForceKernel::Name())
return new CudaParallelCalcRBTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcCMAPTorsionForceKernel::Name())
return new CudaParallelCalcCMAPTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomTorsionForceKernel::Name())
return new CudaParallelCalcCustomTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcNonbondedForceKernel::Name())
return new CudaParallelCalcNonbondedForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomNonbondedForceKernel::Name())
return new CudaParallelCalcCustomNonbondedForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomExternalForceKernel::Name())
return new CudaParallelCalcCustomExternalForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomHbondForceKernel::Name())
return new CudaParallelCalcCustomHbondForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomCompoundBondForceKernel::Name())
return new CudaParallelCalcCustomCompoundBondForceKernel(name, platform, data, context.getSystem());
}
CudaContext& cu = *data.contexts[0];
if (name == CalcForcesAndEnergyKernel::Name())
return new CudaCalcForcesAndEnergyKernel(name, platform, cu);
......
......@@ -37,7 +37,7 @@
#include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h"
#include "CudaIntegrationUtilities.h"
//#include "CudaNonbondedUtilities.h"
#include "CudaNonbondedUtilities.h"
#include "CudaKernelSources.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/Operation.h"
......@@ -282,48 +282,62 @@ void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, cons
void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
cu.setAsCurrent();
// int version = 1;
// stream.write((char*) &version, sizeof(int));
// double time = cu.getTime();
// stream.write((char*) &time, sizeof(double));
// cu.getPosq().download();
// stream.write((char*) &cu.getPosq()[0], sizeof(mm_float4)*cu.getPosq().getSize());
// cu.getVelm().download();
// stream.write((char*) &cu.getVelm()[0], sizeof(mm_float4)*cu.getVelm().getSize());
// stream.write((char*) &cu.getAtomIndex()[0], sizeof(cl_int)*cu.getAtomIndex().getSize());
// stream.write((char*) &cu.getPosCellOffsets()[0], sizeof(mm_int4)*cu.getPosCellOffsets().size());
// mm_float4 box = cu.getPeriodicBoxSize();
// stream.write((char*) &box, sizeof(mm_float4));
// cu.getIntegrationUtilities().createCheckpoint(stream);
// SimTKOpenMMUtilities::createCheckpoint(stream);
int version = 1;
stream.write((char*) &version, sizeof(int));
double time = cu.getTime();
stream.write((char*) &time, sizeof(double));
int stepCount = cu.getStepCount();
stream.write((char*) &stepCount, sizeof(int));
int computeForceCount = cu.getComputeForceCount();
stream.write((char*) &computeForceCount, sizeof(int));
int bufferSize = cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
char* buffer = (char*) cu.getPinnedBuffer();
cu.getPosq().download(buffer);
stream.write(buffer, bufferSize);
cu.getVelm().download(buffer);
stream.write(buffer, bufferSize);
stream.write((char*) &cu.getAtomIndex()[0], sizeof(int)*cu.getAtomIndex().size());
stream.write((char*) &cu.getPosCellOffsets()[0], sizeof(int4)*cu.getPosCellOffsets().size());
double4 box = cu.getPeriodicBoxSize();
stream.write((char*) &box, sizeof(double4));
cu.getIntegrationUtilities().createCheckpoint(stream);
SimTKOpenMMUtilities::createCheckpoint(stream);
}
void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
cu.setAsCurrent();
// int version;
// stream.read((char*) &version, sizeof(int));
// if (version != 1)
// throw OpenMMException("Checkpoint was created with a different version of OpenMM");
// double time;
// stream.read((char*) &time, sizeof(double));
// vector<CudaContext*>& contexts = cu.getPlatformData().contexts;
// for (int i = 0; i < (int) contexts.size(); i++)
// contexts[i]->setTime(time);
// stream.read((char*) &cu.getPosq()[0], sizeof(mm_float4)*cu.getPosq().getSize());
// cu.getPosq().upload();
// stream.read((char*) &cu.getVelm()[0], sizeof(mm_float4)*cu.getVelm().getSize());
// cu.getVelm().upload();
// stream.read((char*) &cu.getAtomIndex()[0], sizeof(cl_int)*cu.getAtomIndex().getSize());
// cu.getAtomIndex().upload();
// stream.read((char*) &cu.getPosCellOffsets()[0], sizeof(mm_int4)*cu.getPosCellOffsets().size());
// mm_float4 box;
// stream.read((char*) &box, sizeof(mm_float4));
// for (int i = 0; i < (int) contexts.size(); i++)
// contexts[i]->setPeriodicBoxSize(box.x, box.y, box.z);
// cu.getIntegrationUtilities().loadCheckpoint(stream);
// SimTKOpenMMUtilities::loadCheckpoint(stream);
// for (int i = 0; i < cu.getReorderListeners().size(); i++)
// cu.getReorderListeners()[i]->execute();
int version;
stream.read((char*) &version, sizeof(int));
if (version != 1)
throw OpenMMException("Checkpoint was created with a different version of OpenMM");
double time;
stream.read((char*) &time, sizeof(double));
int stepCount, computeForceCount;
stream.read((char*) &stepCount, sizeof(int));
stream.read((char*) &computeForceCount, sizeof(int));
vector<CudaContext*>& contexts = cu.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++) {
contexts[i]->setTime(time);
contexts[i]->setStepCount(stepCount);
contexts[i]->setComputeForceCount(computeForceCount);
}
int bufferSize = cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
char* buffer = (char*) cu.getPinnedBuffer();
stream.read(buffer, bufferSize);
cu.getPosq().upload(buffer);
stream.read(buffer, bufferSize);
cu.getVelm().upload(buffer);
stream.read((char*) &cu.getAtomIndex()[0], sizeof(int)*cu.getAtomIndex().size());
cu.getAtomIndexArray().upload(cu.getAtomIndex());
stream.read((char*) &cu.getPosCellOffsets()[0], sizeof(int4)*cu.getPosCellOffsets().size());
double4 box;
stream.read((char*) &box, sizeof(double4));
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setPeriodicBoxSize(box.x, box.y, box.z);
cu.getIntegrationUtilities().loadCheckpoint(stream);
SimTKOpenMMUtilities::loadCheckpoint(stream);
for (int i = 0; i < cu.getReorderListeners().size(); i++)
cu.getReorderListeners()[i]->execute();
}
void CudaApplyConstraintsKernel::initialize(const System& system) {
......@@ -840,6 +854,7 @@ private:
};
CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
......@@ -926,6 +941,7 @@ private:
};
CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
cu.setAsCurrent();
if (params1 != NULL)
delete params1;
if (params2 != NULL)
......@@ -3983,8 +3999,8 @@ CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {
}
void CudaIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
......@@ -3995,6 +4011,7 @@ void CudaIntegrateVerletStepKernel::initialize(const System& system, const Verle
}
void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms();
double dt = integrator.getStepSize();
......@@ -4042,8 +4059,8 @@ CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() {
}
void CudaIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
......@@ -4056,6 +4073,7 @@ void CudaIntegrateLangevinStepKernel::initialize(const System& system, const Lan
}
void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms();
double temperature = integrator.getTemperature();
......@@ -4120,8 +4138,8 @@ CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
}
void CudaIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
......@@ -4133,6 +4151,7 @@ void CudaIntegrateBrownianStepKernel::initialize(const System& system, const Bro
}
void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms();
double temperature = integrator.getTemperature();
......@@ -4175,8 +4194,8 @@ CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel()
}
void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
......@@ -4188,6 +4207,7 @@ void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, con
}
double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms();
......@@ -4252,8 +4272,8 @@ CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKerne
}
void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
......@@ -4268,6 +4288,7 @@ void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, c
}
double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms();
......@@ -4412,8 +4433,8 @@ CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() {
}
void CudaIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
numGlobalVariables = integrator.getNumGlobalVariables();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
......@@ -4492,6 +4513,7 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
}
void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms();
int numSteps = integrator.getNumComputations();
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaParallelKernels.h"
#include "CudaKernelSources.h"
using namespace OpenMM;
using namespace std;
#define CHECK_RESULT(result) \
if (result != CUDA_SUCCESS) { \
std::stringstream m; \
m<<errorMessage<<": "<<cu.getErrorString(result)<<" ("<<result<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
throw OpenMMException(m.str());\
}
/**
* Get the current clock time, measured in microseconds.
*/
#ifdef _MSC_VER
#include <Windows.h>
static long long getTime() {
FILETIME ft;
GetSystemTimeAsFileTime(&ft); // 100-nanoseconds since 1-1-1601
ULARGE_INTEGER result;
result.LowPart = ft.dwLowDateTime;
result.HighPart = ft.dwHighDateTime;
return result.QuadPart/10;
}
#else
#include <sys/time.h>
static long long getTime() {
struct timeval tod;
gettimeofday(&tod, 0);
return 1000000*tod.tv_sec+tod.tv_usec;
}
#endif
class CudaParallelCalcForcesAndEnergyKernel::BeginComputationTask : public CudaContext::WorkTask {
public:
BeginComputationTask(ContextImpl& context, CudaContext& cu, CudaCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, void* pinnedMemory) : context(context), cu(cu), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), pinnedMemory(pinnedMemory) {
}
void execute() {
// Copy coordinates over to this device and execute the kernel.
cu.setAsCurrent();
if (cu.getContextIndex() > 0)
cu.getPosq().upload(pinnedMemory, false);
kernel.beginComputation(context, includeForce, includeEnergy, groups);
}
private:
ContextImpl& context;
CudaContext& cu;
CudaCalcForcesAndEnergyKernel& kernel;
bool includeForce, includeEnergy;
int groups;
void* pinnedMemory;
};
class CudaParallelCalcForcesAndEnergyKernel::FinishComputationTask : public CudaContext::WorkTask {
public:
FinishComputationTask(ContextImpl& context, CudaContext& cu, CudaCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, void* pinnedMemory) :
context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy),
completionTime(completionTime), pinnedMemory(pinnedMemory) {
}
void execute() {
// Execute the kernel, then download forces.
energy += kernel.finishComputation(context, includeForce, includeEnergy, groups);
if (includeForce) {
if (cu.getContextIndex() > 0) {
int numAtoms = cu.getPaddedNumAtoms();
cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]);
}
else {
string errorMessage = "Error synchronizing CUDA context";
CHECK_RESULT(cuCtxSynchronize());
}
}
completionTime = getTime();
}
private:
ContextImpl& context;
CudaContext& cu;
CudaCalcForcesAndEnergyKernel& kernel;
bool includeForce, includeEnergy;
int groups;
double& energy;
long long& completionTime;
void* pinnedMemory;
};
CudaParallelCalcForcesAndEnergyKernel::CudaParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, CudaPlatform::PlatformData& data) :
CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextTiles(data.contexts.size()), contextForces(NULL),
pinnedPositionBuffer(NULL), pinnedForceBuffer(NULL) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcForcesAndEnergyKernel(name, platform, *data.contexts[i])));
}
CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel() {
data.contexts[0]->setAsCurrent();
if (contextForces != NULL)
delete contextForces;
if (pinnedPositionBuffer != NULL)
cuMemFreeHost(pinnedPositionBuffer);
if (pinnedForceBuffer != NULL)
cuMemFreeHost(pinnedForceBuffer);
}
void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
CudaContext& cu = *data.contexts[0];
cu.setAsCurrent();
CUmodule module = cu.createModule(CudaKernelSources::parallel);
sumKernel = cu.getKernel(module, "sumForces");
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system);
}
void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
CudaContext& cu = *data.contexts[0];
cu.setAsCurrent();
if (contextForces == NULL) {
contextForces = CudaArray::create<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces");
string errorMessage = "Error allocating pinned memory";
CHECK_RESULT(cuMemHostAlloc(&pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), CU_MEMHOSTALLOC_PORTABLE));
CHECK_RESULT(cuMemHostAlloc(&pinnedPositionBuffer, cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4)), CU_MEMHOSTALLOC_PORTABLE));
}
// Copy coordinates over to each device and execute the kernel.
cu.getPosq().download(pinnedPositionBuffer);
for (int i = 0; i < (int) data.contexts.size(); i++) {
data.contextEnergy[i] = 0.0;
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer));
}
}
double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer));
}
data.syncContexts();
double energy = 0.0;
for (int i = 0; i < (int) data.contextEnergy.size(); i++)
energy += data.contextEnergy[i];
if (includeForce) {
// Sum the forces from all devices.
CudaContext& cu = *data.contexts[0];
contextForces->upload(pinnedForceBuffer, false);
int bufferSize = 3*cu.getPaddedNumAtoms();
int numBuffers = data.contexts.size()-1;
void* args[] = {&cu.getForce().getDevicePointer(), &contextForces->getDevicePointer(), &bufferSize, &numBuffers};
cu.executeKernel(sumKernel, args, bufferSize);
// Balance work between the contexts by transferring a few nonbonded tiles from the context that
// finished last to the one that finished first.
int firstIndex = 0, lastIndex = 0;
int totalTiles = 0;
for (int i = 0; i < (int) completionTimes.size(); i++) {
if (completionTimes[i] < completionTimes[firstIndex])
firstIndex = i;
if (completionTimes[i] > completionTimes[lastIndex])
lastIndex = i;
contextTiles[i] = data.contexts[i]->getNonbondedUtilities().getNumTiles();
totalTiles += contextTiles[i];
}
int tilesToTransfer = totalTiles/1000;
if (tilesToTransfer < 1)
tilesToTransfer = 1;
if (tilesToTransfer > contextTiles[lastIndex])
tilesToTransfer = contextTiles[lastIndex];
contextTiles[firstIndex] += tilesToTransfer;
contextTiles[lastIndex] -= tilesToTransfer;
int startIndex = 0;
for (int i = 0; i < (int) contextTiles.size(); i++) {
data.contexts[i]->getNonbondedUtilities().setTileRange(startIndex, contextTiles[i]);
startIndex += contextTiles[i];
}
}
return energy;
}
class CudaParallelCalcHarmonicBondForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcHarmonicBondForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy);
}
private:
ContextImpl& context;
CudaCalcHarmonicBondForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
CudaParallelCalcHarmonicBondForceKernel::CudaParallelCalcHarmonicBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcHarmonicBondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcHarmonicBondForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
}
void CudaParallelCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
class CudaParallelCalcCustomBondForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcCustomBondForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy);
}
private:
ContextImpl& context;
CudaCalcCustomBondForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
CudaParallelCalcCustomBondForceKernel::CudaParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcCustomBondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcCustomBondForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
}
void CudaParallelCalcCustomBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomBondForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
class CudaParallelCalcHarmonicAngleForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcHarmonicAngleForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy);
}
private:
ContextImpl& context;
CudaCalcHarmonicAngleForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
CudaParallelCalcHarmonicAngleForceKernel::CudaParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcHarmonicAngleForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcHarmonicAngleForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
}
void CudaParallelCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
class CudaParallelCalcCustomAngleForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcCustomAngleForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy);
}
private:
ContextImpl& context;
CudaCalcCustomAngleForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
CudaParallelCalcCustomAngleForceKernel::CudaParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcCustomAngleForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcCustomAngleForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
}
void CudaParallelCalcCustomAngleForceKernel::copyParametersToContext(ContextImpl& context, const CustomAngleForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
class CudaParallelCalcPeriodicTorsionForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcPeriodicTorsionForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy);
}
private:
ContextImpl& context;
CudaCalcPeriodicTorsionForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
CudaParallelCalcPeriodicTorsionForceKernel::CudaParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcPeriodicTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcPeriodicTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
}
void CudaParallelCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
class CudaParallelCalcRBTorsionForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcRBTorsionForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy);
}
private:
ContextImpl& context;
CudaCalcRBTorsionForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
CudaParallelCalcRBTorsionForceKernel::CudaParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcRBTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcRBTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
}
void CudaParallelCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context, const RBTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
class CudaParallelCalcCMAPTorsionForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcCMAPTorsionForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy);
}
private:
ContextImpl& context;
CudaCalcCMAPTorsionForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
CudaParallelCalcCMAPTorsionForceKernel::CudaParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcCMAPTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcCMAPTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
}
class CudaParallelCalcCustomTorsionForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcCustomTorsionForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy);
}
private:
ContextImpl& context;
CudaCalcCustomTorsionForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
CudaParallelCalcCustomTorsionForceKernel::CudaParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcCustomTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcCustomTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
}
void CudaParallelCalcCustomTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
class CudaParallelCalcNonbondedForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcNonbondedForceKernel& kernel, bool includeForce,
bool includeEnergy, bool includeDirect, bool includeReciprocal, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), includeDirect(includeDirect), includeReciprocal(includeReciprocal), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy, includeDirect, includeReciprocal);
}
private:
ContextImpl& context;
CudaCalcNonbondedForceKernel& kernel;
bool includeForce, includeEnergy, includeDirect, includeReciprocal;
double& energy;
};
CudaParallelCalcNonbondedForceKernel::CudaParallelCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcNonbondedForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcNonbondedForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, includeDirect, includeReciprocal, data.contextEnergy[i]));
}
return 0.0;
}
void CudaParallelCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
class CudaParallelCalcCustomNonbondedForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcCustomNonbondedForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy);
}
private:
ContextImpl& context;
CudaCalcCustomNonbondedForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
CudaParallelCalcCustomNonbondedForceKernel::CudaParallelCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcCustomNonbondedForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcCustomNonbondedForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
}
void CudaParallelCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
class CudaParallelCalcCustomExternalForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcCustomExternalForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy);
}
private:
ContextImpl& context;
CudaCalcCustomExternalForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
CudaParallelCalcCustomExternalForceKernel::CudaParallelCalcCustomExternalForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcCustomExternalForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcCustomExternalForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
}
void CudaParallelCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& context, const CustomExternalForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
class CudaParallelCalcCustomHbondForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcCustomHbondForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy);
}
private:
ContextImpl& context;
CudaCalcCustomHbondForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
CudaParallelCalcCustomHbondForceKernel::CudaParallelCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcCustomHbondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcCustomHbondForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcCustomHbondForceKernel::initialize(const System& system, const CustomHbondForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
}
void CudaParallelCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& context, const CustomHbondForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
class CudaParallelCalcCustomCompoundBondForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcCustomCompoundBondForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
void execute() {
energy += kernel.execute(context, includeForce, includeEnergy);
}
private:
ContextImpl& context;
CudaCalcCustomCompoundBondForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
CudaParallelCalcCustomCompoundBondForceKernel::CudaParallelCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcCustomCompoundBondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcCustomCompoundBondForceKernel(name, platform, *data.contexts[i], system)));
}
void CudaParallelCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double CudaParallelCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
}
void CudaParallelCalcCustomCompoundBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).copyParametersToContext(context, force);
}
#ifndef OPENMM_CUDAPARALLELKERNELS_H_
#define OPENMM_CUDAPARALLELKERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaPlatform.h"
#include "CudaContext.h"
#include "CudaKernels.h"
namespace OpenMM {
/**
* This kernel is invoked at the beginning and end of force and energy computations. It gives the
* Platform a chance to clear buffers and do other initialization at the beginning, and to do any
* necessary work at the end to determine the final results.
*/
class CudaParallelCalcForcesAndEnergyKernel : public CalcForcesAndEnergyKernel {
public:
CudaParallelCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data);
~CudaParallelCalcForcesAndEnergyKernel();
CudaCalcForcesAndEnergyKernel& getKernel(int index) {
return dynamic_cast<CudaCalcForcesAndEnergyKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* This is called at the beginning of each force/energy computation, before calcForcesAndEnergy() has been called on
* any ForceImpl.
*
* @param context the context in which to execute this kernel
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
*/
void beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
/**
* This is called at the end of each force/energy computation, after calcForcesAndEnergy() has been called on
* every ForceImpl.
*
* @param context the context in which to execute this kernel
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
* @return the potential energy of the system. This value is added to all values returned by ForceImpls'
* calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the
* energy directly, <i>or</i> add it to an internal buffer so that it will be included here.
*/
double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
private:
class BeginComputationTask;
class FinishComputationTask;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
std::vector<long long> completionTimes;
std::vector<int> contextTiles;
CudaArray* contextForces;
void* pinnedPositionBuffer;
void* pinnedForceBuffer;
CUfunction sumKernel;
};
/**
* This kernel is invoked by HarmonicBondForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaParallelCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel {
public:
CudaParallelCalcHarmonicBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcHarmonicBondForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcHarmonicBondForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the HarmonicBondForce this kernel will be used for
*/
void initialize(const System& system, const HarmonicBondForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the HarmonicBondForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force);
private:
class Task;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
* This kernel is invoked by CustomBondForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaParallelCalcCustomBondForceKernel : public CalcCustomBondForceKernel {
public:
CudaParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcCustomBondForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcCustomBondForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomBondForce this kernel will be used for
*/
void initialize(const System& system, const CustomBondForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CustomBondForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CustomBondForce& force);
private:
class Task;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
* This kernel is invoked by HarmonicAngleForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaParallelCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
public:
CudaParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcHarmonicAngleForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcHarmonicAngleForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the HarmonicAngleForce this kernel will be used for
*/
void initialize(const System& system, const HarmonicAngleForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the HarmonicAngleForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force);
private:
class Task;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
* This kernel is invoked by CustomAngleForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaParallelCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel {
public:
CudaParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcCustomAngleForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcCustomAngleForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomAngleForce this kernel will be used for
*/
void initialize(const System& system, const CustomAngleForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CustomAngleForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force);
private:
class Task;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
* This kernel is invoked by PeriodicTorsionForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaParallelCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
public:
CudaParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcPeriodicTorsionForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcPeriodicTorsionForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the PeriodicTorsionForce this kernel will be used for
*/
void initialize(const System& system, const PeriodicTorsionForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
class Task;
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the PeriodicTorsionForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force);
private:
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
* This kernel is invoked by RBTorsionForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaParallelCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
public:
CudaParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcRBTorsionForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcRBTorsionForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the RBTorsionForce this kernel will be used for
*/
void initialize(const System& system, const RBTorsionForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the RBTorsionForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force);
private:
class Task;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
* This kernel is invoked by CMAPTorsionForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaParallelCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
public:
CudaParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcCMAPTorsionForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcCMAPTorsionForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CMAPTorsionForce this kernel will be used for
*/
void initialize(const System& system, const CMAPTorsionForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
class Task;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
* This kernel is invoked by CustomTorsionForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaParallelCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel {
public:
CudaParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcCustomTorsionForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcCustomTorsionForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomTorsionForce this kernel will be used for
*/
void initialize(const System& system, const CustomTorsionForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CustomTorsionForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force);
private:
class Task;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
* This kernel is invoked by NonbondedForce to calculate the forces acting on the system.
*/
class CudaParallelCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
CudaParallelCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcNonbondedForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcNonbondedForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the NonbondedForce this kernel will be used for
*/
void initialize(const System& system, const NonbondedForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @param includeReciprocal true if reciprocal space interactions should be included
* @param includeReciprocal true if reciprocal space interactions should be included
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the NonbondedForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const NonbondedForce& force);
private:
class Task;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
* This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system.
*/
class CudaParallelCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
public:
CudaParallelCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcCustomNonbondedForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcCustomNonbondedForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomNonbondedForce this kernel will be used for
*/
void initialize(const System& system, const CustomNonbondedForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CustomNonbondedForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private:
class Task;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
* This kernel is invoked by CustomExternalForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaParallelCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel {
public:
CudaParallelCalcCustomExternalForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcCustomExternalForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcCustomExternalForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomExternalForce this kernel will be used for
*/
void initialize(const System& system, const CustomExternalForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CustomExternalForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force);
private:
class Task;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
* This kernel is invoked by CustomHbondForce to calculate the forces acting on the system.
*/
class CudaParallelCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
public:
CudaParallelCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcCustomHbondForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcCustomHbondForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomHbondForce this kernel will be used for
*/
void initialize(const System& system, const CustomHbondForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CustomHbondForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
private:
class Task;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
* This kernel is invoked by CustomCompoundBondForce to calculate the forces acting on the system.
*/
class CudaParallelCalcCustomCompoundBondForceKernel : public CalcCustomCompoundBondForceKernel {
public:
CudaParallelCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
CudaCalcCustomCompoundBondForceKernel& getKernel(int index) {
return dynamic_cast<CudaCalcCustomCompoundBondForceKernel&>(kernels[index].getImpl());
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomCompoundBondForce this kernel will be used for
*/
void initialize(const System& system, const CustomCompoundBondForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CustomCompoundBondForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force);
private:
class Task;
CudaPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
} // namespace OpenMM
#endif /*OPENMM_CUDAPARALLELKERNELS_H_*/
......@@ -101,13 +101,14 @@ __device__ void storeInteractionData(ushort2* buffer, int* valid, short* sum, us
extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockBoundingBox, unsigned int* __restrict__ interactionCount, ushort2* __restrict__ interactingTiles,
unsigned int* __restrict__ interactionFlags, const real4* __restrict__ posq, unsigned int maxTiles, unsigned int startTileIndex,
unsigned int endTileIndex) {
unsigned int numTiles) {
__shared__ ushort2 buffer[BUFFER_SIZE];
__shared__ int valid[BUFFER_SIZE];
__shared__ short sum[BUFFER_SIZE];
__shared__ ushort2 temp[BUFFER_SIZE];
__shared__ int bufferFull;
__shared__ int globalIndex;
unsigned int endTileIndex = startTileIndex+numTiles;
int valuesInBuffer = 0;
if (threadIdx.x == 0)
bufferFull = false;
......
/**
* Sum the forces computed by different contexts.
*/
extern "C" __global__ void sumForces(long long* __restrict__ force, long long* __restrict__ buffer, int bufferSize, int numBuffers) {
int totalSize = bufferSize*numBuffers;
for (int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x) {
long long sum = force[index];
for (int i = index; i < totalSize; i += bufferSize)
sum += buffer[i];
force[index] = sum;
}
}
......@@ -73,34 +73,4 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
clearSingleBuffer(buffer6, size6);
}
/**
* Sum a collection of buffers into the first one.
*/
__global__ void reduceFloat4Buffer(float4* __restrict__ buffer, int bufferSize, int numBuffers) {
int index = blockDim.x*blockIdx.x+threadIdx.x;
int totalSize = bufferSize*numBuffers;
while (index < bufferSize) {
float4 sum = buffer[index];
for (int i = index+bufferSize; i < totalSize; i += bufferSize)
sum += buffer[i];
buffer[index] = sum;
index += blockDim.x*gridDim.x;
}
}
/**
* Sum the various buffers containing forces.
*/
__global__ void reduceForces(const long* __restrict__ longBuffer, float4* __restrict__ buffer, int bufferSize, int numBuffers) {
int totalSize = bufferSize*numBuffers;
float scale = 1.0f/(float) 0xFFFFFFFF;
for (int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x) {
float4 sum = make_float4(scale*longBuffer[index], scale*longBuffer[index+bufferSize], scale*longBuffer[index+2*bufferSize], 0.0f);
for (int i = index; i < totalSize; i += bufferSize)
sum += buffer[i];
buffer[index] = sum;
}
}
}
\ No newline at end of file
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests creating and loading checkpoints with the CUDA platform.
*/
#include "CudaPlatform.h"
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/AndersenThermostat.h"
#include "openmm/Context.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <sstream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
void compareStates(State& s1, State& s2) {
ASSERT_EQUAL_TOL(s1.getTime(), s2.getTime(), TOL);
int numParticles = s1.getPositions().size();
for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_VEC(s1.getPositions()[i], s2.getPositions()[i], TOL);
ASSERT_EQUAL_VEC(s1.getVelocities()[i], s2.getVelocities()[i], TOL);
Vec3 a1, b1, c1, a2, b2, c2;
s1.getPeriodicBoxVectors(a1, b1, c1);
s2.getPeriodicBoxVectors(a2, b2, c2);
ASSERT_EQUAL_VEC(a1, a2, TOL);
ASSERT_EQUAL_VEC(b1, b2, TOL);
ASSERT_EQUAL_VEC(c1, c2, TOL);
for (map<string, double>::const_iterator iter = s1.getParameters().begin(); iter != s1.getParameters().end(); ++iter)
ASSERT_EQUAL(iter->second, (*s2.getParameters().find(iter->first)).second);
}
}
void testCheckpoint() {
const int numParticles = 100;
const double boxSize = 5.0;
const double temperature = 200.0;
CudaPlatform platform;
System system;
system.addForce(new AndersenThermostat(0.0, 100.0));
NonbondedForce* nonbonded = new NonbondedForce();
system.addForce(nonbonded);
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
nonbonded->addParticle(i%2 == 0 ? 0.1 : -0.1, 0.2, 0.1);
bool clash;
do {
clash = false;
positions[i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
for (int j = 0; j < i; j++) {
Vec3 delta = positions[i]-positions[j];
if (sqrt(delta.dot(delta)) < 0.1)
clash = true;
}
} while (clash);
}
VerletIntegrator integrator(0.001);
Context context(system, integrator, platform);
context.setPositions(positions);
context.setPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
context.setParameter(AndersenThermostat::Temperature(), temperature);
// Run for a little while.
integrator.step(100);
// Record the current state and make a checkpoint.
State s1 = context.getState(State::Positions | State::Velocities | State::Parameters);
stringstream stream1(ios_base::out | ios_base::in | ios_base::binary);
context.createCheckpoint(stream1);
// Continue the simulation for a few more steps and record the state again.
integrator.step(10);
State s2 = context.getState(State::Positions | State::Velocities | State::Parameters);
// Restore from the checkpoint and see if everything gets restored correctly.
context.setPeriodicBoxVectors(Vec3(2*boxSize, 0, 0), Vec3(0, 2*boxSize, 0), Vec3(0, 0, 2*boxSize));
context.setParameter(AndersenThermostat::Temperature(), temperature+10);
context.loadCheckpoint(stream1);
State s3 = context.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s1, s3);
// Now simulate from there and see if the trajectory is identical.
integrator.step(10);
State s4 = context.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s2, s4);
// Create a new Context that uses multiple devices.
string deviceIndex = platform.getPropertyValue(context, CudaPlatform::CudaDeviceIndex());
map<string, string> props;
props[CudaPlatform::CudaDeviceIndex()] = deviceIndex+","+deviceIndex;
VerletIntegrator integrator2(0.001);
Context context2(system, integrator2, platform, props);
context2.setPositions(positions);
context2.setPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
context2.setParameter(AndersenThermostat::Temperature(), temperature);
// Now repeat all of the above tests with it.
integrator2.step(100);
State s5 = context2.getState(State::Positions | State::Velocities | State::Parameters);
stringstream stream2(ios_base::out | ios_base::in | ios_base::binary);
context2.createCheckpoint(stream2);
integrator2.step(10);
State s6 = context2.getState(State::Positions | State::Velocities | State::Parameters);
context2.setPeriodicBoxVectors(Vec3(2*boxSize, 0, 0), Vec3(0, 2*boxSize, 0), Vec3(0, 0, 2*boxSize));
context2.setParameter(AndersenThermostat::Temperature(), temperature+10);
context2.loadCheckpoint(stream2);
State s7 = context2.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s5, s7);
integrator2.step(10);
State s8 = context2.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s6, s8);
}
int main() {
try {
testCheckpoint();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
......@@ -164,7 +164,7 @@ void testParallelComputation() {
int main() {
try {
testAngles();
// testParallelComputation();
testParallelComputation();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -169,7 +169,7 @@ int main() {
try {
testBonds();
testManyParameters();
// testParallelComputation();
testParallelComputation();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -205,7 +205,7 @@ int main() {
try {
testBond();
testPositionDependence();
// testParallelComputation();
testParallelComputation();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -166,7 +166,7 @@ int main() {
try {
testForce();
testManyParameters();
// testParallelComputation();
testParallelComputation();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -424,7 +424,7 @@ int main() {
testPeriodic();
testTabulatedFunction();
testCoulombLennardJones();
// testParallelComputation();
testParallelComputation();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -205,7 +205,7 @@ int main() {
try {
testTorsions();
testRange();
// testParallelComputation();
testParallelComputation();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -130,7 +130,7 @@ void testParallelComputation() {
int main() {
try {
testAngles();
// testParallelComputation();
testParallelComputation();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -121,7 +121,7 @@ void testParallelComputation() {
int main() {
try {
testBonds();
// testParallelComputation();
testParallelComputation();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -814,8 +814,8 @@ int main() {
testBlockInteractions(true);
testDispersionCorrection();
testChangingParameters();
// testParallelComputation(false);
// testParallelComputation(true);
testParallelComputation(false);
testParallelComputation(true);
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -124,7 +124,7 @@ void testParallelComputation() {
int main() {
try {
testPeriodicTorsions();
// testParallelComputation();
testParallelComputation();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -143,7 +143,7 @@ void testParallelComputation() {
int main() {
try {
testRBTorsions();
// testParallelComputation();
testParallelComputation();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment