Commit 387008ce authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to implement new CUDA platform: checkpointing, parallelization across multiple devices

parent 17ae3aae
...@@ -113,7 +113,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -113,7 +113,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
deviceIndex = i; deviceIndex = i;
bestSpeed = speed; bestSpeed = speed;
bestCompute = major; bestCompute = major;
gpuArchitecture = intToString(major)+intToString(minor);
} }
} }
} }
...@@ -121,6 +120,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -121,6 +120,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
throw OpenMMException("No compatible CUDA device is available"); throw OpenMMException("No compatible CUDA device is available");
CHECK_RESULT(cuDeviceGet(&device, deviceIndex)); CHECK_RESULT(cuDeviceGet(&device, deviceIndex));
this->deviceIndex = deviceIndex; this->deviceIndex = deviceIndex;
int major, minor;
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
gpuArchitecture = intToString(major)+intToString(minor);
defaultOptimizationOptions = "--use_fast_math"; defaultOptimizationOptions = "--use_fast_math";
unsigned int flags = CU_CTX_MAP_HOST; unsigned int flags = CU_CTX_MAP_HOST;
if (useBlockingSync) if (useBlockingSync)
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#include "CudaKernelFactory.h" #include "CudaKernelFactory.h"
#include "CudaKernels.h" #include "CudaKernels.h"
//#include "CudaParallelKernels.h" #include "CudaParallelKernels.h"
#include "CudaPlatform.h" #include "CudaPlatform.h"
#include "openmm/internal/ContextImpl.h" #include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
...@@ -35,38 +35,38 @@ using namespace OpenMM; ...@@ -35,38 +35,38 @@ using namespace OpenMM;
KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const { KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
CudaPlatform::PlatformData& data = *static_cast<CudaPlatform::PlatformData*>(context.getPlatformData()); CudaPlatform::PlatformData& data = *static_cast<CudaPlatform::PlatformData*>(context.getPlatformData());
// if (data.contexts.size() > 1) { if (data.contexts.size() > 1) {
// // We are running in parallel on multiple devices, so we may want to create a parallel kernel. // We are running in parallel on multiple devices, so we may want to create a parallel kernel.
//
// if (name == CalcForcesAndEnergyKernel::Name()) if (name == CalcForcesAndEnergyKernel::Name())
// return new CudaParallelCalcForcesAndEnergyKernel(name, platform, data); return new CudaParallelCalcForcesAndEnergyKernel(name, platform, data);
// if (name == CalcHarmonicBondForceKernel::Name()) if (name == CalcHarmonicBondForceKernel::Name())
// return new CudaParallelCalcHarmonicBondForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcHarmonicBondForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomBondForceKernel::Name()) if (name == CalcCustomBondForceKernel::Name())
// return new CudaParallelCalcCustomBondForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcCustomBondForceKernel(name, platform, data, context.getSystem());
// if (name == CalcHarmonicAngleForceKernel::Name()) if (name == CalcHarmonicAngleForceKernel::Name())
// return new CudaParallelCalcHarmonicAngleForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcHarmonicAngleForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomAngleForceKernel::Name()) if (name == CalcCustomAngleForceKernel::Name())
// return new CudaParallelCalcCustomAngleForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcCustomAngleForceKernel(name, platform, data, context.getSystem());
// if (name == CalcPeriodicTorsionForceKernel::Name()) if (name == CalcPeriodicTorsionForceKernel::Name())
// return new CudaParallelCalcPeriodicTorsionForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcPeriodicTorsionForceKernel(name, platform, data, context.getSystem());
// if (name == CalcRBTorsionForceKernel::Name()) if (name == CalcRBTorsionForceKernel::Name())
// return new CudaParallelCalcRBTorsionForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcRBTorsionForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCMAPTorsionForceKernel::Name()) if (name == CalcCMAPTorsionForceKernel::Name())
// return new CudaParallelCalcCMAPTorsionForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcCMAPTorsionForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomTorsionForceKernel::Name()) if (name == CalcCustomTorsionForceKernel::Name())
// return new CudaParallelCalcCustomTorsionForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcCustomTorsionForceKernel(name, platform, data, context.getSystem());
// if (name == CalcNonbondedForceKernel::Name()) if (name == CalcNonbondedForceKernel::Name())
// return new CudaParallelCalcNonbondedForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcNonbondedForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomNonbondedForceKernel::Name()) if (name == CalcCustomNonbondedForceKernel::Name())
// return new CudaParallelCalcCustomNonbondedForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcCustomNonbondedForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomExternalForceKernel::Name()) if (name == CalcCustomExternalForceKernel::Name())
// return new CudaParallelCalcCustomExternalForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcCustomExternalForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomHbondForceKernel::Name()) if (name == CalcCustomHbondForceKernel::Name())
// return new CudaParallelCalcCustomHbondForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcCustomHbondForceKernel(name, platform, data, context.getSystem());
// if (name == CalcCustomCompoundBondForceKernel::Name()) if (name == CalcCustomCompoundBondForceKernel::Name())
// return new CudaParallelCalcCustomCompoundBondForceKernel(name, platform, data, context.getSystem()); return new CudaParallelCalcCustomCompoundBondForceKernel(name, platform, data, context.getSystem());
// } }
CudaContext& cu = *data.contexts[0]; CudaContext& cu = *data.contexts[0];
if (name == CalcForcesAndEnergyKernel::Name()) if (name == CalcForcesAndEnergyKernel::Name())
return new CudaCalcForcesAndEnergyKernel(name, platform, cu); return new CudaCalcForcesAndEnergyKernel(name, platform, cu);
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
#include "CudaBondedUtilities.h" #include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h" #include "CudaExpressionUtilities.h"
#include "CudaIntegrationUtilities.h" #include "CudaIntegrationUtilities.h"
//#include "CudaNonbondedUtilities.h" #include "CudaNonbondedUtilities.h"
#include "CudaKernelSources.h" #include "CudaKernelSources.h"
#include "lepton/ExpressionTreeNode.h" #include "lepton/ExpressionTreeNode.h"
#include "lepton/Operation.h" #include "lepton/Operation.h"
...@@ -282,48 +282,62 @@ void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, cons ...@@ -282,48 +282,62 @@ void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, cons
void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) { void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
cu.setAsCurrent(); cu.setAsCurrent();
// int version = 1; int version = 1;
// stream.write((char*) &version, sizeof(int)); stream.write((char*) &version, sizeof(int));
// double time = cu.getTime(); double time = cu.getTime();
// stream.write((char*) &time, sizeof(double)); stream.write((char*) &time, sizeof(double));
// cu.getPosq().download(); int stepCount = cu.getStepCount();
// stream.write((char*) &cu.getPosq()[0], sizeof(mm_float4)*cu.getPosq().getSize()); stream.write((char*) &stepCount, sizeof(int));
// cu.getVelm().download(); int computeForceCount = cu.getComputeForceCount();
// stream.write((char*) &cu.getVelm()[0], sizeof(mm_float4)*cu.getVelm().getSize()); stream.write((char*) &computeForceCount, sizeof(int));
// stream.write((char*) &cu.getAtomIndex()[0], sizeof(cl_int)*cu.getAtomIndex().getSize()); int bufferSize = cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
// stream.write((char*) &cu.getPosCellOffsets()[0], sizeof(mm_int4)*cu.getPosCellOffsets().size()); char* buffer = (char*) cu.getPinnedBuffer();
// mm_float4 box = cu.getPeriodicBoxSize(); cu.getPosq().download(buffer);
// stream.write((char*) &box, sizeof(mm_float4)); stream.write(buffer, bufferSize);
// cu.getIntegrationUtilities().createCheckpoint(stream); cu.getVelm().download(buffer);
// SimTKOpenMMUtilities::createCheckpoint(stream); stream.write(buffer, bufferSize);
stream.write((char*) &cu.getAtomIndex()[0], sizeof(int)*cu.getAtomIndex().size());
stream.write((char*) &cu.getPosCellOffsets()[0], sizeof(int4)*cu.getPosCellOffsets().size());
double4 box = cu.getPeriodicBoxSize();
stream.write((char*) &box, sizeof(double4));
cu.getIntegrationUtilities().createCheckpoint(stream);
SimTKOpenMMUtilities::createCheckpoint(stream);
} }
void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) { void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
cu.setAsCurrent(); cu.setAsCurrent();
// int version; int version;
// stream.read((char*) &version, sizeof(int)); stream.read((char*) &version, sizeof(int));
// if (version != 1) if (version != 1)
// throw OpenMMException("Checkpoint was created with a different version of OpenMM"); throw OpenMMException("Checkpoint was created with a different version of OpenMM");
// double time; double time;
// stream.read((char*) &time, sizeof(double)); stream.read((char*) &time, sizeof(double));
// vector<CudaContext*>& contexts = cu.getPlatformData().contexts; int stepCount, computeForceCount;
// for (int i = 0; i < (int) contexts.size(); i++) stream.read((char*) &stepCount, sizeof(int));
// contexts[i]->setTime(time); stream.read((char*) &computeForceCount, sizeof(int));
// stream.read((char*) &cu.getPosq()[0], sizeof(mm_float4)*cu.getPosq().getSize()); vector<CudaContext*>& contexts = cu.getPlatformData().contexts;
// cu.getPosq().upload(); for (int i = 0; i < (int) contexts.size(); i++) {
// stream.read((char*) &cu.getVelm()[0], sizeof(mm_float4)*cu.getVelm().getSize()); contexts[i]->setTime(time);
// cu.getVelm().upload(); contexts[i]->setStepCount(stepCount);
// stream.read((char*) &cu.getAtomIndex()[0], sizeof(cl_int)*cu.getAtomIndex().getSize()); contexts[i]->setComputeForceCount(computeForceCount);
// cu.getAtomIndex().upload(); }
// stream.read((char*) &cu.getPosCellOffsets()[0], sizeof(mm_int4)*cu.getPosCellOffsets().size()); int bufferSize = cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
// mm_float4 box; char* buffer = (char*) cu.getPinnedBuffer();
// stream.read((char*) &box, sizeof(mm_float4)); stream.read(buffer, bufferSize);
// for (int i = 0; i < (int) contexts.size(); i++) cu.getPosq().upload(buffer);
// contexts[i]->setPeriodicBoxSize(box.x, box.y, box.z); stream.read(buffer, bufferSize);
// cu.getIntegrationUtilities().loadCheckpoint(stream); cu.getVelm().upload(buffer);
// SimTKOpenMMUtilities::loadCheckpoint(stream); stream.read((char*) &cu.getAtomIndex()[0], sizeof(int)*cu.getAtomIndex().size());
// for (int i = 0; i < cu.getReorderListeners().size(); i++) cu.getAtomIndexArray().upload(cu.getAtomIndex());
// cu.getReorderListeners()[i]->execute(); stream.read((char*) &cu.getPosCellOffsets()[0], sizeof(int4)*cu.getPosCellOffsets().size());
double4 box;
stream.read((char*) &box, sizeof(double4));
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setPeriodicBoxSize(box.x, box.y, box.z);
cu.getIntegrationUtilities().loadCheckpoint(stream);
SimTKOpenMMUtilities::loadCheckpoint(stream);
for (int i = 0; i < cu.getReorderListeners().size(); i++)
cu.getReorderListeners()[i]->execute();
} }
void CudaApplyConstraintsKernel::initialize(const System& system) { void CudaApplyConstraintsKernel::initialize(const System& system) {
...@@ -840,6 +854,7 @@ private: ...@@ -840,6 +854,7 @@ private:
}; };
CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() { CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
} }
...@@ -926,6 +941,7 @@ private: ...@@ -926,6 +941,7 @@ private:
}; };
CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() { CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
cu.setAsCurrent();
if (params1 != NULL) if (params1 != NULL)
delete params1; delete params1;
if (params2 != NULL) if (params2 != NULL)
...@@ -3983,8 +3999,8 @@ CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() { ...@@ -3983,8 +3999,8 @@ CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {
} }
void CudaIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) { void CudaIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system); cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
...@@ -3995,6 +4011,7 @@ void CudaIntegrateVerletStepKernel::initialize(const System& system, const Verle ...@@ -3995,6 +4011,7 @@ void CudaIntegrateVerletStepKernel::initialize(const System& system, const Verle
} }
void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) { void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms(); int numAtoms = cu.getNumAtoms();
double dt = integrator.getStepSize(); double dt = integrator.getStepSize();
...@@ -4042,8 +4059,8 @@ CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() { ...@@ -4042,8 +4059,8 @@ CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() {
} }
void CudaIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) { void CudaIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system); cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
...@@ -4056,6 +4073,7 @@ void CudaIntegrateLangevinStepKernel::initialize(const System& system, const Lan ...@@ -4056,6 +4073,7 @@ void CudaIntegrateLangevinStepKernel::initialize(const System& system, const Lan
} }
void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) { void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms(); int numAtoms = cu.getNumAtoms();
double temperature = integrator.getTemperature(); double temperature = integrator.getTemperature();
...@@ -4120,8 +4138,8 @@ CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() { ...@@ -4120,8 +4138,8 @@ CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
} }
void CudaIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) { void CudaIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system); cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
...@@ -4133,6 +4151,7 @@ void CudaIntegrateBrownianStepKernel::initialize(const System& system, const Bro ...@@ -4133,6 +4151,7 @@ void CudaIntegrateBrownianStepKernel::initialize(const System& system, const Bro
} }
void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) { void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms(); int numAtoms = cu.getNumAtoms();
double temperature = integrator.getTemperature(); double temperature = integrator.getTemperature();
...@@ -4175,8 +4194,8 @@ CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel() ...@@ -4175,8 +4194,8 @@ CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel()
} }
void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) { void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system); cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
...@@ -4188,6 +4207,7 @@ void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, con ...@@ -4188,6 +4207,7 @@ void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, con
} }
double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) { double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms(); int numAtoms = cu.getNumAtoms();
...@@ -4252,8 +4272,8 @@ CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKerne ...@@ -4252,8 +4272,8 @@ CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKerne
} }
void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) { void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system); cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
...@@ -4268,6 +4288,7 @@ void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, c ...@@ -4268,6 +4288,7 @@ void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, c
} }
double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) { double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms(); int numAtoms = cu.getNumAtoms();
...@@ -4412,8 +4433,8 @@ CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() { ...@@ -4412,8 +4433,8 @@ CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() {
} }
void CudaIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) { void CudaIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) {
cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system); cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
numGlobalVariables = integrator.getNumGlobalVariables(); numGlobalVariables = integrator.getNumGlobalVariables();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
...@@ -4492,6 +4513,7 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari ...@@ -4492,6 +4513,7 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
} }
void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) { void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms(); int numAtoms = cu.getNumAtoms();
int numSteps = integrator.getNumComputations(); int numSteps = integrator.getNumComputations();
......
This diff is collapsed.
This diff is collapsed.
...@@ -101,13 +101,14 @@ __device__ void storeInteractionData(ushort2* buffer, int* valid, short* sum, us ...@@ -101,13 +101,14 @@ __device__ void storeInteractionData(ushort2* buffer, int* valid, short* sum, us
extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, const real4* __restrict__ blockCenter, extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockBoundingBox, unsigned int* __restrict__ interactionCount, ushort2* __restrict__ interactingTiles, const real4* __restrict__ blockBoundingBox, unsigned int* __restrict__ interactionCount, ushort2* __restrict__ interactingTiles,
unsigned int* __restrict__ interactionFlags, const real4* __restrict__ posq, unsigned int maxTiles, unsigned int startTileIndex, unsigned int* __restrict__ interactionFlags, const real4* __restrict__ posq, unsigned int maxTiles, unsigned int startTileIndex,
unsigned int endTileIndex) { unsigned int numTiles) {
__shared__ ushort2 buffer[BUFFER_SIZE]; __shared__ ushort2 buffer[BUFFER_SIZE];
__shared__ int valid[BUFFER_SIZE]; __shared__ int valid[BUFFER_SIZE];
__shared__ short sum[BUFFER_SIZE]; __shared__ short sum[BUFFER_SIZE];
__shared__ ushort2 temp[BUFFER_SIZE]; __shared__ ushort2 temp[BUFFER_SIZE];
__shared__ int bufferFull; __shared__ int bufferFull;
__shared__ int globalIndex; __shared__ int globalIndex;
unsigned int endTileIndex = startTileIndex+numTiles;
int valuesInBuffer = 0; int valuesInBuffer = 0;
if (threadIdx.x == 0) if (threadIdx.x == 0)
bufferFull = false; bufferFull = false;
......
/**
* Sum the forces computed by different contexts.
*/
extern "C" __global__ void sumForces(long long* __restrict__ force, long long* __restrict__ buffer, int bufferSize, int numBuffers) {
int totalSize = bufferSize*numBuffers;
for (int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x) {
long long sum = force[index];
for (int i = index; i < totalSize; i += bufferSize)
sum += buffer[i];
force[index] = sum;
}
}
...@@ -73,34 +73,4 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res ...@@ -73,34 +73,4 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
clearSingleBuffer(buffer6, size6); clearSingleBuffer(buffer6, size6);
} }
/**
* Sum a collection of buffers into the first one.
*/
__global__ void reduceFloat4Buffer(float4* __restrict__ buffer, int bufferSize, int numBuffers) {
int index = blockDim.x*blockIdx.x+threadIdx.x;
int totalSize = bufferSize*numBuffers;
while (index < bufferSize) {
float4 sum = buffer[index];
for (int i = index+bufferSize; i < totalSize; i += bufferSize)
sum += buffer[i];
buffer[index] = sum;
index += blockDim.x*gridDim.x;
}
}
/**
* Sum the various buffers containing forces.
*/
__global__ void reduceForces(const long* __restrict__ longBuffer, float4* __restrict__ buffer, int bufferSize, int numBuffers) {
int totalSize = bufferSize*numBuffers;
float scale = 1.0f/(float) 0xFFFFFFFF;
for (int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x) {
float4 sum = make_float4(scale*longBuffer[index], scale*longBuffer[index+bufferSize], scale*longBuffer[index+2*bufferSize], 0.0f);
for (int i = index; i < totalSize; i += bufferSize)
sum += buffer[i];
buffer[index] = sum;
}
}
} }
\ No newline at end of file
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests creating and loading checkpoints with the CUDA platform.
*/
#include "CudaPlatform.h"
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/AndersenThermostat.h"
#include "openmm/Context.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <sstream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
void compareStates(State& s1, State& s2) {
ASSERT_EQUAL_TOL(s1.getTime(), s2.getTime(), TOL);
int numParticles = s1.getPositions().size();
for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_VEC(s1.getPositions()[i], s2.getPositions()[i], TOL);
ASSERT_EQUAL_VEC(s1.getVelocities()[i], s2.getVelocities()[i], TOL);
Vec3 a1, b1, c1, a2, b2, c2;
s1.getPeriodicBoxVectors(a1, b1, c1);
s2.getPeriodicBoxVectors(a2, b2, c2);
ASSERT_EQUAL_VEC(a1, a2, TOL);
ASSERT_EQUAL_VEC(b1, b2, TOL);
ASSERT_EQUAL_VEC(c1, c2, TOL);
for (map<string, double>::const_iterator iter = s1.getParameters().begin(); iter != s1.getParameters().end(); ++iter)
ASSERT_EQUAL(iter->second, (*s2.getParameters().find(iter->first)).second);
}
}
void testCheckpoint() {
const int numParticles = 100;
const double boxSize = 5.0;
const double temperature = 200.0;
CudaPlatform platform;
System system;
system.addForce(new AndersenThermostat(0.0, 100.0));
NonbondedForce* nonbonded = new NonbondedForce();
system.addForce(nonbonded);
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
nonbonded->addParticle(i%2 == 0 ? 0.1 : -0.1, 0.2, 0.1);
bool clash;
do {
clash = false;
positions[i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
for (int j = 0; j < i; j++) {
Vec3 delta = positions[i]-positions[j];
if (sqrt(delta.dot(delta)) < 0.1)
clash = true;
}
} while (clash);
}
VerletIntegrator integrator(0.001);
Context context(system, integrator, platform);
context.setPositions(positions);
context.setPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
context.setParameter(AndersenThermostat::Temperature(), temperature);
// Run for a little while.
integrator.step(100);
// Record the current state and make a checkpoint.
State s1 = context.getState(State::Positions | State::Velocities | State::Parameters);
stringstream stream1(ios_base::out | ios_base::in | ios_base::binary);
context.createCheckpoint(stream1);
// Continue the simulation for a few more steps and record the state again.
integrator.step(10);
State s2 = context.getState(State::Positions | State::Velocities | State::Parameters);
// Restore from the checkpoint and see if everything gets restored correctly.
context.setPeriodicBoxVectors(Vec3(2*boxSize, 0, 0), Vec3(0, 2*boxSize, 0), Vec3(0, 0, 2*boxSize));
context.setParameter(AndersenThermostat::Temperature(), temperature+10);
context.loadCheckpoint(stream1);
State s3 = context.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s1, s3);
// Now simulate from there and see if the trajectory is identical.
integrator.step(10);
State s4 = context.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s2, s4);
// Create a new Context that uses multiple devices.
string deviceIndex = platform.getPropertyValue(context, CudaPlatform::CudaDeviceIndex());
map<string, string> props;
props[CudaPlatform::CudaDeviceIndex()] = deviceIndex+","+deviceIndex;
VerletIntegrator integrator2(0.001);
Context context2(system, integrator2, platform, props);
context2.setPositions(positions);
context2.setPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
context2.setParameter(AndersenThermostat::Temperature(), temperature);
// Now repeat all of the above tests with it.
integrator2.step(100);
State s5 = context2.getState(State::Positions | State::Velocities | State::Parameters);
stringstream stream2(ios_base::out | ios_base::in | ios_base::binary);
context2.createCheckpoint(stream2);
integrator2.step(10);
State s6 = context2.getState(State::Positions | State::Velocities | State::Parameters);
context2.setPeriodicBoxVectors(Vec3(2*boxSize, 0, 0), Vec3(0, 2*boxSize, 0), Vec3(0, 0, 2*boxSize));
context2.setParameter(AndersenThermostat::Temperature(), temperature+10);
context2.loadCheckpoint(stream2);
State s7 = context2.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s5, s7);
integrator2.step(10);
State s8 = context2.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s6, s8);
}
int main() {
try {
testCheckpoint();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
...@@ -164,7 +164,7 @@ void testParallelComputation() { ...@@ -164,7 +164,7 @@ void testParallelComputation() {
int main() { int main() {
try { try {
testAngles(); testAngles();
// testParallelComputation(); testParallelComputation();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -169,7 +169,7 @@ int main() { ...@@ -169,7 +169,7 @@ int main() {
try { try {
testBonds(); testBonds();
testManyParameters(); testManyParameters();
// testParallelComputation(); testParallelComputation();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -205,7 +205,7 @@ int main() { ...@@ -205,7 +205,7 @@ int main() {
try { try {
testBond(); testBond();
testPositionDependence(); testPositionDependence();
// testParallelComputation(); testParallelComputation();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -166,7 +166,7 @@ int main() { ...@@ -166,7 +166,7 @@ int main() {
try { try {
testForce(); testForce();
testManyParameters(); testManyParameters();
// testParallelComputation(); testParallelComputation();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -424,7 +424,7 @@ int main() { ...@@ -424,7 +424,7 @@ int main() {
testPeriodic(); testPeriodic();
testTabulatedFunction(); testTabulatedFunction();
testCoulombLennardJones(); testCoulombLennardJones();
// testParallelComputation(); testParallelComputation();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -205,7 +205,7 @@ int main() { ...@@ -205,7 +205,7 @@ int main() {
try { try {
testTorsions(); testTorsions();
testRange(); testRange();
// testParallelComputation(); testParallelComputation();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -130,7 +130,7 @@ void testParallelComputation() { ...@@ -130,7 +130,7 @@ void testParallelComputation() {
int main() { int main() {
try { try {
testAngles(); testAngles();
// testParallelComputation(); testParallelComputation();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -121,7 +121,7 @@ void testParallelComputation() { ...@@ -121,7 +121,7 @@ void testParallelComputation() {
int main() { int main() {
try { try {
testBonds(); testBonds();
// testParallelComputation(); testParallelComputation();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -814,8 +814,8 @@ int main() { ...@@ -814,8 +814,8 @@ int main() {
testBlockInteractions(true); testBlockInteractions(true);
testDispersionCorrection(); testDispersionCorrection();
testChangingParameters(); testChangingParameters();
// testParallelComputation(false); testParallelComputation(false);
// testParallelComputation(true); testParallelComputation(true);
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -124,7 +124,7 @@ void testParallelComputation() { ...@@ -124,7 +124,7 @@ void testParallelComputation() {
int main() { int main() {
try { try {
testPeriodicTorsions(); testPeriodicTorsions();
// testParallelComputation(); testParallelComputation();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -143,7 +143,7 @@ void testParallelComputation() { ...@@ -143,7 +143,7 @@ void testParallelComputation() {
int main() { int main() {
try { try {
testRBTorsions(); testRBTorsions();
// testParallelComputation(); testParallelComputation();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment