Commit eb232608 authored by John Chodera (MSKCC)'s avatar John Chodera (MSKCC)
Browse files

Merge remote-tracking branch 'upstream/master'

parents 62581e9c 7f8c5089
...@@ -25,8 +25,6 @@ ...@@ -25,8 +25,6 @@
#include <string.h> #include <string.h>
#include <sstream> #include <sstream>
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMLog.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "ReferenceForce.h" #include "ReferenceForce.h"
#include "CpuCustomGBForce.h" #include "CpuCustomGBForce.h"
......
...@@ -26,8 +26,6 @@ ...@@ -26,8 +26,6 @@
#include <sstream> #include <sstream>
#include <utility> #include <utility>
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMLog.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "ReferenceForce.h" #include "ReferenceForce.h"
#include "CpuCustomManyParticleForce.h" #include "CpuCustomManyParticleForce.h"
......
...@@ -25,8 +25,6 @@ ...@@ -25,8 +25,6 @@
#include <string.h> #include <string.h>
#include <sstream> #include <sstream>
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMLog.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "ReferenceForce.h" #include "ReferenceForce.h"
#include "CpuCustomNonbondedForce.h" #include "CpuCustomNonbondedForce.h"
......
...@@ -231,13 +231,13 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i ...@@ -231,13 +231,13 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
throw OpenMMException("Particle coordinate is nan"); throw OpenMMException("Particle coordinate is nan");
} }
double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) { double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
// Sum the forces from all the threads. // Sum the forces from all the threads.
SumForceTask task(context.getSystem().getNumParticles(), extractForces(context), data); SumForceTask task(context.getSystem().getNumParticles(), extractForces(context), data);
data.threads.execute(task); data.threads.execute(task);
data.threads.waitForThreads(); data.threads.waitForThreads();
return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups); return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups, valid);
} }
CpuCalcPeriodicTorsionForceKernel::~CpuCalcPeriodicTorsionForceKernel() { CpuCalcPeriodicTorsionForceKernel::~CpuCalcPeriodicTorsionForceKernel() {
...@@ -508,13 +508,12 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -508,13 +508,12 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
if (nonbondedMethod == PME) { if (nonbondedMethod == PME) {
// If available, use the optimized PME implementation. // If available, use the optimized PME implementation.
try { vector<string> kernelNames;
kernelNames.push_back("CalcPmeReciprocalForce");
useOptimizedPme = getPlatform().supportsKernels(kernelNames);
if (useOptimizedPme) {
optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context); optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha); optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
useOptimizedPme = true;
}
catch (OpenMMException& ex) {
// The CPU PME plugin isn't available.
} }
} }
} }
......
...@@ -23,8 +23,6 @@ ...@@ -23,8 +23,6 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMLog.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "CpuLangevinDynamics.h" #include "CpuLangevinDynamics.h"
......
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
#include <complex> #include <complex>
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForce.h" #include "CpuNonbondedForce.h"
#include "ReferenceForce.h" #include "ReferenceForce.h"
...@@ -217,7 +216,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -217,7 +216,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
// setup reciprocal box // setup reciprocal box
float recipBoxSize[3] = { TWO_PI / periodicBoxVectors[0][0], TWO_PI / periodicBoxVectors[1][1], TWO_PI / periodicBoxVectors[2][2]}; float recipBoxSize[3] = {(float) (TWO_PI/periodicBoxVectors[0][0]), (float) (TWO_PI/periodicBoxVectors[1][1]), (float) (TWO_PI/periodicBoxVectors[2][2])};
// setup K-vectors // setup K-vectors
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h" #include "CpuNonbondedForceVec4.h"
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec8.h" #include "CpuNonbondedForceVec8.h"
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
......
...@@ -62,7 +62,7 @@ void testNeighborList(bool periodic, bool triclinic) { ...@@ -62,7 +62,7 @@ void testNeighborList(bool periodic, bool triclinic) {
boxVectors[1] = RealVec(0, 15, 0); boxVectors[1] = RealVec(0, 15, 0);
boxVectors[2] = RealVec(0, 0, 22); boxVectors[2] = RealVec(0, 0, 22);
} }
const float boxSize[3] = {boxVectors[0][0], boxVectors[1][1], boxVectors[2][2]}; const float boxSize[3] = {(float) boxVectors[0][0], (float) boxVectors[1][1], (float) boxVectors[2][2]};
const int blockSize = 8; const int blockSize = 8;
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt); init_gen_rand(0, sfmt);
......
...@@ -92,11 +92,13 @@ public: ...@@ -92,11 +92,13 @@ public:
* @param includeForce true if forces should be computed * @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed * @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include * @param groups a set of bit flags for which force groups to include
* @param valid the method may set this to false to indicate the results are invalid and the force/energy
* calculation should be repeated
* @return the potential energy of the system. This value is added to all values returned by ForceImpls' * @return the potential energy of the system. This value is added to all values returned by ForceImpls'
* calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the * calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the
* energy directly, <i>or</i> add it to an internal buffer so that it will be included here. * energy directly, <i>or</i> add it to an internal buffer so that it will be included here.
*/ */
double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups); double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid);
private: private:
CudaContext& cu; CudaContext& cu;
}; };
......
...@@ -278,7 +278,7 @@ private: ...@@ -278,7 +278,7 @@ private:
std::string kernelSource; std::string kernelSource;
std::map<std::string, std::string> kernelDefines; std::map<std::string, std::string> kernelDefines;
double cutoff; double cutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding; bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList;
int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, numForceThreadBlocks, forceThreadBlockSize, nonbondedForceGroup, numAtoms; int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, numForceThreadBlocks, forceThreadBlockSize, nonbondedForceGroup, numAtoms;
}; };
......
...@@ -69,11 +69,13 @@ public: ...@@ -69,11 +69,13 @@ public:
* @param includeForce true if forces should be computed * @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed * @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include * @param groups a set of bit flags for which force groups to include
* @param valid the method may set this to false to indicate the results are invalid and the force/energy
* calculation should be repeated
* @return the potential energy of the system. This value is added to all values returned by ForceImpls' * @return the potential energy of the system. This value is added to all values returned by ForceImpls'
* calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the * calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the
* energy directly, <i>or</i> add it to an internal buffer so that it will be included here. * energy directly, <i>or</i> add it to an internal buffer so that it will be included here.
*/ */
double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups); double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid);
private: private:
class BeginComputationTask; class BeginComputationTask;
class FinishComputationTask; class FinishComputationTask;
...@@ -81,6 +83,7 @@ private: ...@@ -81,6 +83,7 @@ private:
std::vector<Kernel> kernels; std::vector<Kernel> kernels;
std::vector<long long> completionTimes; std::vector<long long> completionTimes;
std::vector<double> contextNonbondedFractions; std::vector<double> contextNonbondedFractions;
int* tileCounts;
CudaArray* contextForces; CudaArray* contextForces;
void* pinnedPositionBuffer; void* pinnedPositionBuffer;
long long* pinnedForceBuffer; long long* pinnedForceBuffer;
......
...@@ -77,12 +77,14 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -77,12 +77,14 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false),
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
this->compiler = "\""+compiler+"\""; this->compiler = "\""+compiler+"\"";
try { if (platformData.context != NULL) {
compilerKernel = platformData.context->getPlatform().createKernel(CudaCompilerKernel::Name(), *platformData.context); try {
hasCompilerKernel = true; compilerKernel = platformData.context->getPlatform().createKernel(CudaCompilerKernel::Name(), *platformData.context);
} hasCompilerKernel = true;
catch (...) { }
// The runtime compiler plugin isn't available. catch (...) {
// The runtime compiler plugin isn't available.
}
} }
if (hostCompiler.size() > 0) if (hostCompiler.size() > 0)
this->compiler = compiler+" --compiler-bindir "+hostCompiler; this->compiler = compiler+" --compiler-bindir "+hostCompiler;
......
...@@ -104,7 +104,7 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool ...@@ -104,7 +104,7 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
nb.prepareInteractions(); nb.prepareInteractions();
} }
double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) { double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cu.getBondedUtilities().computeInteractions(groups); cu.getBondedUtilities().computeInteractions(groups);
if ((groups&(1<<cu.getNonbondedUtilities().getForceGroup())) != 0) if ((groups&(1<<cu.getNonbondedUtilities().getForceGroup())) != 0)
cu.getNonbondedUtilities().computeInteractions(); cu.getNonbondedUtilities().computeInteractions();
......
...@@ -62,10 +62,10 @@ private: ...@@ -62,10 +62,10 @@ private:
bool useDouble; bool useDouble;
}; };
CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), cutoff(-1.0), useCutoff(false), anyExclusions(false), usePadding(true), CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), cutoff(-1.0), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true),
exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL), exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL),
interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL), interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL),
oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), nonbondedForceGroup(0) { oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), nonbondedForceGroup(0), forceRebuildNeighborList(true) {
// Decide how many thread blocks to use. // Decide how many thread blocks to use.
string errorMessage = "Error initializing nonbonded utilities"; string errorMessage = "Error initializing nonbonded utilities";
...@@ -264,14 +264,6 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -264,14 +264,6 @@ void CudaNonbondedUtilities::initialize(const System& system) {
sortedBlockCenter = new CudaArray(context, numAtomBlocks+1, 4*elementSize, "sortedBlockCenter"); sortedBlockCenter = new CudaArray(context, numAtomBlocks+1, 4*elementSize, "sortedBlockCenter");
sortedBlockBoundingBox = new CudaArray(context, numAtomBlocks+1, 4*elementSize, "sortedBlockBoundingBox"); sortedBlockBoundingBox = new CudaArray(context, numAtomBlocks+1, 4*elementSize, "sortedBlockBoundingBox");
oldPositions = new CudaArray(context, numAtoms, 4*elementSize, "oldPositions"); oldPositions = new CudaArray(context, numAtoms, 4*elementSize, "oldPositions");
if (context.getUseDoublePrecision()) {
vector<double4> oldPositionsVec(numAtoms, make_double4(1e30, 1e30, 1e30, 0));
oldPositions->upload(oldPositionsVec);
}
else {
vector<float4> oldPositionsVec(numAtoms, make_float4(1e30f, 1e30f, 1e30f, 0));
oldPositions->upload(oldPositionsVec);
}
rebuildNeighborList = CudaArray::create<int>(context, 1, "rebuildNeighborList"); rebuildNeighborList = CudaArray::create<int>(context, 1, "rebuildNeighborList");
blockSorter = new CudaSort(context, new BlockSortTrait(context.getUseDoublePrecision()), numAtomBlocks); blockSorter = new CudaSort(context, new BlockSortTrait(context.getUseDoublePrecision()), numAtomBlocks);
vector<unsigned int> count(1, 0); vector<unsigned int> count(1, 0);
...@@ -322,6 +314,7 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -322,6 +314,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
sortBoxDataArgs.push_back(&oldPositions->getDevicePointer()); sortBoxDataArgs.push_back(&oldPositions->getDevicePointer());
sortBoxDataArgs.push_back(&interactionCount->getDevicePointer()); sortBoxDataArgs.push_back(&interactionCount->getDevicePointer());
sortBoxDataArgs.push_back(&rebuildNeighborList->getDevicePointer()); sortBoxDataArgs.push_back(&rebuildNeighborList->getDevicePointer());
sortBoxDataArgs.push_back(&forceRebuildNeighborList);
findInteractingBlocksKernel = context.getKernel(interactingBlocksProgram, "findBlocksWithInteractions"); findInteractingBlocksKernel = context.getKernel(interactingBlocksProgram, "findBlocksWithInteractions");
findInteractingBlocksArgs.push_back(context.getPeriodicBoxSizePointer()); findInteractingBlocksArgs.push_back(context.getPeriodicBoxSizePointer());
findInteractingBlocksArgs.push_back(context.getInvPeriodicBoxSizePointer()); findInteractingBlocksArgs.push_back(context.getInvPeriodicBoxSizePointer());
...@@ -363,6 +356,7 @@ void CudaNonbondedUtilities::prepareInteractions() { ...@@ -363,6 +356,7 @@ void CudaNonbondedUtilities::prepareInteractions() {
blockSorter->sort(*sortedBlocks); blockSorter->sort(*sortedBlocks);
context.executeKernel(sortBoxDataKernel, &sortBoxDataArgs[0], context.getNumAtoms()); context.executeKernel(sortBoxDataKernel, &sortBoxDataArgs[0], context.getNumAtoms());
context.executeKernel(findInteractingBlocksKernel, &findInteractingBlocksArgs[0], context.getNumAtoms(), 256); context.executeKernel(findInteractingBlocksKernel, &findInteractingBlocksArgs[0], context.getNumAtoms(), 256);
forceRebuildNeighborList = false;
} }
void CudaNonbondedUtilities::computeInteractions() { void CudaNonbondedUtilities::computeInteractions() {
...@@ -400,14 +394,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() { ...@@ -400,14 +394,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() {
if (forceArgs.size() > 0) if (forceArgs.size() > 0)
forceArgs[17] = &interactingAtoms->getDevicePointer(); forceArgs[17] = &interactingAtoms->getDevicePointer();
findInteractingBlocksArgs[7] = &interactingAtoms->getDevicePointer(); findInteractingBlocksArgs[7] = &interactingAtoms->getDevicePointer();
if (context.getUseDoublePrecision()) { forceRebuildNeighborList = true;
vector<double4> oldPositionsVec(numAtoms, make_double4(1e30, 1e30, 1e30, 0));
oldPositions->upload(oldPositionsVec);
}
else {
vector<float4> oldPositionsVec(numAtoms, make_float4(1e30f, 1e30f, 1e30f, 0));
oldPositions->upload(oldPositionsVec);
}
} }
void CudaNonbondedUtilities::setUsePadding(bool padding) { void CudaNonbondedUtilities::setUsePadding(bool padding) {
...@@ -419,8 +406,9 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF ...@@ -419,8 +406,9 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
startBlockIndex = (int) (startFraction*numAtomBlocks); startBlockIndex = (int) (startFraction*numAtomBlocks);
numBlocks = (int) (endFraction*numAtomBlocks)-startBlockIndex; numBlocks = (int) (endFraction*numAtomBlocks)-startBlockIndex;
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2; int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
startTileIndex = (int) (startFraction*totalTiles);; startTileIndex = (int) (startFraction*totalTiles);
numTiles = (int) (endFraction*totalTiles)-startTileIndex; numTiles = (int) (endFraction*totalTiles)-startTileIndex;
forceRebuildNeighborList = true;
} }
CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, vector<ParameterInfo>& params, vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric) { CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, vector<ParameterInfo>& params, vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric) {
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. * * Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -63,8 +63,8 @@ if (result != CUDA_SUCCESS) { \ ...@@ -63,8 +63,8 @@ if (result != CUDA_SUCCESS) { \
class CudaParallelCalcForcesAndEnergyKernel::BeginComputationTask : public CudaContext::WorkTask { class CudaParallelCalcForcesAndEnergyKernel::BeginComputationTask : public CudaContext::WorkTask {
public: public:
BeginComputationTask(ContextImpl& context, CudaContext& cu, CudaCalcForcesAndEnergyKernel& kernel, BeginComputationTask(ContextImpl& context, CudaContext& cu, CudaCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, void* pinnedMemory, CUevent event) : context(context), cu(cu), kernel(kernel), bool includeForce, bool includeEnergy, int groups, void* pinnedMemory, CUevent event, int& numTiles) : context(context), cu(cu), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), pinnedMemory(pinnedMemory), event(event) { includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), pinnedMemory(pinnedMemory), event(event), numTiles(numTiles) {
} }
void execute() { void execute() {
// Copy coordinates over to this device and execute the kernel. // Copy coordinates over to this device and execute the kernel.
...@@ -76,6 +76,8 @@ public: ...@@ -76,6 +76,8 @@ public:
cu.getPosq().upload(pinnedMemory, false); cu.getPosq().upload(pinnedMemory, false);
} }
kernel.beginComputation(context, includeForce, includeEnergy, groups); kernel.beginComputation(context, includeForce, includeEnergy, groups);
if (cu.getNonbondedUtilities().getUsePeriodic())
cu.getNonbondedUtilities().getInteractionCount().download(&numTiles, false);
} }
private: private:
ContextImpl& context; ContextImpl& context;
...@@ -85,19 +87,20 @@ private: ...@@ -85,19 +87,20 @@ private:
int groups; int groups;
void* pinnedMemory; void* pinnedMemory;
CUevent event; CUevent event;
int& numTiles;
}; };
class CudaParallelCalcForcesAndEnergyKernel::FinishComputationTask : public CudaContext::WorkTask { class CudaParallelCalcForcesAndEnergyKernel::FinishComputationTask : public CudaContext::WorkTask {
public: public:
FinishComputationTask(ContextImpl& context, CudaContext& cu, CudaCalcForcesAndEnergyKernel& kernel, FinishComputationTask(ContextImpl& context, CudaContext& cu, CudaCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, CudaArray& contextForces) : bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, CudaArray& contextForces, bool& valid, int& numTiles) :
context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy), context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy),
completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces) { completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), numTiles(numTiles) {
} }
void execute() { void execute() {
// Execute the kernel, then download forces. // Execute the kernel, then download forces.
energy += kernel.finishComputation(context, includeForce, includeEnergy, groups); energy += kernel.finishComputation(context, includeForce, includeEnergy, groups, valid);
if (cu.getComputeForceCount() < 200) { if (cu.getComputeForceCount() < 200) {
// Record timing information for load balancing. Since this takes time, only do it at the start of the simulation. // Record timing information for load balancing. Since this takes time, only do it at the start of the simulation.
...@@ -117,6 +120,10 @@ public: ...@@ -117,6 +120,10 @@ public:
cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]); cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]);
} }
} }
if (cu.getNonbondedUtilities().getUsePeriodic() && numTiles > cu.getNonbondedUtilities().getInteractingTiles().getSize()) {
valid = false;
cu.getNonbondedUtilities().updateNeighborListSize();
}
} }
private: private:
ContextImpl& context; ContextImpl& context;
...@@ -128,11 +135,13 @@ private: ...@@ -128,11 +135,13 @@ private:
long long& completionTime; long long& completionTime;
long long* pinnedMemory; long long* pinnedMemory;
CudaArray& contextForces; CudaArray& contextForces;
bool& valid;
int& numTiles;
}; };
CudaParallelCalcForcesAndEnergyKernel::CudaParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, CudaPlatform::PlatformData& data) : CudaParallelCalcForcesAndEnergyKernel::CudaParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, CudaPlatform::PlatformData& data) :
CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()), contextForces(NULL), CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()),
pinnedPositionBuffer(NULL), pinnedForceBuffer(NULL) { tileCounts(NULL), contextForces(NULL), pinnedPositionBuffer(NULL), pinnedForceBuffer(NULL) {
for (int i = 0; i < (int) data.contexts.size(); i++) for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcForcesAndEnergyKernel(name, platform, *data.contexts[i]))); kernels.push_back(Kernel(new CudaCalcForcesAndEnergyKernel(name, platform, *data.contexts[i])));
} }
...@@ -147,6 +156,8 @@ CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel() ...@@ -147,6 +156,8 @@ CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel()
cuMemFreeHost(pinnedForceBuffer); cuMemFreeHost(pinnedForceBuffer);
cuEventDestroy(event); cuEventDestroy(event);
cuStreamDestroy(peerCopyStream); cuStreamDestroy(peerCopyStream);
if (tileCounts != NULL)
cuMemFreeHost(tileCounts);
} }
void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) { void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
...@@ -154,12 +165,14 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -154,12 +165,14 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
cu.setAsCurrent(); cu.setAsCurrent();
CUmodule module = cu.createModule(CudaKernelSources::parallel); CUmodule module = cu.createModule(CudaKernelSources::parallel);
sumKernel = cu.getKernel(module, "sumForces"); sumKernel = cu.getKernel(module, "sumForces");
for (int i = 0; i < (int) kernels.size(); i++) int numContexts = data.contexts.size();
for (int i = 0; i < numContexts; i++)
getKernel(i).initialize(system); getKernel(i).initialize(system);
for (int i = 0; i < (int) contextNonbondedFractions.size(); i++) for (int i = 0; i < numContexts; i++)
contextNonbondedFractions[i] = 1/(double) contextNonbondedFractions.size(); contextNonbondedFractions[i] = 1/(double) numContexts;
CHECK_RESULT(cuEventCreate(&event, 0), "Error creating event"); CHECK_RESULT(cuEventCreate(&event, 0), "Error creating event");
CHECK_RESULT(cuStreamCreate(&peerCopyStream, CU_STREAM_NON_BLOCKING), "Error creating stream"); CHECK_RESULT(cuStreamCreate(&peerCopyStream, CU_STREAM_NON_BLOCKING), "Error creating stream");
CHECK_RESULT(cuMemHostAlloc((void**) &tileCounts, numContexts*sizeof(int), 0), "Error creating tile count buffer");
} }
void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) { void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
...@@ -189,22 +202,21 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex ...@@ -189,22 +202,21 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
data.contextEnergy[i] = 0.0; data.contextEnergy[i] = 0.0;
CudaContext& cu = *data.contexts[i]; CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread(); CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, event)); thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, event, tileCounts[i]));
} }
} }
#include <cstdio> double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
for (int i = 0; i < (int) data.contexts.size(); i++) { for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i]; CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread(); CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, *contextForces)); thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, *contextForces, valid, tileCounts[i]));
} }
data.syncContexts(); data.syncContexts();
double energy = 0.0; double energy = 0.0;
for (int i = 0; i < (int) data.contextEnergy.size(); i++) for (int i = 0; i < (int) data.contextEnergy.size(); i++)
energy += data.contextEnergy[i]; energy += data.contextEnergy[i];
if (includeForce) { if (includeForce && valid) {
// Sum the forces from all devices. // Sum the forces from all devices.
CudaContext& cu = *data.contexts[0]; CudaContext& cu = *data.contexts[0];
......
...@@ -43,7 +43,7 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize, ...@@ -43,7 +43,7 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize,
extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, const real4* __restrict__ blockCenter, extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockBoundingBox, real4* __restrict__ sortedBlockCenter, const real4* __restrict__ blockBoundingBox, real4* __restrict__ sortedBlockCenter,
real4* __restrict__ sortedBlockBoundingBox, const real4* __restrict__ posq, const real4* __restrict__ oldPositions, real4* __restrict__ sortedBlockBoundingBox, const real4* __restrict__ posq, const real4* __restrict__ oldPositions,
unsigned int* __restrict__ interactionCount, int* __restrict__ rebuildNeighborList) { unsigned int* __restrict__ interactionCount, int* __restrict__ rebuildNeighborList, bool forceRebuild) {
for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < NUM_BLOCKS; i += blockDim.x*gridDim.x) { for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < NUM_BLOCKS; i += blockDim.x*gridDim.x) {
int index = (int) sortedBlock[i].y; int index = (int) sortedBlock[i].y;
sortedBlockCenter[i] = blockCenter[index]; sortedBlockCenter[i] = blockCenter[index];
...@@ -52,7 +52,7 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co ...@@ -52,7 +52,7 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co
// Also check whether any atom has moved enough so that we really need to rebuild the neighbor list. // Also check whether any atom has moved enough so that we really need to rebuild the neighbor list.
bool rebuild = false; bool rebuild = forceRebuild;
for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
real4 delta = oldPositions[i]-posq[i]; real4 delta = oldPositions[i]-posq[i];
if (delta.x*delta.x + delta.y*delta.y + delta.z*delta.z > 0.25f*PADDING*PADDING) if (delta.x*delta.x + delta.y*delta.y + delta.z*delta.z > 0.25f*PADDING*PADDING)
......
...@@ -77,10 +77,10 @@ void testHarmonicBonds() { ...@@ -77,10 +77,10 @@ void testHarmonicBonds() {
} }
void testLargeSystem() { void testLargeSystem() {
const int numMolecules = 50; const int numMolecules = 25;
const int numParticles = numMolecules*2; const int numParticles = numMolecules*2;
const double cutoff = 2.0; const double cutoff = 2.0;
const double boxSize = 5.0; const double boxSize = 4.0;
const double tolerance = 5; const double tolerance = 5;
System system; System system;
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize)); system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
...@@ -134,10 +134,10 @@ void testLargeSystem() { ...@@ -134,10 +134,10 @@ void testLargeSystem() {
} }
void testVirtualSites() { void testVirtualSites() {
const int numMolecules = 50; const int numMolecules = 25;
const int numParticles = numMolecules*3; const int numParticles = numMolecules*3;
const double cutoff = 2.0; const double cutoff = 2.0;
const double boxSize = 5.0; const double boxSize = 4.0;
const double tolerance = 5; const double tolerance = 5;
System system; System system;
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize)); system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
......
...@@ -296,7 +296,7 @@ void testArgonBox() { ...@@ -296,7 +296,7 @@ void testArgonBox() {
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize)); system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
system.addForce(nonbonded); system.addForce(nonbonded);
VariableLangevinIntegrator integrator(temp, 6.0, 1e-5); VariableLangevinIntegrator integrator(temp, 6.0, 1e-4);
Context context(system, integrator, platform); Context context(system, integrator, platform);
context.setPositions(positions); context.setPositions(positions);
context.setVelocitiesToTemperature(temp); context.setVelocitiesToTemperature(temp);
...@@ -308,13 +308,13 @@ void testArgonBox() { ...@@ -308,13 +308,13 @@ void testArgonBox() {
// Make sure the temperature is correct. // Make sure the temperature is correct.
double ke = 0.0; double ke = 0.0;
for (int i = 0; i < 2000; ++i) { for (int i = 0; i < 1000; ++i) {
double t = 2.0 + 0.01 * (i + 1); double t = 2.0 + 0.02 * (i + 1);
integrator.stepTo(t); integrator.stepTo(t);
State state = context.getState(State::Energy); State state = context.getState(State::Energy);
ke += state.getKineticEnergy(); ke += state.getKineticEnergy();
} }
ke /= 2000; ke /= 1000;
double expected = 1.5 * numParticles * BOLTZ * temp; double expected = 1.5 * numParticles * BOLTZ * temp;
ASSERT_USUALLY_EQUAL_TOL(expected, ke, 0.01); ASSERT_USUALLY_EQUAL_TOL(expected, ke, 0.01);
} }
......
...@@ -71,11 +71,13 @@ public: ...@@ -71,11 +71,13 @@ public:
* @param includeForce true if forces should be computed * @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed * @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include * @param groups a set of bit flags for which force groups to include
* @param valid the method may set this to false to indicate the results are invalid and the force/energy
* calculation should be repeated
* @return the potential energy of the system. This value is added to all values returned by ForceImpls' * @return the potential energy of the system. This value is added to all values returned by ForceImpls'
* calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the * calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the
* energy directly, <i>or</i> add it to an internal buffer so that it will be included here. * energy directly, <i>or</i> add it to an internal buffer so that it will be included here.
*/ */
double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups); double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid);
private: private:
OpenCLContext& cl; OpenCLContext& cl;
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment