Commit 372f1724 authored by Peter Eastman's avatar Peter Eastman
Browse files

Finished converting CudaArrays.

parent f15f591b
...@@ -1154,12 +1154,8 @@ private: ...@@ -1154,12 +1154,8 @@ private:
class CudaCalcGayBerneForceKernel : public CalcGayBerneForceKernel { class CudaCalcGayBerneForceKernel : public CalcGayBerneForceKernel {
public: public:
CudaCalcGayBerneForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcGayBerneForceKernel(name, platform), cu(cu), CudaCalcGayBerneForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcGayBerneForceKernel(name, platform), cu(cu),
hasInitializedKernels(false), sortedParticles(NULL), axisParticleIndices(NULL), sigParams(NULL), epsParams(NULL), scale(NULL), exceptionParticles(NULL), hasInitializedKernels(false) {
exceptionParams(NULL), aMatrix(NULL),
bMatrix(NULL), gMatrix(NULL), exclusions(NULL), exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighbors(NULL),
neighborIndex(NULL), neighborBlockCount(NULL), sortedPos(NULL), torque(NULL) {
} }
~CudaCalcGayBerneForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1191,25 +1187,25 @@ private: ...@@ -1191,25 +1187,25 @@ private:
bool hasInitializedKernels; bool hasInitializedKernels;
int numRealParticles, numExceptions, maxNeighborBlocks; int numRealParticles, numExceptions, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod; GayBerneForce::NonbondedMethod nonbondedMethod;
CudaArray* sortedParticles; CudaArray sortedParticles;
CudaArray* axisParticleIndices; CudaArray axisParticleIndices;
CudaArray* sigParams; CudaArray sigParams;
CudaArray* epsParams; CudaArray epsParams;
CudaArray* scale; CudaArray scale;
CudaArray* exceptionParticles; CudaArray exceptionParticles;
CudaArray* exceptionParams; CudaArray exceptionParams;
CudaArray* aMatrix; CudaArray aMatrix;
CudaArray* bMatrix; CudaArray bMatrix;
CudaArray* gMatrix; CudaArray gMatrix;
CudaArray* exclusions; CudaArray exclusions;
CudaArray* exclusionStartIndex; CudaArray exclusionStartIndex;
CudaArray* blockCenter; CudaArray blockCenter;
CudaArray* blockBoundingBox; CudaArray blockBoundingBox;
CudaArray* neighbors; CudaArray neighbors;
CudaArray* neighborIndex; CudaArray neighborIndex;
CudaArray* neighborBlockCount; CudaArray neighborBlockCount;
CudaArray* sortedPos; CudaArray sortedPos;
CudaArray* torque; CudaArray torque;
std::vector<bool> isRealParticle; std::vector<bool> isRealParticle;
std::vector<std::pair<int, int> > exceptionAtoms; std::vector<std::pair<int, int> > exceptionAtoms;
std::vector<std::pair<int, int> > excludedPairs; std::vector<std::pair<int, int> > excludedPairs;
...@@ -1224,9 +1220,8 @@ private: ...@@ -1224,9 +1220,8 @@ private:
class CudaCalcCustomCVForceKernel : public CalcCustomCVForceKernel { class CudaCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
public: public:
CudaCalcCustomCVForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcCustomCVForceKernel(name, platform), CudaCalcCustomCVForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcCustomCVForceKernel(name, platform),
cu(cu), hasInitializedListeners(false), invAtomOrder(NULL), innerInvAtomOrder(NULL) { cu(cu), hasInitializedListeners(false) {
} }
~CudaCalcCustomCVForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1260,9 +1255,9 @@ private: ...@@ -1260,9 +1255,9 @@ private:
std::vector<std::string> variableNames, paramDerivNames, globalParameterNames; std::vector<std::string> variableNames, paramDerivNames, globalParameterNames;
std::vector<Lepton::ExpressionProgram> variableDerivExpressions; std::vector<Lepton::ExpressionProgram> variableDerivExpressions;
std::vector<Lepton::ExpressionProgram> paramDerivExpressions; std::vector<Lepton::ExpressionProgram> paramDerivExpressions;
std::vector<CudaArray*> cvForces; std::vector<CudaArray> cvForces;
CudaArray* invAtomOrder; CudaArray invAtomOrder;
CudaArray* innerInvAtomOrder; CudaArray innerInvAtomOrder;
CUfunction copyStateKernel, copyForcesKernel, addForcesKernel; CUfunction copyStateKernel, copyForcesKernel, addForcesKernel;
}; };
...@@ -1271,10 +1266,8 @@ private: ...@@ -1271,10 +1266,8 @@ private:
*/ */
class CudaCalcRMSDForceKernel : public CalcRMSDForceKernel { class CudaCalcRMSDForceKernel : public CalcRMSDForceKernel {
public: public:
CudaCalcRMSDForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcRMSDForceKernel(name, platform), CudaCalcRMSDForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcRMSDForceKernel(name, platform), cu(cu) {
cu(cu), referencePos(NULL), particles(NULL), buffer(NULL) {
} }
~CudaCalcRMSDForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1313,9 +1306,9 @@ private: ...@@ -1313,9 +1306,9 @@ private:
CudaContext& cu; CudaContext& cu;
ForceInfo* info; ForceInfo* info;
double sumNormRef; double sumNormRef;
CudaArray* referencePos; CudaArray referencePos;
CudaArray* particles; CudaArray particles;
CudaArray* buffer; CudaArray buffer;
CUfunction kernel1, kernel2; CUfunction kernel1, kernel2;
}; };
...@@ -1326,7 +1319,6 @@ class CudaIntegrateVerletStepKernel : public IntegrateVerletStepKernel { ...@@ -1326,7 +1319,6 @@ class CudaIntegrateVerletStepKernel : public IntegrateVerletStepKernel {
public: public:
CudaIntegrateVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVerletStepKernel(name, platform), cu(cu) { CudaIntegrateVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVerletStepKernel(name, platform), cu(cu) {
} }
~CudaIntegrateVerletStepKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1358,9 +1350,8 @@ private: ...@@ -1358,9 +1350,8 @@ private:
*/ */
class CudaIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel { class CudaIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
public: public:
CudaIntegrateLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateLangevinStepKernel(name, platform), cu(cu), params(NULL) { CudaIntegrateLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateLangevinStepKernel(name, platform), cu(cu) {
} }
~CudaIntegrateLangevinStepKernel();
/** /**
* Initialize the kernel, setting up the particle masses. * Initialize the kernel, setting up the particle masses.
* *
...@@ -1385,7 +1376,7 @@ public: ...@@ -1385,7 +1376,7 @@ public:
private: private:
CudaContext& cu; CudaContext& cu;
double prevTemp, prevFriction, prevStepSize; double prevTemp, prevFriction, prevStepSize;
CudaArray* params; CudaArray params;
CUfunction kernel1, kernel2; CUfunction kernel1, kernel2;
}; };
...@@ -1396,7 +1387,6 @@ class CudaIntegrateBrownianStepKernel : public IntegrateBrownianStepKernel { ...@@ -1396,7 +1387,6 @@ class CudaIntegrateBrownianStepKernel : public IntegrateBrownianStepKernel {
public: public:
CudaIntegrateBrownianStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateBrownianStepKernel(name, platform), cu(cu) { CudaIntegrateBrownianStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateBrownianStepKernel(name, platform), cu(cu) {
} }
~CudaIntegrateBrownianStepKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1431,7 +1421,6 @@ class CudaIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStep ...@@ -1431,7 +1421,6 @@ class CudaIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStep
public: public:
CudaIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableVerletStepKernel(name, platform), cu(cu) { CudaIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableVerletStepKernel(name, platform), cu(cu) {
} }
~CudaIntegrateVariableVerletStepKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1466,10 +1455,8 @@ private: ...@@ -1466,10 +1455,8 @@ private:
*/ */
class CudaIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel { class CudaIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
public: public:
CudaIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableLangevinStepKernel(name, platform), CudaIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableLangevinStepKernel(name, platform), cu(cu) {
cu(cu), params(NULL) {
} }
~CudaIntegrateVariableLangevinStepKernel();
/** /**
* Initialize the kernel, setting up the particle masses. * Initialize the kernel, setting up the particle masses.
* *
...@@ -1496,7 +1483,7 @@ public: ...@@ -1496,7 +1483,7 @@ public:
private: private:
CudaContext& cu; CudaContext& cu;
int blockSize; int blockSize;
CudaArray* params; CudaArray params;
CUfunction kernel1, kernel2, selectSizeKernel; CUfunction kernel1, kernel2, selectSizeKernel;
double prevTemp, prevFriction, prevErrorTol; double prevTemp, prevFriction, prevErrorTol;
}; };
...@@ -1508,8 +1495,7 @@ class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel { ...@@ -1508,8 +1495,7 @@ class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public: public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER}; enum GlobalTargetType {DT, VARIABLE, PARAMETER};
CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu), CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL), hasInitializedKernels(false), localValuesAreCurrent(false), perDofValues(NULL), needsEnergyParamDerivs(false) {
randomSeed(NULL), perDofEnergyParamDerivs(NULL), perDofValues(NULL), needsEnergyParamDerivs(false) {
} }
~CudaIntegrateCustomStepKernel(); ~CudaIntegrateCustomStepKernel();
/** /**
...@@ -1590,15 +1576,15 @@ private: ...@@ -1590,15 +1576,15 @@ private:
int numGlobalVariables, sumWorkGroupSize; int numGlobalVariables, sumWorkGroupSize;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs; bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent; mutable bool localValuesAreCurrent;
CudaArray* globalValues; CudaArray globalValues;
CudaArray* sumBuffer; CudaArray sumBuffer;
CudaArray* summedValue; CudaArray summedValue;
CudaArray* uniformRandoms; CudaArray uniformRandoms;
CudaArray* randomSeed; CudaArray randomSeed;
CudaArray* perDofEnergyParamDerivs; CudaArray perDofEnergyParamDerivs;
std::vector<CudaArray*> tabulatedFunctions; std::vector<CudaArray> tabulatedFunctions;
std::map<int, double> savedEnergy; std::map<int, double> savedEnergy;
std::map<int, CudaArray*> savedForces; std::map<int, CudaArray> savedForces;
std::set<int> validSavedForces; std::set<int> validSavedForces;
CudaParameterSet* perDofValues; CudaParameterSet* perDofValues;
mutable std::vector<std::vector<float> > localPerDofValuesFloat; mutable std::vector<std::vector<float> > localPerDofValuesFloat;
...@@ -1651,10 +1637,8 @@ public: ...@@ -1651,10 +1637,8 @@ public:
*/ */
class CudaApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel { class CudaApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel {
public: public:
CudaApplyAndersenThermostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyAndersenThermostatKernel(name, platform), cu(cu), CudaApplyAndersenThermostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyAndersenThermostatKernel(name, platform), cu(cu) {
atomGroups(NULL) {
} }
~CudaApplyAndersenThermostatKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1671,7 +1655,7 @@ public: ...@@ -1671,7 +1655,7 @@ public:
private: private:
CudaContext& cu; CudaContext& cu;
int randomSeed; int randomSeed;
CudaArray* atomGroups; CudaArray atomGroups;
CUfunction kernel; CUfunction kernel;
}; };
...@@ -1681,9 +1665,8 @@ private: ...@@ -1681,9 +1665,8 @@ private:
class CudaApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel { class CudaApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel {
public: public:
CudaApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyMonteCarloBarostatKernel(name, platform), cu(cu), CudaApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyMonteCarloBarostatKernel(name, platform), cu(cu),
hasInitializedKernels(false), savedPositions(NULL), savedForces(NULL), moleculeAtoms(NULL), moleculeStartIndex(NULL) { hasInitializedKernels(false) {
} }
~CudaApplyMonteCarloBarostatKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1715,10 +1698,10 @@ private: ...@@ -1715,10 +1698,10 @@ private:
CudaContext& cu; CudaContext& cu;
bool hasInitializedKernels; bool hasInitializedKernels;
int numMolecules; int numMolecules;
CudaArray* savedPositions; CudaArray savedPositions;
CudaArray* savedForces; CudaArray savedForces;
CudaArray* moleculeAtoms; CudaArray moleculeAtoms;
CudaArray* moleculeStartIndex; CudaArray moleculeStartIndex;
CUfunction kernel; CUfunction kernel;
std::vector<int> lastAtomOrder; std::vector<int> lastAtomOrder;
}; };
...@@ -1728,9 +1711,8 @@ private: ...@@ -1728,9 +1711,8 @@ private:
*/ */
class CudaRemoveCMMotionKernel : public RemoveCMMotionKernel { class CudaRemoveCMMotionKernel : public RemoveCMMotionKernel {
public: public:
CudaRemoveCMMotionKernel(std::string name, const Platform& platform, CudaContext& cu) : RemoveCMMotionKernel(name, platform), cu(cu), cmMomentum(NULL) { CudaRemoveCMMotionKernel(std::string name, const Platform& platform, CudaContext& cu) : RemoveCMMotionKernel(name, platform), cu(cu) {
} }
~CudaRemoveCMMotionKernel();
/** /**
* Initialize the kernel, setting up the particle masses. * Initialize the kernel, setting up the particle masses.
* *
...@@ -1747,7 +1729,7 @@ public: ...@@ -1747,7 +1729,7 @@ public:
private: private:
CudaContext& cu; CudaContext& cu;
int frequency; int frequency;
CudaArray* cmMomentum; CudaArray cmMomentum;
CUfunction kernel1, kernel2; CUfunction kernel1, kernel2;
}; };
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. * * Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -84,7 +84,7 @@ private: ...@@ -84,7 +84,7 @@ private:
std::vector<long long> completionTimes; std::vector<long long> completionTimes;
std::vector<double> contextNonbondedFractions; std::vector<double> contextNonbondedFractions;
int2* interactionCounts; int2* interactionCounts;
CudaArray* contextForces; CudaArray contextForces;
void* pinnedPositionBuffer; void* pinnedPositionBuffer;
long long* pinnedForceBuffer; long long* pinnedForceBuffer;
CUfunction sumKernel; CUfunction sumKernel;
......
This diff is collapsed.
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. * * Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -142,15 +142,13 @@ private: ...@@ -142,15 +142,13 @@ private:
CudaParallelCalcForcesAndEnergyKernel::CudaParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, CudaPlatform::PlatformData& data) : CudaParallelCalcForcesAndEnergyKernel::CudaParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, CudaPlatform::PlatformData& data) :
CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()), CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()),
interactionCounts(NULL), contextForces(NULL), pinnedPositionBuffer(NULL), pinnedForceBuffer(NULL) { interactionCounts(NULL), pinnedPositionBuffer(NULL), pinnedForceBuffer(NULL) {
for (int i = 0; i < (int) data.contexts.size(); i++) for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcForcesAndEnergyKernel(name, platform, *data.contexts[i]))); kernels.push_back(Kernel(new CudaCalcForcesAndEnergyKernel(name, platform, *data.contexts[i])));
} }
CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel() { CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel() {
data.contexts[0]->setAsCurrent(); data.contexts[0]->setAsCurrent();
if (contextForces != NULL)
delete contextForces;
if (pinnedPositionBuffer != NULL) if (pinnedPositionBuffer != NULL)
cuMemFreeHost(pinnedPositionBuffer); cuMemFreeHost(pinnedPositionBuffer);
if (pinnedForceBuffer != NULL) if (pinnedForceBuffer != NULL)
...@@ -179,8 +177,8 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -179,8 +177,8 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) { void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
CudaContext& cu = *data.contexts[0]; CudaContext& cu = *data.contexts[0];
cu.setAsCurrent(); cu.setAsCurrent();
if (contextForces == NULL) { if (!contextForces.isInitialized()) {
contextForces = CudaArray::create<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces"); contextForces.initialize<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces");
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), CU_MEMHOSTALLOC_PORTABLE), "Error allocating pinned memory"); CHECK_RESULT(cuMemHostAlloc((void**) &pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), CU_MEMHOSTALLOC_PORTABLE), "Error allocating pinned memory");
CHECK_RESULT(cuMemHostAlloc(&pinnedPositionBuffer, cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4)), CU_MEMHOSTALLOC_PORTABLE), "Error allocating pinned memory"); CHECK_RESULT(cuMemHostAlloc(&pinnedPositionBuffer, cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4)), CU_MEMHOSTALLOC_PORTABLE), "Error allocating pinned memory");
} }
...@@ -211,7 +209,7 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con ...@@ -211,7 +209,7 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
for (int i = 0; i < (int) data.contexts.size(); i++) { for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i]; CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread(); CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, *contextForces, valid, interactionCounts[i])); thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, contextForces, valid, interactionCounts[i]));
} }
data.syncContexts(); data.syncContexts();
double energy = 0.0; double energy = 0.0;
...@@ -222,10 +220,10 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con ...@@ -222,10 +220,10 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
CudaContext& cu = *data.contexts[0]; CudaContext& cu = *data.contexts[0];
if (!cu.getPlatformData().peerAccessSupported) if (!cu.getPlatformData().peerAccessSupported)
contextForces->upload(pinnedForceBuffer, false); contextForces.upload(pinnedForceBuffer, false);
int bufferSize = 3*cu.getPaddedNumAtoms(); int bufferSize = 3*cu.getPaddedNumAtoms();
int numBuffers = data.contexts.size()-1; int numBuffers = data.contexts.size()-1;
void* args[] = {&cu.getForce().getDevicePointer(), &contextForces->getDevicePointer(), &bufferSize, &numBuffers}; void* args[] = {&cu.getForce().getDevicePointer(), &contextForces.getDevicePointer(), &bufferSize, &numBuffers};
cu.executeKernel(sumKernel, args, bufferSize); cu.executeKernel(sumKernel, args, bufferSize);
// Balance work between the contexts by transferring a little nonbonded work from the context that // Balance work between the contexts by transferring a little nonbonded work from the context that
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. * * Portions copyright (c) 2008-2018 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Peter Eastman * * Authors: Mark Friedrichs, Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -48,7 +48,6 @@ public: ...@@ -48,7 +48,6 @@ public:
const Platform& platform, const Platform& platform,
CudaContext& cu, CudaContext& cu,
const System& system); const System& system);
~CudaCalcAmoebaBondForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -77,7 +76,7 @@ private: ...@@ -77,7 +76,7 @@ private:
int numBonds; int numBonds;
CudaContext& cu; CudaContext& cu;
const System& system; const System& system;
CudaArray* params; CudaArray params;
}; };
/** /**
...@@ -86,7 +85,6 @@ private: ...@@ -86,7 +85,6 @@ private:
class CudaCalcAmoebaAngleForceKernel : public CalcAmoebaAngleForceKernel { class CudaCalcAmoebaAngleForceKernel : public CalcAmoebaAngleForceKernel {
public: public:
CudaCalcAmoebaAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system); CudaCalcAmoebaAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaAngleForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -115,7 +113,7 @@ private: ...@@ -115,7 +113,7 @@ private:
int numAngles; int numAngles;
CudaContext& cu; CudaContext& cu;
const System& system; const System& system;
CudaArray* params; CudaArray params;
}; };
/** /**
...@@ -124,7 +122,6 @@ private: ...@@ -124,7 +122,6 @@ private:
class CudaCalcAmoebaInPlaneAngleForceKernel : public CalcAmoebaInPlaneAngleForceKernel { class CudaCalcAmoebaInPlaneAngleForceKernel : public CalcAmoebaInPlaneAngleForceKernel {
public: public:
CudaCalcAmoebaInPlaneAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system); CudaCalcAmoebaInPlaneAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaInPlaneAngleForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -153,7 +150,7 @@ private: ...@@ -153,7 +150,7 @@ private:
int numAngles; int numAngles;
CudaContext& cu; CudaContext& cu;
const System& system; const System& system;
CudaArray* params; CudaArray params;
}; };
/** /**
...@@ -162,7 +159,6 @@ private: ...@@ -162,7 +159,6 @@ private:
class CudaCalcAmoebaPiTorsionForceKernel : public CalcAmoebaPiTorsionForceKernel { class CudaCalcAmoebaPiTorsionForceKernel : public CalcAmoebaPiTorsionForceKernel {
public: public:
CudaCalcAmoebaPiTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system); CudaCalcAmoebaPiTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaPiTorsionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -191,7 +187,7 @@ private: ...@@ -191,7 +187,7 @@ private:
int numPiTorsions; int numPiTorsions;
CudaContext& cu; CudaContext& cu;
const System& system; const System& system;
CudaArray* params; CudaArray params;
}; };
/** /**
...@@ -200,7 +196,6 @@ private: ...@@ -200,7 +196,6 @@ private:
class CudaCalcAmoebaStretchBendForceKernel : public CalcAmoebaStretchBendForceKernel { class CudaCalcAmoebaStretchBendForceKernel : public CalcAmoebaStretchBendForceKernel {
public: public:
CudaCalcAmoebaStretchBendForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system); CudaCalcAmoebaStretchBendForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaStretchBendForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -229,8 +224,8 @@ private: ...@@ -229,8 +224,8 @@ private:
int numStretchBends; int numStretchBends;
CudaContext& cu; CudaContext& cu;
const System& system; const System& system;
CudaArray* params1; // Equilibrium values CudaArray params1; // Equilibrium values
CudaArray* params2; // force constants CudaArray params2; // force constants
}; };
/** /**
...@@ -239,7 +234,6 @@ private: ...@@ -239,7 +234,6 @@ private:
class CudaCalcAmoebaOutOfPlaneBendForceKernel : public CalcAmoebaOutOfPlaneBendForceKernel { class CudaCalcAmoebaOutOfPlaneBendForceKernel : public CalcAmoebaOutOfPlaneBendForceKernel {
public: public:
CudaCalcAmoebaOutOfPlaneBendForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system); CudaCalcAmoebaOutOfPlaneBendForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaOutOfPlaneBendForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -268,7 +262,7 @@ private: ...@@ -268,7 +262,7 @@ private:
int numOutOfPlaneBends; int numOutOfPlaneBends;
CudaContext& cu; CudaContext& cu;
const System& system; const System& system;
CudaArray* params; CudaArray params;
}; };
/** /**
...@@ -277,7 +271,6 @@ private: ...@@ -277,7 +271,6 @@ private:
class CudaCalcAmoebaTorsionTorsionForceKernel : public CalcAmoebaTorsionTorsionForceKernel { class CudaCalcAmoebaTorsionTorsionForceKernel : public CalcAmoebaTorsionTorsionForceKernel {
public: public:
CudaCalcAmoebaTorsionTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system); CudaCalcAmoebaTorsionTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaTorsionTorsionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -300,9 +293,9 @@ private: ...@@ -300,9 +293,9 @@ private:
int numTorsionTorsionGrids; int numTorsionTorsionGrids;
CudaContext& cu; CudaContext& cu;
const System& system; const System& system;
CudaArray* gridValues; CudaArray gridValues;
CudaArray* gridParams; CudaArray gridParams;
CudaArray* torsionParams; CudaArray torsionParams;
}; };
/** /**
...@@ -414,58 +407,58 @@ private: ...@@ -414,58 +407,58 @@ private:
const System& system; const System& system;
std::vector<int3> covalentFlagValues; std::vector<int3> covalentFlagValues;
std::vector<int2> polarizationFlagValues; std::vector<int2> polarizationFlagValues;
CudaArray* multipoleParticles; CudaArray multipoleParticles;
CudaArray* molecularDipoles; CudaArray molecularDipoles;
CudaArray* molecularQuadrupoles; CudaArray molecularQuadrupoles;
CudaArray* labFrameDipoles; CudaArray labFrameDipoles;
CudaArray* labFrameQuadrupoles; CudaArray labFrameQuadrupoles;
CudaArray* sphericalDipoles; CudaArray sphericalDipoles;
CudaArray* sphericalQuadrupoles; CudaArray sphericalQuadrupoles;
CudaArray* fracDipoles; CudaArray fracDipoles;
CudaArray* fracQuadrupoles; CudaArray fracQuadrupoles;
CudaArray* field; CudaArray field;
CudaArray* fieldPolar; CudaArray fieldPolar;
CudaArray* inducedField; CudaArray inducedField;
CudaArray* inducedFieldPolar; CudaArray inducedFieldPolar;
CudaArray* torque; CudaArray torque;
CudaArray* dampingAndThole; CudaArray dampingAndThole;
CudaArray* inducedDipole; CudaArray inducedDipole;
CudaArray* inducedDipolePolar; CudaArray inducedDipolePolar;
CudaArray* inducedDipoleErrors; CudaArray inducedDipoleErrors;
CudaArray* prevDipoles; CudaArray prevDipoles;
CudaArray* prevDipolesPolar; CudaArray prevDipolesPolar;
CudaArray* prevDipolesGk; CudaArray prevDipolesGk;
CudaArray* prevDipolesGkPolar; CudaArray prevDipolesGkPolar;
CudaArray* prevErrors; CudaArray prevErrors;
CudaArray* diisMatrix; CudaArray diisMatrix;
CudaArray* diisCoefficients; CudaArray diisCoefficients;
CudaArray* extrapolatedDipole; CudaArray extrapolatedDipole;
CudaArray* extrapolatedDipolePolar; CudaArray extrapolatedDipolePolar;
CudaArray* extrapolatedDipoleGk; CudaArray extrapolatedDipoleGk;
CudaArray* extrapolatedDipoleGkPolar; CudaArray extrapolatedDipoleGkPolar;
CudaArray* inducedDipoleFieldGradient; CudaArray inducedDipoleFieldGradient;
CudaArray* inducedDipoleFieldGradientPolar; CudaArray inducedDipoleFieldGradientPolar;
CudaArray* inducedDipoleFieldGradientGk; CudaArray inducedDipoleFieldGradientGk;
CudaArray* inducedDipoleFieldGradientGkPolar; CudaArray inducedDipoleFieldGradientGkPolar;
CudaArray* extrapolatedDipoleFieldGradient; CudaArray extrapolatedDipoleFieldGradient;
CudaArray* extrapolatedDipoleFieldGradientPolar; CudaArray extrapolatedDipoleFieldGradientPolar;
CudaArray* extrapolatedDipoleFieldGradientGk; CudaArray extrapolatedDipoleFieldGradientGk;
CudaArray* extrapolatedDipoleFieldGradientGkPolar; CudaArray extrapolatedDipoleFieldGradientGkPolar;
CudaArray* polarizability; CudaArray polarizability;
CudaArray* covalentFlags; CudaArray covalentFlags;
CudaArray* polarizationGroupFlags; CudaArray polarizationGroupFlags;
CudaArray* pmeGrid; CudaArray pmeGrid;
CudaArray* pmeBsplineModuliX; CudaArray pmeBsplineModuliX;
CudaArray* pmeBsplineModuliY; CudaArray pmeBsplineModuliY;
CudaArray* pmeBsplineModuliZ; CudaArray pmeBsplineModuliZ;
CudaArray* pmeIgrid; CudaArray pmeIgrid;
CudaArray* pmePhi; CudaArray pmePhi;
CudaArray* pmePhid; CudaArray pmePhid;
CudaArray* pmePhip; CudaArray pmePhip;
CudaArray* pmePhidp; CudaArray pmePhidp;
CudaArray* pmeCphi; CudaArray pmeCphi;
CudaArray* pmeAtomRange; CudaArray pmeAtomRange;
CudaArray* lastPositions; CudaArray lastPositions;
CudaSort* sort; CudaSort* sort;
cufftHandle fft; cufftHandle fft;
CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel; CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
...@@ -486,7 +479,6 @@ private: ...@@ -486,7 +479,6 @@ private:
class CudaCalcAmoebaGeneralizedKirkwoodForceKernel : public CalcAmoebaGeneralizedKirkwoodForceKernel { class CudaCalcAmoebaGeneralizedKirkwoodForceKernel : public CalcAmoebaGeneralizedKirkwoodForceKernel {
public: public:
CudaCalcAmoebaGeneralizedKirkwoodForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system); CudaCalcAmoebaGeneralizedKirkwoodForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaGeneralizedKirkwoodForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -511,22 +503,22 @@ public: ...@@ -511,22 +503,22 @@ public:
* Perform the final parts of the force/energy computation. * Perform the final parts of the force/energy computation.
*/ */
void finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles, CudaArray& inducedDipole, CudaArray& inducedDipolePolar, CudaArray& dampingAndThole, CudaArray& covalentFlags, CudaArray& polarizationGroupFlags); void finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles, CudaArray& inducedDipole, CudaArray& inducedDipolePolar, CudaArray& dampingAndThole, CudaArray& covalentFlags, CudaArray& polarizationGroupFlags);
CudaArray* getBornRadii() { CudaArray& getBornRadii() {
return bornRadii; return bornRadii;
} }
CudaArray* getField() { CudaArray& getField() {
return field; return field;
} }
CudaArray* getInducedField() { CudaArray& getInducedField() {
return inducedField; return inducedField;
} }
CudaArray* getInducedFieldPolar() { CudaArray& getInducedFieldPolar() {
return inducedFieldPolar; return inducedFieldPolar;
} }
CudaArray* getInducedDipoles() { CudaArray& getInducedDipoles() {
return inducedDipoleS; return inducedDipoleS;
} }
CudaArray* getInducedDipolesPolar() { CudaArray& getInducedDipolesPolar() {
return inducedDipolePolarS; return inducedDipolePolarS;
} }
/** /**
...@@ -544,15 +536,15 @@ private: ...@@ -544,15 +536,15 @@ private:
int computeBornSumThreads, gkForceThreads, chainRuleThreads, ediffThreads; int computeBornSumThreads, gkForceThreads, chainRuleThreads, ediffThreads;
AmoebaMultipoleForce::PolarizationType polarizationType; AmoebaMultipoleForce::PolarizationType polarizationType;
std::map<std::string, std::string> defines; std::map<std::string, std::string> defines;
CudaArray* params; CudaArray params;
CudaArray* bornSum; CudaArray bornSum;
CudaArray* bornRadii; CudaArray bornRadii;
CudaArray* bornForce; CudaArray bornForce;
CudaArray* field; CudaArray field;
CudaArray* inducedField; CudaArray inducedField;
CudaArray* inducedFieldPolar; CudaArray inducedFieldPolar;
CudaArray* inducedDipoleS; CudaArray inducedDipoleS;
CudaArray* inducedDipolePolarS; CudaArray inducedDipolePolarS;
CUfunction computeBornSumKernel, reduceBornSumKernel, surfaceAreaKernel, gkForceKernel, chainRuleKernel, ediffKernel; CUfunction computeBornSumKernel, reduceBornSumKernel, surfaceAreaKernel, gkForceKernel, chainRuleKernel, ediffKernel;
}; };
...@@ -592,11 +584,11 @@ private: ...@@ -592,11 +584,11 @@ private:
const System& system; const System& system;
bool hasInitializedNonbonded; bool hasInitializedNonbonded;
double dispersionCoefficient; double dispersionCoefficient;
CudaArray* sigmaEpsilon; CudaArray sigmaEpsilon;
CudaArray* bondReductionAtoms; CudaArray bondReductionAtoms;
CudaArray* bondReductionFactors; CudaArray bondReductionFactors;
CudaArray* tempPosq; CudaArray tempPosq;
CudaArray* tempForces; CudaArray tempForces;
CudaNonbondedUtilities* nonbonded; CudaNonbondedUtilities* nonbonded;
CUfunction prepareKernel, spreadKernel; CUfunction prepareKernel, spreadKernel;
}; };
...@@ -607,7 +599,6 @@ private: ...@@ -607,7 +599,6 @@ private:
class CudaCalcAmoebaWcaDispersionForceKernel : public CalcAmoebaWcaDispersionForceKernel { class CudaCalcAmoebaWcaDispersionForceKernel : public CalcAmoebaWcaDispersionForceKernel {
public: public:
CudaCalcAmoebaWcaDispersionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system); CudaCalcAmoebaWcaDispersionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaWcaDispersionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -636,7 +627,7 @@ private: ...@@ -636,7 +627,7 @@ private:
CudaContext& cu; CudaContext& cu;
const System& system; const System& system;
double totalMaximumDispersionEnergy; double totalMaximumDispersionEnergy;
CudaArray* radiusEpsilon; CudaArray radiusEpsilon;
CUfunction forceKernel; CUfunction forceKernel;
}; };
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. * * Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -100,14 +100,6 @@ private: ...@@ -100,14 +100,6 @@ private:
const DrudeForce& force; const DrudeForce& force;
}; };
CudaCalcDrudeForceKernel::~CudaCalcDrudeForceKernel() {
cu.setAsCurrent();
if (particleParams != NULL)
delete particleParams;
if (pairParams != NULL)
delete pairParams;
}
void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) { void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
cu.setAsCurrent(); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
...@@ -118,7 +110,7 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce ...@@ -118,7 +110,7 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
// Create the harmonic interaction . // Create the harmonic interaction .
vector<vector<int> > atoms(numParticles, vector<int>(5)); vector<vector<int> > atoms(numParticles, vector<int>(5));
particleParams = CudaArray::create<float4>(cu, numParticles, "drudeParticleParams"); particleParams.initialize<float4>(cu, numParticles, "drudeParticleParams");
vector<float4> paramVector(numParticles); vector<float4> paramVector(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
double charge, polarizability, aniso12, aniso34; double charge, polarizability, aniso12, aniso34;
...@@ -140,9 +132,9 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce ...@@ -140,9 +132,9 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
} }
paramVector[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f); paramVector[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f);
} }
particleParams->upload(paramVector); particleParams.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(particleParams->getDevicePointer(), "float4"); replacements["PARAMS"] = cu.getBondedUtilities().addArgument(particleParams.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup()); cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
} }
int startPairIndex = cu.getContextIndex()*force.getNumScreenedPairs()/numContexts; int startPairIndex = cu.getContextIndex()*force.getNumScreenedPairs()/numContexts;
...@@ -152,7 +144,7 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce ...@@ -152,7 +144,7 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
// Create the screened interaction between dipole pairs. // Create the screened interaction between dipole pairs.
vector<vector<int> > atoms(numPairs, vector<int>(4)); vector<vector<int> > atoms(numPairs, vector<int>(4));
pairParams = CudaArray::create<float2>(cu, numPairs, "drudePairParams"); pairParams.initialize<float2>(cu, numPairs, "drudePairParams");
vector<float2> paramVector(numPairs); vector<float2> paramVector(numPairs);
for (int i = 0; i < numPairs; i++) { for (int i = 0; i < numPairs; i++) {
int drude1, drude2; int drude1, drude2;
...@@ -166,9 +158,9 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce ...@@ -166,9 +158,9 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
double energyScale = ONE_4PI_EPS0*charge1*charge2; double energyScale = ONE_4PI_EPS0*charge1*charge2;
paramVector[i] = make_float2((float) screeningScale, (float) energyScale); paramVector[i] = make_float2((float) screeningScale, (float) energyScale);
} }
pairParams->upload(paramVector); pairParams.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(pairParams->getDevicePointer(), "float2"); replacements["PARAMS"] = cu.getBondedUtilities().addArgument(pairParams.getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudePairForce, replacements), force.getForceGroup()); cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
} }
cu.addForce(new CudaDrudeForceInfo(force)); cu.addForce(new CudaDrudeForceInfo(force));
...@@ -187,7 +179,7 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con ...@@ -187,7 +179,7 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
int endParticleIndex = (cu.getContextIndex()+1)*force.getNumParticles()/numContexts; int endParticleIndex = (cu.getContextIndex()+1)*force.getNumParticles()/numContexts;
int numParticles = endParticleIndex-startParticleIndex; int numParticles = endParticleIndex-startParticleIndex;
if (numParticles > 0) { if (numParticles > 0) {
if (particleParams == NULL || numParticles != particleParams->getSize()) if (!particleParams.isInitialized() || numParticles != particleParams.getSize())
throw OpenMMException("updateParametersInContext: The number of Drude particles has changed"); throw OpenMMException("updateParametersInContext: The number of Drude particles has changed");
vector<float4> paramVector(numParticles); vector<float4> paramVector(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
...@@ -206,7 +198,7 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con ...@@ -206,7 +198,7 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
k2 = 0; k2 = 0;
paramVector[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f); paramVector[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f);
} }
particleParams->upload(paramVector); particleParams.upload(paramVector);
} }
// Set the pair parameters. // Set the pair parameters.
...@@ -215,7 +207,7 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con ...@@ -215,7 +207,7 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
int endPairIndex = (cu.getContextIndex()+1)*force.getNumScreenedPairs()/numContexts; int endPairIndex = (cu.getContextIndex()+1)*force.getNumScreenedPairs()/numContexts;
int numPairs = endPairIndex-startPairIndex; int numPairs = endPairIndex-startPairIndex;
if (numPairs > 0) { if (numPairs > 0) {
if (pairParams == NULL || numPairs != pairParams->getSize()) if (!pairParams.isInitialized() || numPairs != pairParams.getSize())
throw OpenMMException("updateParametersInContext: The number of screened pairs has changed"); throw OpenMMException("updateParametersInContext: The number of screened pairs has changed");
vector<float2> paramVector(numPairs); vector<float2> paramVector(numPairs);
for (int i = 0; i < numPairs; i++) { for (int i = 0; i < numPairs; i++) {
...@@ -230,17 +222,10 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con ...@@ -230,17 +222,10 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
double energyScale = ONE_4PI_EPS0*charge1*charge2; double energyScale = ONE_4PI_EPS0*charge1*charge2;
paramVector[i] = make_float2((float) screeningScale, (float) energyScale); paramVector[i] = make_float2((float) screeningScale, (float) energyScale);
} }
pairParams->upload(paramVector); pairParams.upload(paramVector);
} }
} }
CudaIntegrateDrudeLangevinStepKernel::~CudaIntegrateDrudeLangevinStepKernel() {
if (normalParticles != NULL)
delete normalParticles;
if (pairParticles != NULL)
delete pairParticles;
}
void CudaIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) { void CudaIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
cu.getPlatformData().initializeContexts(system); cu.getPlatformData().initializeContexts(system);
cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed()); cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
...@@ -261,12 +246,12 @@ void CudaIntegrateDrudeLangevinStepKernel::initialize(const System& system, cons ...@@ -261,12 +246,12 @@ void CudaIntegrateDrudeLangevinStepKernel::initialize(const System& system, cons
pairParticleVec.push_back(make_int2(p, p1)); pairParticleVec.push_back(make_int2(p, p1));
} }
normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end()); normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end());
normalParticles = CudaArray::create<int>(cu, max((int) normalParticleVec.size(), 1), "drudeNormalParticles"); normalParticles.initialize<int>(cu, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
pairParticles = CudaArray::create<int2>(cu, max((int) pairParticleVec.size(), 1), "drudePairParticles"); pairParticles.initialize<int2>(cu, max((int) pairParticleVec.size(), 1), "drudePairParticles");
if (normalParticleVec.size() > 0) if (normalParticleVec.size() > 0)
normalParticles->upload(normalParticleVec); normalParticles.upload(normalParticleVec);
if (pairParticleVec.size() > 0) if (pairParticleVec.size() > 0)
pairParticles->upload(pairParticleVec); pairParticles.upload(pairParticleVec);
// Create kernels. // Create kernels.
...@@ -345,9 +330,9 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D ...@@ -345,9 +330,9 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
// Call the first integration kernel. // Call the first integration kernel.
int randomIndex = integration.prepareRandomNumbers(normalParticles->getSize()+2*pairParticles->getSize()); int randomIndex = integration.prepareRandomNumbers(normalParticles.getSize()+2*pairParticles.getSize());
void* args1[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(), void* args1[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
&normalParticles->getDevicePointer(), &pairParticles->getDevicePointer(), &integration.getStepSize().getDevicePointer(), &normalParticles.getDevicePointer(), &pairParticles.getDevicePointer(), &integration.getStepSize().getDevicePointer(),
vscalePtr, fscalePtr, noisescalePtr, vscaleDrudePtr, fscaleDrudePtr, noisescaleDrudePtr, &integration.getRandom().getDevicePointer(), &randomIndex}; vscalePtr, fscalePtr, noisescalePtr, vscaleDrudePtr, fscaleDrudePtr, noisescaleDrudePtr, &integration.getRandom().getDevicePointer(), &randomIndex};
cu.executeKernel(kernel1, args1, numAtoms); cu.executeKernel(kernel1, args1, numAtoms);
...@@ -366,8 +351,8 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D ...@@ -366,8 +351,8 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
if (maxDrudeDistance > 0) { if (maxDrudeDistance > 0) {
void* hardwallArgs[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &cu.getVelm().getDevicePointer(), void* hardwallArgs[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &cu.getVelm().getDevicePointer(),
&pairParticles->getDevicePointer(), &integration.getStepSize().getDevicePointer(), maxDrudeDistancePtr, hardwallscaleDrudePtr}; &pairParticles.getDevicePointer(), &integration.getStepSize().getDevicePointer(), maxDrudeDistancePtr, hardwallscaleDrudePtr};
cu.executeKernel(hardwallKernel, hardwallArgs, pairParticles->getSize()); cu.executeKernel(hardwallKernel, hardwallArgs, pairParticles.getSize());
} }
integration.computeVirtualSites(); integration.computeVirtualSites();
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. * * Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -45,9 +45,8 @@ namespace OpenMM { ...@@ -45,9 +45,8 @@ namespace OpenMM {
class CudaCalcDrudeForceKernel : public CalcDrudeForceKernel { class CudaCalcDrudeForceKernel : public CalcDrudeForceKernel {
public: public:
CudaCalcDrudeForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CudaCalcDrudeForceKernel(std::string name, const Platform& platform, CudaContext& cu) :
CalcDrudeForceKernel(name, platform), cu(cu), particleParams(NULL), pairParams(NULL) { CalcDrudeForceKernel(name, platform), cu(cu) {
} }
~CudaCalcDrudeForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -73,8 +72,8 @@ public: ...@@ -73,8 +72,8 @@ public:
void copyParametersToContext(ContextImpl& context, const DrudeForce& force); void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
private: private:
CudaContext& cu; CudaContext& cu;
CudaArray* particleParams; CudaArray particleParams;
CudaArray* pairParams; CudaArray pairParams;
}; };
/** /**
...@@ -83,9 +82,8 @@ private: ...@@ -83,9 +82,8 @@ private:
class CudaIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel { class CudaIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
public: public:
CudaIntegrateDrudeLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : CudaIntegrateDrudeLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
IntegrateDrudeLangevinStepKernel(name, platform), cu(cu), normalParticles(NULL), pairParticles(NULL) { IntegrateDrudeLangevinStepKernel(name, platform), cu(cu) {
} }
~CudaIntegrateDrudeLangevinStepKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -111,8 +109,8 @@ public: ...@@ -111,8 +109,8 @@ public:
private: private:
CudaContext& cu; CudaContext& cu;
double prevStepSize; double prevStepSize;
CudaArray* normalParticles; CudaArray normalParticles;
CudaArray* pairParticles; CudaArray pairParticles;
CUfunction kernel1, kernel2, hardwallKernel; CUfunction kernel1, kernel2, hardwallKernel;
}; };
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. * * Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -62,19 +62,6 @@ static int findFFTDimension(int minimum) { ...@@ -62,19 +62,6 @@ static int findFFTDimension(int minimum) {
} }
} }
CudaIntegrateRPMDStepKernel::~CudaIntegrateRPMDStepKernel() {
if (forces != NULL)
delete forces;
if (positions != NULL)
delete positions;
if (velocities != NULL)
delete velocities;
if (contractedForces != NULL)
delete contractedForces;
if (contractedPositions != NULL)
delete contractedPositions;
}
void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) { void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
cu.getPlatformData().initializeContexts(system); cu.getPlatformData().initializeContexts(system);
numCopies = integrator.getNumCopies(); numCopies = integrator.getNumCopies();
...@@ -85,30 +72,30 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt ...@@ -85,30 +72,30 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
int paddedParticles = cu.getPaddedNumAtoms(); int paddedParticles = cu.getPaddedNumAtoms();
bool useDoublePrecision = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()); bool useDoublePrecision = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision());
int elementSize = (useDoublePrecision ? sizeof(double4) : sizeof(float4)); int elementSize = (useDoublePrecision ? sizeof(double4) : sizeof(float4));
forces = CudaArray::create<long long>(cu, numCopies*paddedParticles*3, "rpmdForces"); forces.initialize<long long>(cu, numCopies*paddedParticles*3, "rpmdForces");
positions = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdPositions"); positions.initialize(cu, numCopies*paddedParticles, elementSize, "rpmdPositions");
velocities = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdVelocities"); velocities.initialize(cu, numCopies*paddedParticles, elementSize, "rpmdVelocities");
cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed()); cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans. // Fill in the posq and velm arrays with safe values to avoid a risk of nans.
if (useDoublePrecision) { if (useDoublePrecision) {
vector<double4> temp(positions->getSize()); vector<double4> temp(positions.getSize());
for (int i = 0; i < positions->getSize(); i++) for (int i = 0; i < positions.getSize(); i++)
temp[i] = make_double4(0, 0, 0, 0); temp[i] = make_double4(0, 0, 0, 0);
positions->upload(temp); positions.upload(temp);
for (int i = 0; i < velocities->getSize(); i++) for (int i = 0; i < velocities.getSize(); i++)
temp[i] = make_double4(0, 0, 0, 1); temp[i] = make_double4(0, 0, 0, 1);
velocities->upload(temp); velocities.upload(temp);
} }
else { else {
vector<float4> temp(positions->getSize()); vector<float4> temp(positions.getSize());
for (int i = 0; i < positions->getSize(); i++) for (int i = 0; i < positions.getSize(); i++)
temp[i] = make_float4(0, 0, 0, 0); temp[i] = make_float4(0, 0, 0, 0);
positions->upload(temp); positions.upload(temp);
for (int i = 0; i < velocities->getSize(); i++) for (int i = 0; i < velocities.getSize(); i++)
temp[i] = make_float4(0, 0, 0, 1); temp[i] = make_float4(0, 0, 0, 1);
velocities->upload(temp); velocities.upload(temp);
} }
// Build a list of contractions. // Build a list of contractions.
...@@ -137,8 +124,8 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt ...@@ -137,8 +124,8 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
} }
} }
if (maxContractedCopies > 0) { if (maxContractedCopies > 0) {
contractedForces = CudaArray::create<long long>(cu, maxContractedCopies*paddedParticles*3, "rpmdContractedForces"); contractedForces.initialize<long long>(cu, maxContractedCopies*paddedParticles*3, "rpmdContractedForces");
contractedPositions = new CudaArray(cu, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions"); contractedPositions.initialize(cu, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
} }
// Create kernels. // Create kernels.
...@@ -204,13 +191,13 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr ...@@ -204,13 +191,13 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
float frictionFloat = (float) friction; float frictionFloat = (float) friction;
void* frictionPtr = (useDoublePrecision ? (void*) &friction : (void*) &frictionFloat); void* frictionPtr = (useDoublePrecision ? (void*) &friction : (void*) &frictionFloat);
int randomIndex = integration.prepareRandomNumbers(numParticles*numCopies); int randomIndex = integration.prepareRandomNumbers(numParticles*numCopies);
void* pileArgs[] = {&velocities->getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex, dtPtr, kTPtr, frictionPtr}; void* pileArgs[] = {&velocities.getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex, dtPtr, kTPtr, frictionPtr};
if (integrator.getApplyThermostat()) if (integrator.getApplyThermostat())
cu.executeKernel(pileKernel, pileArgs, numParticles*numCopies, workgroupSize); cu.executeKernel(pileKernel, pileArgs, numParticles*numCopies, workgroupSize);
// Update positions and velocities. // Update positions and velocities.
void* stepArgs[] = {&positions->getDevicePointer(), &velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr, kTPtr}; void* stepArgs[] = {&positions.getDevicePointer(), &velocities.getDevicePointer(), &forces.getDevicePointer(), dtPtr, kTPtr};
cu.executeKernel(stepKernel, stepArgs, numParticles*numCopies, workgroupSize); cu.executeKernel(stepKernel, stepArgs, numParticles*numCopies, workgroupSize);
// Calculate forces based on the updated positions. // Calculate forces based on the updated positions.
...@@ -219,7 +206,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr ...@@ -219,7 +206,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
// Update velocities. // Update velocities.
void* velocitiesArgs[] = {&velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr}; void* velocitiesArgs[] = {&velocities.getDevicePointer(), &forces.getDevicePointer(), dtPtr};
cu.executeKernel(velocitiesKernel, velocitiesArgs, numParticles*numCopies, workgroupSize); cu.executeKernel(velocitiesKernel, velocitiesArgs, numParticles*numCopies, workgroupSize);
// Apply the PILE-L thermostat again. // Apply the PILE-L thermostat again.
...@@ -239,7 +226,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr ...@@ -239,7 +226,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
// the same translation to all the beads. // the same translation to all the beads.
int i = numCopies-1; int i = numCopies-1;
void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i}; void* args[] = {&positions.getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(translateKernel, args, cu.getNumAtoms()); cu.executeKernel(translateKernel, args, cu.getNumAtoms());
} }
} }
...@@ -248,7 +235,7 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { ...@@ -248,7 +235,7 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Compute forces from all groups that didn't have a specified contraction. // Compute forces from all groups that didn't have a specified contraction.
for (int i = 0; i < numCopies; i++) { for (int i = 0; i < numCopies; i++) {
void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(), void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i}; &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms()); cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
context.computeVirtualSites(); context.computeVirtualSites();
...@@ -260,8 +247,8 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { ...@@ -260,8 +247,8 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
if (initialBox[0] != finalBox[0] || initialBox[1] != finalBox[1] || initialBox[2] != finalBox[2]) if (initialBox[0] != finalBox[0] || initialBox[1] != finalBox[1] || initialBox[2] != finalBox[2])
throw OpenMMException("Standard barostats cannot be used with RPMDIntegrator. Use RPMDMonteCarloBarostat instead."); throw OpenMMException("Standard barostats cannot be used with RPMDIntegrator. Use RPMDMonteCarloBarostat instead.");
context.calcForcesAndEnergy(true, false, groupsNotContracted); context.calcForcesAndEnergy(true, false, groupsNotContracted);
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces->getDevicePointer(), &cu.getVelm().getDevicePointer(), void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces.getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i}; &velocities.getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions.getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms()); cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
} }
...@@ -273,32 +260,32 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { ...@@ -273,32 +260,32 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Find the contracted positions. // Find the contracted positions.
void* contractPosArgs[] = {&positions->getDevicePointer(), &contractedPositions->getDevicePointer()}; void* contractPosArgs[] = {&positions.getDevicePointer(), &contractedPositions.getDevicePointer()};
cu.executeKernel(positionContractionKernels[copies], contractPosArgs, numParticles*numCopies, workgroupSize); cu.executeKernel(positionContractionKernels[copies], contractPosArgs, numParticles*numCopies, workgroupSize);
// Compute forces. // Compute forces.
for (int i = 0; i < copies; i++) { for (int i = 0; i < copies; i++) {
void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &contractedPositions->getDevicePointer(), void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &contractedPositions.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i}; &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms()); cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
context.computeVirtualSites(); context.computeVirtualSites();
context.calcForcesAndEnergy(true, false, groupFlags); context.calcForcesAndEnergy(true, false, groupFlags);
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &contractedForces->getDevicePointer(), &cu.getVelm().getDevicePointer(), void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &contractedForces.getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &contractedPositions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i}; &velocities.getDevicePointer(), &cu.getPosq().getDevicePointer(), &contractedPositions.getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms()); cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
} }
// Apply the forces to the original copies. // Apply the forces to the original copies.
void* contractForceArgs[] = {&forces->getDevicePointer(), &contractedForces->getDevicePointer()}; void* contractForceArgs[] = {&forces.getDevicePointer(), &contractedForces.getDevicePointer()};
cu.executeKernel(forceContractionKernels[copies], contractForceArgs, numParticles*numCopies, workgroupSize); cu.executeKernel(forceContractionKernels[copies], contractForceArgs, numParticles*numCopies, workgroupSize);
} }
if (groupsByCopies.size() > 0) { if (groupsByCopies.size() > 0) {
// Ensure the Context contains the positions from the last copy, since we'll assume that later. // Ensure the Context contains the positions from the last copy, since we'll assume that later.
int i = numCopies-1; int i = numCopies-1;
void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(), void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i}; &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms()); cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
} }
...@@ -309,7 +296,7 @@ double CudaIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, c ...@@ -309,7 +296,7 @@ double CudaIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, c
} }
void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) { void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
if (positions == NULL) if (!positions.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context"); throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
if (pos.size() != numParticles) if (pos.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()"); throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
...@@ -332,7 +319,7 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos ...@@ -332,7 +319,7 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
cu.getPosq().download(posq); cu.getPosq().download(posq);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
posq[i] = make_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w); posq[i] = make_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w);
result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4)); result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
} }
else if (cu.getUseMixedPrecision()) { else if (cu.getUseMixedPrecision()) {
vector<float4> posqf(cu.getPaddedNumAtoms()); vector<float4> posqf(cu.getPaddedNumAtoms());
...@@ -340,24 +327,24 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos ...@@ -340,24 +327,24 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
vector<double4> posq(cu.getPaddedNumAtoms()); vector<double4> posq(cu.getPaddedNumAtoms());
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
posq[i] = make_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w); posq[i] = make_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w);
result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4)); result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
} }
else { else {
vector<float4> posq(cu.getPaddedNumAtoms()); vector<float4> posq(cu.getPaddedNumAtoms());
cu.getPosq().download(posq); cu.getPosq().download(posq);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
posq[i] = make_float4((float) offsetPos[i][0], (float) offsetPos[i][1], (float) offsetPos[i][2], posq[i].w); posq[i] = make_float4((float) offsetPos[i][0], (float) offsetPos[i][1], (float) offsetPos[i][2], posq[i].w);
result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &posq[0], numParticles*sizeof(float4)); result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &posq[0], numParticles*sizeof(float4));
} }
if (result != CUDA_SUCCESS) { if (result != CUDA_SUCCESS) {
std::stringstream str; std::stringstream str;
str<<"Error uploading array "<<positions->getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")"; str<<"Error uploading array "<<positions.getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(str.str()); throw OpenMMException(str.str());
} }
} }
void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) { void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) {
if (velocities == NULL) if (!velocities.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"); throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
if (vel.size() != numParticles) if (vel.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()"); throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
...@@ -367,24 +354,24 @@ void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& ve ...@@ -367,24 +354,24 @@ void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& ve
cu.getVelm().download(velm); cu.getVelm().download(velm);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
velm[i] = make_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w); velm[i] = make_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
result = cuMemcpyHtoD(velocities->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &velm[0], numParticles*sizeof(double4)); result = cuMemcpyHtoD(velocities.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &velm[0], numParticles*sizeof(double4));
} }
else { else {
vector<float4> velm(cu.getPaddedNumAtoms()); vector<float4> velm(cu.getPaddedNumAtoms());
cu.getVelm().download(velm); cu.getVelm().download(velm);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
velm[i] = make_float4((float) vel[i][0], (float) vel[i][1], (float) vel[i][2], velm[i].w); velm[i] = make_float4((float) vel[i][0], (float) vel[i][1], (float) vel[i][2], velm[i].w);
result = cuMemcpyHtoD(velocities->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &velm[0], numParticles*sizeof(float4)); result = cuMemcpyHtoD(velocities.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &velm[0], numParticles*sizeof(float4));
} }
if (result != CUDA_SUCCESS) { if (result != CUDA_SUCCESS) {
std::stringstream str; std::stringstream str;
str<<"Error uploading array "<<velocities->getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")"; str<<"Error uploading array "<<velocities.getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(str.str()); throw OpenMMException(str.str());
} }
} }
void CudaIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) { void CudaIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
void* copyArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(), void* copyArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &copy}; &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &copy};
cu.executeKernel(copyToContextKernel, copyArgs, cu.getNumAtoms()); cu.executeKernel(copyToContextKernel, copyArgs, cu.getNumAtoms());
} }
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. * * Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -46,9 +46,8 @@ namespace OpenMM { ...@@ -46,9 +46,8 @@ namespace OpenMM {
class CudaIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel { class CudaIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
public: public:
CudaIntegrateRPMDStepKernel(std::string name, const Platform& platform, CudaContext& cu) : CudaIntegrateRPMDStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
IntegrateRPMDStepKernel(name, platform), cu(cu), forces(NULL), positions(NULL), velocities(NULL), contractedForces(NULL), contractedPositions(NULL) { IntegrateRPMDStepKernel(name, platform), cu(cu) {
} }
~CudaIntegrateRPMDStepKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -91,11 +90,11 @@ private: ...@@ -91,11 +90,11 @@ private:
int numCopies, numParticles, workgroupSize; int numCopies, numParticles, workgroupSize;
std::map<int, int> groupsByCopies; std::map<int, int> groupsByCopies;
int groupsNotContracted; int groupsNotContracted;
CudaArray* forces; CudaArray forces;
CudaArray* positions; CudaArray positions;
CudaArray* velocities; CudaArray velocities;
CudaArray* contractedForces; CudaArray contractedForces;
CudaArray* contractedPositions; CudaArray contractedPositions;
CUfunction pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel; CUfunction pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
std::map<int, CUfunction> positionContractionKernels; std::map<int, CUfunction> positionContractionKernels;
std::map<int, CUfunction> forceContractionKernels; std::map<int, CUfunction> forceContractionKernels;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment