Commit 372f1724 authored by Peter Eastman's avatar Peter Eastman
Browse files

Finished converting CudaArrays.

parent f15f591b
......@@ -1154,12 +1154,8 @@ private:
class CudaCalcGayBerneForceKernel : public CalcGayBerneForceKernel {
public:
CudaCalcGayBerneForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcGayBerneForceKernel(name, platform), cu(cu),
hasInitializedKernels(false), sortedParticles(NULL), axisParticleIndices(NULL), sigParams(NULL), epsParams(NULL), scale(NULL), exceptionParticles(NULL),
exceptionParams(NULL), aMatrix(NULL),
bMatrix(NULL), gMatrix(NULL), exclusions(NULL), exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighbors(NULL),
neighborIndex(NULL), neighborBlockCount(NULL), sortedPos(NULL), torque(NULL) {
hasInitializedKernels(false) {
}
~CudaCalcGayBerneForceKernel();
/**
* Initialize the kernel.
*
......@@ -1191,25 +1187,25 @@ private:
bool hasInitializedKernels;
int numRealParticles, numExceptions, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod;
CudaArray* sortedParticles;
CudaArray* axisParticleIndices;
CudaArray* sigParams;
CudaArray* epsParams;
CudaArray* scale;
CudaArray* exceptionParticles;
CudaArray* exceptionParams;
CudaArray* aMatrix;
CudaArray* bMatrix;
CudaArray* gMatrix;
CudaArray* exclusions;
CudaArray* exclusionStartIndex;
CudaArray* blockCenter;
CudaArray* blockBoundingBox;
CudaArray* neighbors;
CudaArray* neighborIndex;
CudaArray* neighborBlockCount;
CudaArray* sortedPos;
CudaArray* torque;
CudaArray sortedParticles;
CudaArray axisParticleIndices;
CudaArray sigParams;
CudaArray epsParams;
CudaArray scale;
CudaArray exceptionParticles;
CudaArray exceptionParams;
CudaArray aMatrix;
CudaArray bMatrix;
CudaArray gMatrix;
CudaArray exclusions;
CudaArray exclusionStartIndex;
CudaArray blockCenter;
CudaArray blockBoundingBox;
CudaArray neighbors;
CudaArray neighborIndex;
CudaArray neighborBlockCount;
CudaArray sortedPos;
CudaArray torque;
std::vector<bool> isRealParticle;
std::vector<std::pair<int, int> > exceptionAtoms;
std::vector<std::pair<int, int> > excludedPairs;
......@@ -1224,9 +1220,8 @@ private:
class CudaCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
public:
CudaCalcCustomCVForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcCustomCVForceKernel(name, platform),
cu(cu), hasInitializedListeners(false), invAtomOrder(NULL), innerInvAtomOrder(NULL) {
cu(cu), hasInitializedListeners(false) {
}
~CudaCalcCustomCVForceKernel();
/**
* Initialize the kernel.
*
......@@ -1260,9 +1255,9 @@ private:
std::vector<std::string> variableNames, paramDerivNames, globalParameterNames;
std::vector<Lepton::ExpressionProgram> variableDerivExpressions;
std::vector<Lepton::ExpressionProgram> paramDerivExpressions;
std::vector<CudaArray*> cvForces;
CudaArray* invAtomOrder;
CudaArray* innerInvAtomOrder;
std::vector<CudaArray> cvForces;
CudaArray invAtomOrder;
CudaArray innerInvAtomOrder;
CUfunction copyStateKernel, copyForcesKernel, addForcesKernel;
};
......@@ -1271,10 +1266,8 @@ private:
*/
class CudaCalcRMSDForceKernel : public CalcRMSDForceKernel {
public:
CudaCalcRMSDForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcRMSDForceKernel(name, platform),
cu(cu), referencePos(NULL), particles(NULL), buffer(NULL) {
CudaCalcRMSDForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcRMSDForceKernel(name, platform), cu(cu) {
}
~CudaCalcRMSDForceKernel();
/**
* Initialize the kernel.
*
......@@ -1313,9 +1306,9 @@ private:
CudaContext& cu;
ForceInfo* info;
double sumNormRef;
CudaArray* referencePos;
CudaArray* particles;
CudaArray* buffer;
CudaArray referencePos;
CudaArray particles;
CudaArray buffer;
CUfunction kernel1, kernel2;
};
......@@ -1326,7 +1319,6 @@ class CudaIntegrateVerletStepKernel : public IntegrateVerletStepKernel {
public:
CudaIntegrateVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVerletStepKernel(name, platform), cu(cu) {
}
~CudaIntegrateVerletStepKernel();
/**
* Initialize the kernel.
*
......@@ -1358,9 +1350,8 @@ private:
*/
class CudaIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
public:
CudaIntegrateLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateLangevinStepKernel(name, platform), cu(cu), params(NULL) {
CudaIntegrateLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateLangevinStepKernel(name, platform), cu(cu) {
}
~CudaIntegrateLangevinStepKernel();
/**
* Initialize the kernel, setting up the particle masses.
*
......@@ -1385,7 +1376,7 @@ public:
private:
CudaContext& cu;
double prevTemp, prevFriction, prevStepSize;
CudaArray* params;
CudaArray params;
CUfunction kernel1, kernel2;
};
......@@ -1396,7 +1387,6 @@ class CudaIntegrateBrownianStepKernel : public IntegrateBrownianStepKernel {
public:
CudaIntegrateBrownianStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateBrownianStepKernel(name, platform), cu(cu) {
}
~CudaIntegrateBrownianStepKernel();
/**
* Initialize the kernel.
*
......@@ -1431,7 +1421,6 @@ class CudaIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStep
public:
CudaIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableVerletStepKernel(name, platform), cu(cu) {
}
~CudaIntegrateVariableVerletStepKernel();
/**
* Initialize the kernel.
*
......@@ -1466,10 +1455,8 @@ private:
*/
class CudaIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
public:
CudaIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableLangevinStepKernel(name, platform),
cu(cu), params(NULL) {
CudaIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableLangevinStepKernel(name, platform), cu(cu) {
}
~CudaIntegrateVariableLangevinStepKernel();
/**
* Initialize the kernel, setting up the particle masses.
*
......@@ -1496,7 +1483,7 @@ public:
private:
CudaContext& cu;
int blockSize;
CudaArray* params;
CudaArray params;
CUfunction kernel1, kernel2, selectSizeKernel;
double prevTemp, prevFriction, prevErrorTol;
};
......@@ -1508,8 +1495,7 @@ class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER};
CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL),
randomSeed(NULL), perDofEnergyParamDerivs(NULL), perDofValues(NULL), needsEnergyParamDerivs(false) {
hasInitializedKernels(false), localValuesAreCurrent(false), perDofValues(NULL), needsEnergyParamDerivs(false) {
}
~CudaIntegrateCustomStepKernel();
/**
......@@ -1590,15 +1576,15 @@ private:
int numGlobalVariables, sumWorkGroupSize;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent;
CudaArray* globalValues;
CudaArray* sumBuffer;
CudaArray* summedValue;
CudaArray* uniformRandoms;
CudaArray* randomSeed;
CudaArray* perDofEnergyParamDerivs;
std::vector<CudaArray*> tabulatedFunctions;
CudaArray globalValues;
CudaArray sumBuffer;
CudaArray summedValue;
CudaArray uniformRandoms;
CudaArray randomSeed;
CudaArray perDofEnergyParamDerivs;
std::vector<CudaArray> tabulatedFunctions;
std::map<int, double> savedEnergy;
std::map<int, CudaArray*> savedForces;
std::map<int, CudaArray> savedForces;
std::set<int> validSavedForces;
CudaParameterSet* perDofValues;
mutable std::vector<std::vector<float> > localPerDofValuesFloat;
......@@ -1651,10 +1637,8 @@ public:
*/
class CudaApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel {
public:
CudaApplyAndersenThermostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyAndersenThermostatKernel(name, platform), cu(cu),
atomGroups(NULL) {
CudaApplyAndersenThermostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyAndersenThermostatKernel(name, platform), cu(cu) {
}
~CudaApplyAndersenThermostatKernel();
/**
* Initialize the kernel.
*
......@@ -1671,7 +1655,7 @@ public:
private:
CudaContext& cu;
int randomSeed;
CudaArray* atomGroups;
CudaArray atomGroups;
CUfunction kernel;
};
......@@ -1681,9 +1665,8 @@ private:
class CudaApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel {
public:
CudaApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyMonteCarloBarostatKernel(name, platform), cu(cu),
hasInitializedKernels(false), savedPositions(NULL), savedForces(NULL), moleculeAtoms(NULL), moleculeStartIndex(NULL) {
hasInitializedKernels(false) {
}
~CudaApplyMonteCarloBarostatKernel();
/**
* Initialize the kernel.
*
......@@ -1715,10 +1698,10 @@ private:
CudaContext& cu;
bool hasInitializedKernels;
int numMolecules;
CudaArray* savedPositions;
CudaArray* savedForces;
CudaArray* moleculeAtoms;
CudaArray* moleculeStartIndex;
CudaArray savedPositions;
CudaArray savedForces;
CudaArray moleculeAtoms;
CudaArray moleculeStartIndex;
CUfunction kernel;
std::vector<int> lastAtomOrder;
};
......@@ -1728,9 +1711,8 @@ private:
*/
class CudaRemoveCMMotionKernel : public RemoveCMMotionKernel {
public:
CudaRemoveCMMotionKernel(std::string name, const Platform& platform, CudaContext& cu) : RemoveCMMotionKernel(name, platform), cu(cu), cmMomentum(NULL) {
CudaRemoveCMMotionKernel(std::string name, const Platform& platform, CudaContext& cu) : RemoveCMMotionKernel(name, platform), cu(cu) {
}
~CudaRemoveCMMotionKernel();
/**
* Initialize the kernel, setting up the particle masses.
*
......@@ -1747,7 +1729,7 @@ public:
private:
CudaContext& cu;
int frequency;
CudaArray* cmMomentum;
CudaArray cmMomentum;
CUfunction kernel1, kernel2;
};
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -84,7 +84,7 @@ private:
std::vector<long long> completionTimes;
std::vector<double> contextNonbondedFractions;
int2* interactionCounts;
CudaArray* contextForces;
CudaArray contextForces;
void* pinnedPositionBuffer;
long long* pinnedForceBuffer;
CUfunction sumKernel;
......
This diff is collapsed.
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -142,15 +142,13 @@ private:
CudaParallelCalcForcesAndEnergyKernel::CudaParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, CudaPlatform::PlatformData& data) :
CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()),
interactionCounts(NULL), contextForces(NULL), pinnedPositionBuffer(NULL), pinnedForceBuffer(NULL) {
interactionCounts(NULL), pinnedPositionBuffer(NULL), pinnedForceBuffer(NULL) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcForcesAndEnergyKernel(name, platform, *data.contexts[i])));
}
CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel() {
data.contexts[0]->setAsCurrent();
if (contextForces != NULL)
delete contextForces;
if (pinnedPositionBuffer != NULL)
cuMemFreeHost(pinnedPositionBuffer);
if (pinnedForceBuffer != NULL)
......@@ -179,8 +177,8 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
CudaContext& cu = *data.contexts[0];
cu.setAsCurrent();
if (contextForces == NULL) {
contextForces = CudaArray::create<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces");
if (!contextForces.isInitialized()) {
contextForces.initialize<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces");
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), CU_MEMHOSTALLOC_PORTABLE), "Error allocating pinned memory");
CHECK_RESULT(cuMemHostAlloc(&pinnedPositionBuffer, cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4)), CU_MEMHOSTALLOC_PORTABLE), "Error allocating pinned memory");
}
......@@ -211,7 +209,7 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, *contextForces, valid, interactionCounts[i]));
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, contextForces, valid, interactionCounts[i]));
}
data.syncContexts();
double energy = 0.0;
......@@ -222,10 +220,10 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
CudaContext& cu = *data.contexts[0];
if (!cu.getPlatformData().peerAccessSupported)
contextForces->upload(pinnedForceBuffer, false);
contextForces.upload(pinnedForceBuffer, false);
int bufferSize = 3*cu.getPaddedNumAtoms();
int numBuffers = data.contexts.size()-1;
void* args[] = {&cu.getForce().getDevicePointer(), &contextForces->getDevicePointer(), &bufferSize, &numBuffers};
void* args[] = {&cu.getForce().getDevicePointer(), &contextForces.getDevicePointer(), &bufferSize, &numBuffers};
cu.executeKernel(sumKernel, args, bufferSize);
// Balance work between the contexts by transferring a little nonbonded work from the context that
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Portions copyright (c) 2008-2018 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Peter Eastman *
* Contributors: *
* *
......@@ -48,7 +48,6 @@ public:
const Platform& platform,
CudaContext& cu,
const System& system);
~CudaCalcAmoebaBondForceKernel();
/**
* Initialize the kernel.
*
......@@ -77,7 +76,7 @@ private:
int numBonds;
CudaContext& cu;
const System& system;
CudaArray* params;
CudaArray params;
};
/**
......@@ -86,7 +85,6 @@ private:
class CudaCalcAmoebaAngleForceKernel : public CalcAmoebaAngleForceKernel {
public:
CudaCalcAmoebaAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaAngleForceKernel();
/**
* Initialize the kernel.
*
......@@ -115,7 +113,7 @@ private:
int numAngles;
CudaContext& cu;
const System& system;
CudaArray* params;
CudaArray params;
};
/**
......@@ -124,7 +122,6 @@ private:
class CudaCalcAmoebaInPlaneAngleForceKernel : public CalcAmoebaInPlaneAngleForceKernel {
public:
CudaCalcAmoebaInPlaneAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaInPlaneAngleForceKernel();
/**
* Initialize the kernel.
*
......@@ -153,7 +150,7 @@ private:
int numAngles;
CudaContext& cu;
const System& system;
CudaArray* params;
CudaArray params;
};
/**
......@@ -162,7 +159,6 @@ private:
class CudaCalcAmoebaPiTorsionForceKernel : public CalcAmoebaPiTorsionForceKernel {
public:
CudaCalcAmoebaPiTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaPiTorsionForceKernel();
/**
* Initialize the kernel.
*
......@@ -191,7 +187,7 @@ private:
int numPiTorsions;
CudaContext& cu;
const System& system;
CudaArray* params;
CudaArray params;
};
/**
......@@ -200,7 +196,6 @@ private:
class CudaCalcAmoebaStretchBendForceKernel : public CalcAmoebaStretchBendForceKernel {
public:
CudaCalcAmoebaStretchBendForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaStretchBendForceKernel();
/**
* Initialize the kernel.
*
......@@ -229,8 +224,8 @@ private:
int numStretchBends;
CudaContext& cu;
const System& system;
CudaArray* params1; // Equilibrium values
CudaArray* params2; // force constants
CudaArray params1; // Equilibrium values
CudaArray params2; // force constants
};
/**
......@@ -239,7 +234,6 @@ private:
class CudaCalcAmoebaOutOfPlaneBendForceKernel : public CalcAmoebaOutOfPlaneBendForceKernel {
public:
CudaCalcAmoebaOutOfPlaneBendForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaOutOfPlaneBendForceKernel();
/**
* Initialize the kernel.
*
......@@ -268,7 +262,7 @@ private:
int numOutOfPlaneBends;
CudaContext& cu;
const System& system;
CudaArray* params;
CudaArray params;
};
/**
......@@ -277,7 +271,6 @@ private:
class CudaCalcAmoebaTorsionTorsionForceKernel : public CalcAmoebaTorsionTorsionForceKernel {
public:
CudaCalcAmoebaTorsionTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaTorsionTorsionForceKernel();
/**
* Initialize the kernel.
*
......@@ -300,9 +293,9 @@ private:
int numTorsionTorsionGrids;
CudaContext& cu;
const System& system;
CudaArray* gridValues;
CudaArray* gridParams;
CudaArray* torsionParams;
CudaArray gridValues;
CudaArray gridParams;
CudaArray torsionParams;
};
/**
......@@ -414,58 +407,58 @@ private:
const System& system;
std::vector<int3> covalentFlagValues;
std::vector<int2> polarizationFlagValues;
CudaArray* multipoleParticles;
CudaArray* molecularDipoles;
CudaArray* molecularQuadrupoles;
CudaArray* labFrameDipoles;
CudaArray* labFrameQuadrupoles;
CudaArray* sphericalDipoles;
CudaArray* sphericalQuadrupoles;
CudaArray* fracDipoles;
CudaArray* fracQuadrupoles;
CudaArray* field;
CudaArray* fieldPolar;
CudaArray* inducedField;
CudaArray* inducedFieldPolar;
CudaArray* torque;
CudaArray* dampingAndThole;
CudaArray* inducedDipole;
CudaArray* inducedDipolePolar;
CudaArray* inducedDipoleErrors;
CudaArray* prevDipoles;
CudaArray* prevDipolesPolar;
CudaArray* prevDipolesGk;
CudaArray* prevDipolesGkPolar;
CudaArray* prevErrors;
CudaArray* diisMatrix;
CudaArray* diisCoefficients;
CudaArray* extrapolatedDipole;
CudaArray* extrapolatedDipolePolar;
CudaArray* extrapolatedDipoleGk;
CudaArray* extrapolatedDipoleGkPolar;
CudaArray* inducedDipoleFieldGradient;
CudaArray* inducedDipoleFieldGradientPolar;
CudaArray* inducedDipoleFieldGradientGk;
CudaArray* inducedDipoleFieldGradientGkPolar;
CudaArray* extrapolatedDipoleFieldGradient;
CudaArray* extrapolatedDipoleFieldGradientPolar;
CudaArray* extrapolatedDipoleFieldGradientGk;
CudaArray* extrapolatedDipoleFieldGradientGkPolar;
CudaArray* polarizability;
CudaArray* covalentFlags;
CudaArray* polarizationGroupFlags;
CudaArray* pmeGrid;
CudaArray* pmeBsplineModuliX;
CudaArray* pmeBsplineModuliY;
CudaArray* pmeBsplineModuliZ;
CudaArray* pmeIgrid;
CudaArray* pmePhi;
CudaArray* pmePhid;
CudaArray* pmePhip;
CudaArray* pmePhidp;
CudaArray* pmeCphi;
CudaArray* pmeAtomRange;
CudaArray* lastPositions;
CudaArray multipoleParticles;
CudaArray molecularDipoles;
CudaArray molecularQuadrupoles;
CudaArray labFrameDipoles;
CudaArray labFrameQuadrupoles;
CudaArray sphericalDipoles;
CudaArray sphericalQuadrupoles;
CudaArray fracDipoles;
CudaArray fracQuadrupoles;
CudaArray field;
CudaArray fieldPolar;
CudaArray inducedField;
CudaArray inducedFieldPolar;
CudaArray torque;
CudaArray dampingAndThole;
CudaArray inducedDipole;
CudaArray inducedDipolePolar;
CudaArray inducedDipoleErrors;
CudaArray prevDipoles;
CudaArray prevDipolesPolar;
CudaArray prevDipolesGk;
CudaArray prevDipolesGkPolar;
CudaArray prevErrors;
CudaArray diisMatrix;
CudaArray diisCoefficients;
CudaArray extrapolatedDipole;
CudaArray extrapolatedDipolePolar;
CudaArray extrapolatedDipoleGk;
CudaArray extrapolatedDipoleGkPolar;
CudaArray inducedDipoleFieldGradient;
CudaArray inducedDipoleFieldGradientPolar;
CudaArray inducedDipoleFieldGradientGk;
CudaArray inducedDipoleFieldGradientGkPolar;
CudaArray extrapolatedDipoleFieldGradient;
CudaArray extrapolatedDipoleFieldGradientPolar;
CudaArray extrapolatedDipoleFieldGradientGk;
CudaArray extrapolatedDipoleFieldGradientGkPolar;
CudaArray polarizability;
CudaArray covalentFlags;
CudaArray polarizationGroupFlags;
CudaArray pmeGrid;
CudaArray pmeBsplineModuliX;
CudaArray pmeBsplineModuliY;
CudaArray pmeBsplineModuliZ;
CudaArray pmeIgrid;
CudaArray pmePhi;
CudaArray pmePhid;
CudaArray pmePhip;
CudaArray pmePhidp;
CudaArray pmeCphi;
CudaArray pmeAtomRange;
CudaArray lastPositions;
CudaSort* sort;
cufftHandle fft;
CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
......@@ -486,7 +479,6 @@ private:
class CudaCalcAmoebaGeneralizedKirkwoodForceKernel : public CalcAmoebaGeneralizedKirkwoodForceKernel {
public:
CudaCalcAmoebaGeneralizedKirkwoodForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaGeneralizedKirkwoodForceKernel();
/**
* Initialize the kernel.
*
......@@ -511,22 +503,22 @@ public:
* Perform the final parts of the force/energy computation.
*/
void finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles, CudaArray& inducedDipole, CudaArray& inducedDipolePolar, CudaArray& dampingAndThole, CudaArray& covalentFlags, CudaArray& polarizationGroupFlags);
CudaArray* getBornRadii() {
CudaArray& getBornRadii() {
return bornRadii;
}
CudaArray* getField() {
CudaArray& getField() {
return field;
}
CudaArray* getInducedField() {
CudaArray& getInducedField() {
return inducedField;
}
CudaArray* getInducedFieldPolar() {
CudaArray& getInducedFieldPolar() {
return inducedFieldPolar;
}
CudaArray* getInducedDipoles() {
CudaArray& getInducedDipoles() {
return inducedDipoleS;
}
CudaArray* getInducedDipolesPolar() {
CudaArray& getInducedDipolesPolar() {
return inducedDipolePolarS;
}
/**
......@@ -544,15 +536,15 @@ private:
int computeBornSumThreads, gkForceThreads, chainRuleThreads, ediffThreads;
AmoebaMultipoleForce::PolarizationType polarizationType;
std::map<std::string, std::string> defines;
CudaArray* params;
CudaArray* bornSum;
CudaArray* bornRadii;
CudaArray* bornForce;
CudaArray* field;
CudaArray* inducedField;
CudaArray* inducedFieldPolar;
CudaArray* inducedDipoleS;
CudaArray* inducedDipolePolarS;
CudaArray params;
CudaArray bornSum;
CudaArray bornRadii;
CudaArray bornForce;
CudaArray field;
CudaArray inducedField;
CudaArray inducedFieldPolar;
CudaArray inducedDipoleS;
CudaArray inducedDipolePolarS;
CUfunction computeBornSumKernel, reduceBornSumKernel, surfaceAreaKernel, gkForceKernel, chainRuleKernel, ediffKernel;
};
......@@ -592,11 +584,11 @@ private:
const System& system;
bool hasInitializedNonbonded;
double dispersionCoefficient;
CudaArray* sigmaEpsilon;
CudaArray* bondReductionAtoms;
CudaArray* bondReductionFactors;
CudaArray* tempPosq;
CudaArray* tempForces;
CudaArray sigmaEpsilon;
CudaArray bondReductionAtoms;
CudaArray bondReductionFactors;
CudaArray tempPosq;
CudaArray tempForces;
CudaNonbondedUtilities* nonbonded;
CUfunction prepareKernel, spreadKernel;
};
......@@ -607,7 +599,6 @@ private:
class CudaCalcAmoebaWcaDispersionForceKernel : public CalcAmoebaWcaDispersionForceKernel {
public:
CudaCalcAmoebaWcaDispersionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
~CudaCalcAmoebaWcaDispersionForceKernel();
/**
* Initialize the kernel.
*
......@@ -636,7 +627,7 @@ private:
CudaContext& cu;
const System& system;
double totalMaximumDispersionEnergy;
CudaArray* radiusEpsilon;
CudaArray radiusEpsilon;
CUfunction forceKernel;
};
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -100,14 +100,6 @@ private:
const DrudeForce& force;
};
CudaCalcDrudeForceKernel::~CudaCalcDrudeForceKernel() {
cu.setAsCurrent();
if (particleParams != NULL)
delete particleParams;
if (pairParams != NULL)
delete pairParams;
}
void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
......@@ -118,7 +110,7 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
// Create the harmonic interaction .
vector<vector<int> > atoms(numParticles, vector<int>(5));
particleParams = CudaArray::create<float4>(cu, numParticles, "drudeParticleParams");
particleParams.initialize<float4>(cu, numParticles, "drudeParticleParams");
vector<float4> paramVector(numParticles);
for (int i = 0; i < numParticles; i++) {
double charge, polarizability, aniso12, aniso34;
......@@ -140,9 +132,9 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
}
paramVector[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f);
}
particleParams->upload(paramVector);
particleParams.upload(paramVector);
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(particleParams->getDevicePointer(), "float4");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(particleParams.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
}
int startPairIndex = cu.getContextIndex()*force.getNumScreenedPairs()/numContexts;
......@@ -152,7 +144,7 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
// Create the screened interaction between dipole pairs.
vector<vector<int> > atoms(numPairs, vector<int>(4));
pairParams = CudaArray::create<float2>(cu, numPairs, "drudePairParams");
pairParams.initialize<float2>(cu, numPairs, "drudePairParams");
vector<float2> paramVector(numPairs);
for (int i = 0; i < numPairs; i++) {
int drude1, drude2;
......@@ -166,9 +158,9 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
double energyScale = ONE_4PI_EPS0*charge1*charge2;
paramVector[i] = make_float2((float) screeningScale, (float) energyScale);
}
pairParams->upload(paramVector);
pairParams.upload(paramVector);
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(pairParams->getDevicePointer(), "float2");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(pairParams.getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
}
cu.addForce(new CudaDrudeForceInfo(force));
......@@ -187,7 +179,7 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
int endParticleIndex = (cu.getContextIndex()+1)*force.getNumParticles()/numContexts;
int numParticles = endParticleIndex-startParticleIndex;
if (numParticles > 0) {
if (particleParams == NULL || numParticles != particleParams->getSize())
if (!particleParams.isInitialized() || numParticles != particleParams.getSize())
throw OpenMMException("updateParametersInContext: The number of Drude particles has changed");
vector<float4> paramVector(numParticles);
for (int i = 0; i < numParticles; i++) {
......@@ -206,7 +198,7 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
k2 = 0;
paramVector[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f);
}
particleParams->upload(paramVector);
particleParams.upload(paramVector);
}
// Set the pair parameters.
......@@ -215,7 +207,7 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
int endPairIndex = (cu.getContextIndex()+1)*force.getNumScreenedPairs()/numContexts;
int numPairs = endPairIndex-startPairIndex;
if (numPairs > 0) {
if (pairParams == NULL || numPairs != pairParams->getSize())
if (!pairParams.isInitialized() || numPairs != pairParams.getSize())
throw OpenMMException("updateParametersInContext: The number of screened pairs has changed");
vector<float2> paramVector(numPairs);
for (int i = 0; i < numPairs; i++) {
......@@ -230,17 +222,10 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
double energyScale = ONE_4PI_EPS0*charge1*charge2;
paramVector[i] = make_float2((float) screeningScale, (float) energyScale);
}
pairParams->upload(paramVector);
pairParams.upload(paramVector);
}
}
CudaIntegrateDrudeLangevinStepKernel::~CudaIntegrateDrudeLangevinStepKernel() {
if (normalParticles != NULL)
delete normalParticles;
if (pairParticles != NULL)
delete pairParticles;
}
void CudaIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
cu.getPlatformData().initializeContexts(system);
cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
......@@ -261,12 +246,12 @@ void CudaIntegrateDrudeLangevinStepKernel::initialize(const System& system, cons
pairParticleVec.push_back(make_int2(p, p1));
}
normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end());
normalParticles = CudaArray::create<int>(cu, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
pairParticles = CudaArray::create<int2>(cu, max((int) pairParticleVec.size(), 1), "drudePairParticles");
normalParticles.initialize<int>(cu, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
pairParticles.initialize<int2>(cu, max((int) pairParticleVec.size(), 1), "drudePairParticles");
if (normalParticleVec.size() > 0)
normalParticles->upload(normalParticleVec);
normalParticles.upload(normalParticleVec);
if (pairParticleVec.size() > 0)
pairParticles->upload(pairParticleVec);
pairParticles.upload(pairParticleVec);
// Create kernels.
......@@ -345,9 +330,9 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
// Call the first integration kernel.
int randomIndex = integration.prepareRandomNumbers(normalParticles->getSize()+2*pairParticles->getSize());
int randomIndex = integration.prepareRandomNumbers(normalParticles.getSize()+2*pairParticles.getSize());
void* args1[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
&normalParticles->getDevicePointer(), &pairParticles->getDevicePointer(), &integration.getStepSize().getDevicePointer(),
&normalParticles.getDevicePointer(), &pairParticles.getDevicePointer(), &integration.getStepSize().getDevicePointer(),
vscalePtr, fscalePtr, noisescalePtr, vscaleDrudePtr, fscaleDrudePtr, noisescaleDrudePtr, &integration.getRandom().getDevicePointer(), &randomIndex};
cu.executeKernel(kernel1, args1, numAtoms);
......@@ -366,8 +351,8 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
if (maxDrudeDistance > 0) {
void* hardwallArgs[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &cu.getVelm().getDevicePointer(),
&pairParticles->getDevicePointer(), &integration.getStepSize().getDevicePointer(), maxDrudeDistancePtr, hardwallscaleDrudePtr};
cu.executeKernel(hardwallKernel, hardwallArgs, pairParticles->getSize());
&pairParticles.getDevicePointer(), &integration.getStepSize().getDevicePointer(), maxDrudeDistancePtr, hardwallscaleDrudePtr};
cu.executeKernel(hardwallKernel, hardwallArgs, pairParticles.getSize());
}
integration.computeVirtualSites();
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -45,9 +45,8 @@ namespace OpenMM {
class CudaCalcDrudeForceKernel : public CalcDrudeForceKernel {
public:
CudaCalcDrudeForceKernel(std::string name, const Platform& platform, CudaContext& cu) :
CalcDrudeForceKernel(name, platform), cu(cu), particleParams(NULL), pairParams(NULL) {
CalcDrudeForceKernel(name, platform), cu(cu) {
}
~CudaCalcDrudeForceKernel();
/**
* Initialize the kernel.
*
......@@ -73,8 +72,8 @@ public:
void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
private:
CudaContext& cu;
CudaArray* particleParams;
CudaArray* pairParams;
CudaArray particleParams;
CudaArray pairParams;
};
/**
......@@ -83,9 +82,8 @@ private:
class CudaIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
public:
CudaIntegrateDrudeLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
IntegrateDrudeLangevinStepKernel(name, platform), cu(cu), normalParticles(NULL), pairParticles(NULL) {
IntegrateDrudeLangevinStepKernel(name, platform), cu(cu) {
}
~CudaIntegrateDrudeLangevinStepKernel();
/**
* Initialize the kernel.
*
......@@ -111,8 +109,8 @@ public:
private:
CudaContext& cu;
double prevStepSize;
CudaArray* normalParticles;
CudaArray* pairParticles;
CudaArray normalParticles;
CudaArray pairParticles;
CUfunction kernel1, kernel2, hardwallKernel;
};
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -62,19 +62,6 @@ static int findFFTDimension(int minimum) {
}
}
CudaIntegrateRPMDStepKernel::~CudaIntegrateRPMDStepKernel() {
if (forces != NULL)
delete forces;
if (positions != NULL)
delete positions;
if (velocities != NULL)
delete velocities;
if (contractedForces != NULL)
delete contractedForces;
if (contractedPositions != NULL)
delete contractedPositions;
}
void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
cu.getPlatformData().initializeContexts(system);
numCopies = integrator.getNumCopies();
......@@ -85,30 +72,30 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
int paddedParticles = cu.getPaddedNumAtoms();
bool useDoublePrecision = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision());
int elementSize = (useDoublePrecision ? sizeof(double4) : sizeof(float4));
forces = CudaArray::create<long long>(cu, numCopies*paddedParticles*3, "rpmdForces");
positions = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdPositions");
velocities = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdVelocities");
forces.initialize<long long>(cu, numCopies*paddedParticles*3, "rpmdForces");
positions.initialize(cu, numCopies*paddedParticles, elementSize, "rpmdPositions");
velocities.initialize(cu, numCopies*paddedParticles, elementSize, "rpmdVelocities");
cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
if (useDoublePrecision) {
vector<double4> temp(positions->getSize());
for (int i = 0; i < positions->getSize(); i++)
vector<double4> temp(positions.getSize());
for (int i = 0; i < positions.getSize(); i++)
temp[i] = make_double4(0, 0, 0, 0);
positions->upload(temp);
for (int i = 0; i < velocities->getSize(); i++)
positions.upload(temp);
for (int i = 0; i < velocities.getSize(); i++)
temp[i] = make_double4(0, 0, 0, 1);
velocities->upload(temp);
velocities.upload(temp);
}
else {
vector<float4> temp(positions->getSize());
for (int i = 0; i < positions->getSize(); i++)
vector<float4> temp(positions.getSize());
for (int i = 0; i < positions.getSize(); i++)
temp[i] = make_float4(0, 0, 0, 0);
positions->upload(temp);
for (int i = 0; i < velocities->getSize(); i++)
positions.upload(temp);
for (int i = 0; i < velocities.getSize(); i++)
temp[i] = make_float4(0, 0, 0, 1);
velocities->upload(temp);
velocities.upload(temp);
}
// Build a list of contractions.
......@@ -137,8 +124,8 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
}
}
if (maxContractedCopies > 0) {
contractedForces = CudaArray::create<long long>(cu, maxContractedCopies*paddedParticles*3, "rpmdContractedForces");
contractedPositions = new CudaArray(cu, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
contractedForces.initialize<long long>(cu, maxContractedCopies*paddedParticles*3, "rpmdContractedForces");
contractedPositions.initialize(cu, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
}
// Create kernels.
......@@ -204,13 +191,13 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
float frictionFloat = (float) friction;
void* frictionPtr = (useDoublePrecision ? (void*) &friction : (void*) &frictionFloat);
int randomIndex = integration.prepareRandomNumbers(numParticles*numCopies);
void* pileArgs[] = {&velocities->getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex, dtPtr, kTPtr, frictionPtr};
void* pileArgs[] = {&velocities.getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex, dtPtr, kTPtr, frictionPtr};
if (integrator.getApplyThermostat())
cu.executeKernel(pileKernel, pileArgs, numParticles*numCopies, workgroupSize);
// Update positions and velocities.
void* stepArgs[] = {&positions->getDevicePointer(), &velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr, kTPtr};
void* stepArgs[] = {&positions.getDevicePointer(), &velocities.getDevicePointer(), &forces.getDevicePointer(), dtPtr, kTPtr};
cu.executeKernel(stepKernel, stepArgs, numParticles*numCopies, workgroupSize);
// Calculate forces based on the updated positions.
......@@ -219,7 +206,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
// Update velocities.
void* velocitiesArgs[] = {&velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr};
void* velocitiesArgs[] = {&velocities.getDevicePointer(), &forces.getDevicePointer(), dtPtr};
cu.executeKernel(velocitiesKernel, velocitiesArgs, numParticles*numCopies, workgroupSize);
// Apply the PILE-L thermostat again.
......@@ -239,7 +226,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
// the same translation to all the beads.
int i = numCopies-1;
void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
void* args[] = {&positions.getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(translateKernel, args, cu.getNumAtoms());
}
}
......@@ -248,7 +235,7 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Compute forces from all groups that didn't have a specified contraction.
for (int i = 0; i < numCopies; i++) {
void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(),
void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
context.computeVirtualSites();
......@@ -260,8 +247,8 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
if (initialBox[0] != finalBox[0] || initialBox[1] != finalBox[1] || initialBox[2] != finalBox[2])
throw OpenMMException("Standard barostats cannot be used with RPMDIntegrator. Use RPMDMonteCarloBarostat instead.");
context.calcForcesAndEnergy(true, false, groupsNotContracted);
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces->getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces.getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities.getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions.getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
}
......@@ -273,32 +260,32 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Find the contracted positions.
void* contractPosArgs[] = {&positions->getDevicePointer(), &contractedPositions->getDevicePointer()};
void* contractPosArgs[] = {&positions.getDevicePointer(), &contractedPositions.getDevicePointer()};
cu.executeKernel(positionContractionKernels[copies], contractPosArgs, numParticles*numCopies, workgroupSize);
// Compute forces.
for (int i = 0; i < copies; i++) {
void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &contractedPositions->getDevicePointer(),
void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &contractedPositions.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
context.computeVirtualSites();
context.calcForcesAndEnergy(true, false, groupFlags);
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &contractedForces->getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &contractedPositions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &contractedForces.getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities.getDevicePointer(), &cu.getPosq().getDevicePointer(), &contractedPositions.getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
}
// Apply the forces to the original copies.
void* contractForceArgs[] = {&forces->getDevicePointer(), &contractedForces->getDevicePointer()};
void* contractForceArgs[] = {&forces.getDevicePointer(), &contractedForces.getDevicePointer()};
cu.executeKernel(forceContractionKernels[copies], contractForceArgs, numParticles*numCopies, workgroupSize);
}
if (groupsByCopies.size() > 0) {
// Ensure the Context contains the positions from the last copy, since we'll assume that later.
int i = numCopies-1;
void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(),
void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
}
......@@ -309,7 +296,7 @@ double CudaIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, c
}
void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
if (positions == NULL)
if (!positions.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
if (pos.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
......@@ -332,7 +319,7 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; i++)
posq[i] = make_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w);
result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
}
else if (cu.getUseMixedPrecision()) {
vector<float4> posqf(cu.getPaddedNumAtoms());
......@@ -340,24 +327,24 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
vector<double4> posq(cu.getPaddedNumAtoms());
for (int i = 0; i < numParticles; i++)
posq[i] = make_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w);
result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
}
else {
vector<float4> posq(cu.getPaddedNumAtoms());
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; i++)
posq[i] = make_float4((float) offsetPos[i][0], (float) offsetPos[i][1], (float) offsetPos[i][2], posq[i].w);
result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &posq[0], numParticles*sizeof(float4));
result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &posq[0], numParticles*sizeof(float4));
}
if (result != CUDA_SUCCESS) {
std::stringstream str;
str<<"Error uploading array "<<positions->getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
str<<"Error uploading array "<<positions.getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(str.str());
}
}
void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) {
if (velocities == NULL)
if (!velocities.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
if (vel.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
......@@ -367,24 +354,24 @@ void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& ve
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; i++)
velm[i] = make_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
result = cuMemcpyHtoD(velocities->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &velm[0], numParticles*sizeof(double4));
result = cuMemcpyHtoD(velocities.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &velm[0], numParticles*sizeof(double4));
}
else {
vector<float4> velm(cu.getPaddedNumAtoms());
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; i++)
velm[i] = make_float4((float) vel[i][0], (float) vel[i][1], (float) vel[i][2], velm[i].w);
result = cuMemcpyHtoD(velocities->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &velm[0], numParticles*sizeof(float4));
result = cuMemcpyHtoD(velocities.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &velm[0], numParticles*sizeof(float4));
}
if (result != CUDA_SUCCESS) {
std::stringstream str;
str<<"Error uploading array "<<velocities->getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
str<<"Error uploading array "<<velocities.getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(str.str());
}
}
void CudaIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
void* copyArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(),
void* copyArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &copy};
cu.executeKernel(copyToContextKernel, copyArgs, cu.getNumAtoms());
}
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -46,9 +46,8 @@ namespace OpenMM {
class CudaIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
public:
CudaIntegrateRPMDStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
IntegrateRPMDStepKernel(name, platform), cu(cu), forces(NULL), positions(NULL), velocities(NULL), contractedForces(NULL), contractedPositions(NULL) {
IntegrateRPMDStepKernel(name, platform), cu(cu) {
}
~CudaIntegrateRPMDStepKernel();
/**
* Initialize the kernel.
*
......@@ -91,11 +90,11 @@ private:
int numCopies, numParticles, workgroupSize;
std::map<int, int> groupsByCopies;
int groupsNotContracted;
CudaArray* forces;
CudaArray* positions;
CudaArray* velocities;
CudaArray* contractedForces;
CudaArray* contractedPositions;
CudaArray forces;
CudaArray positions;
CudaArray velocities;
CudaArray contractedForces;
CudaArray contractedPositions;
CUfunction pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
std::map<int, CUfunction> positionContractionKernels;
std::map<int, CUfunction> forceContractionKernels;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment