Commit f15f591b authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to convert CudaArrays.

parent b8c86406
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2017 Stanford University and the Authors. * * Portions copyright (c) 2008-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -265,9 +265,8 @@ private: ...@@ -265,9 +265,8 @@ private:
class CudaCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel { class CudaCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel {
public: public:
CudaCalcHarmonicBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcHarmonicBondForceKernel(name, platform), CudaCalcHarmonicBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcHarmonicBondForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL) { hasInitializedKernel(false), cu(cu), system(system) {
} }
~CudaCalcHarmonicBondForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -298,7 +297,7 @@ private: ...@@ -298,7 +297,7 @@ private:
CudaContext& cu; CudaContext& cu;
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
CudaArray* params; CudaArray params;
}; };
/** /**
...@@ -307,7 +306,7 @@ private: ...@@ -307,7 +306,7 @@ private:
class CudaCalcCustomBondForceKernel : public CalcCustomBondForceKernel { class CudaCalcCustomBondForceKernel : public CalcCustomBondForceKernel {
public: public:
CudaCalcCustomBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomBondForceKernel(name, platform), CudaCalcCustomBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomBondForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) { hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
} }
~CudaCalcCustomBondForceKernel(); ~CudaCalcCustomBondForceKernel();
/** /**
...@@ -341,7 +340,7 @@ private: ...@@ -341,7 +340,7 @@ private:
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
}; };
...@@ -352,9 +351,8 @@ private: ...@@ -352,9 +351,8 @@ private:
class CudaCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel { class CudaCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
public: public:
CudaCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcHarmonicAngleForceKernel(name, platform), CudaCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcHarmonicAngleForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL) { hasInitializedKernel(false), cu(cu), system(system) {
} }
~CudaCalcHarmonicAngleForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -385,7 +383,7 @@ private: ...@@ -385,7 +383,7 @@ private:
CudaContext& cu; CudaContext& cu;
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
CudaArray* params; CudaArray params;
}; };
/** /**
...@@ -394,7 +392,7 @@ private: ...@@ -394,7 +392,7 @@ private:
class CudaCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel { class CudaCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel {
public: public:
CudaCalcCustomAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomAngleForceKernel(name, platform), CudaCalcCustomAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomAngleForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) { hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
} }
~CudaCalcCustomAngleForceKernel(); ~CudaCalcCustomAngleForceKernel();
/** /**
...@@ -428,7 +426,7 @@ private: ...@@ -428,7 +426,7 @@ private:
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
}; };
...@@ -439,9 +437,8 @@ private: ...@@ -439,9 +437,8 @@ private:
class CudaCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel { class CudaCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
public: public:
CudaCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcPeriodicTorsionForceKernel(name, platform), CudaCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcPeriodicTorsionForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL) { hasInitializedKernel(false), cu(cu), system(system) {
} }
~CudaCalcPeriodicTorsionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -472,7 +469,7 @@ private: ...@@ -472,7 +469,7 @@ private:
CudaContext& cu; CudaContext& cu;
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
CudaArray* params; CudaArray params;
}; };
/** /**
...@@ -481,9 +478,8 @@ private: ...@@ -481,9 +478,8 @@ private:
class CudaCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel { class CudaCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
public: public:
CudaCalcRBTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcRBTorsionForceKernel(name, platform), CudaCalcRBTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcRBTorsionForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params1(NULL), params2(NULL) { hasInitializedKernel(false), cu(cu), system(system) {
} }
~CudaCalcRBTorsionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -514,8 +510,8 @@ private: ...@@ -514,8 +510,8 @@ private:
CudaContext& cu; CudaContext& cu;
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
CudaArray* params1; CudaArray params1;
CudaArray* params2; CudaArray params2;
}; };
/** /**
...@@ -524,9 +520,8 @@ private: ...@@ -524,9 +520,8 @@ private:
class CudaCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel { class CudaCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
public: public:
CudaCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCMAPTorsionForceKernel(name, platform), CudaCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCMAPTorsionForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), coefficients(NULL), mapPositions(NULL), torsionMaps(NULL) { hasInitializedKernel(false), cu(cu), system(system) {
} }
~CudaCalcCMAPTorsionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -558,9 +553,9 @@ private: ...@@ -558,9 +553,9 @@ private:
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
std::vector<int2> mapPositionsVec; std::vector<int2> mapPositionsVec;
CudaArray* coefficients; CudaArray coefficients;
CudaArray* mapPositions; CudaArray mapPositions;
CudaArray* torsionMaps; CudaArray torsionMaps;
}; };
/** /**
...@@ -569,7 +564,7 @@ private: ...@@ -569,7 +564,7 @@ private:
class CudaCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel { class CudaCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel {
public: public:
CudaCalcCustomTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomTorsionForceKernel(name, platform), CudaCalcCustomTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomTorsionForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) { hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
} }
~CudaCalcCustomTorsionForceKernel(); ~CudaCalcCustomTorsionForceKernel();
/** /**
...@@ -603,7 +598,7 @@ private: ...@@ -603,7 +598,7 @@ private:
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
}; };
...@@ -614,9 +609,7 @@ private: ...@@ -614,9 +609,7 @@ private:
class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel { class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public: public:
CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform), CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL), cu(cu), hasInitializedFFT(false), sort(NULL), dispersionFft(NULL), fft(NULL), pmeio(NULL) {
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeDispersionBsplineModuliX(NULL), pmeDispersionBsplineModuliY(NULL),
pmeDispersionBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), dispersionFft(NULL), fft(NULL), pmeio(NULL) {
} }
~CudaCalcNonbondedForceKernel(); ~CudaCalcNonbondedForceKernel();
/** /**
...@@ -682,20 +675,20 @@ private: ...@@ -682,20 +675,20 @@ private:
CudaContext& cu; CudaContext& cu;
ForceInfo* info; ForceInfo* info;
bool hasInitializedFFT; bool hasInitializedFFT;
CudaArray* sigmaEpsilon; CudaArray sigmaEpsilon;
CudaArray* exceptionParams; CudaArray exceptionParams;
CudaArray* cosSinSums; CudaArray cosSinSums;
CudaArray* directPmeGrid; CudaArray directPmeGrid;
CudaArray* reciprocalPmeGrid; CudaArray reciprocalPmeGrid;
CudaArray* pmeBsplineModuliX; CudaArray pmeBsplineModuliX;
CudaArray* pmeBsplineModuliY; CudaArray pmeBsplineModuliY;
CudaArray* pmeBsplineModuliZ; CudaArray pmeBsplineModuliZ;
CudaArray* pmeDispersionBsplineModuliX; CudaArray pmeDispersionBsplineModuliX;
CudaArray* pmeDispersionBsplineModuliY; CudaArray pmeDispersionBsplineModuliY;
CudaArray* pmeDispersionBsplineModuliZ; CudaArray pmeDispersionBsplineModuliZ;
CudaArray* pmeAtomRange; CudaArray pmeAtomRange;
CudaArray* pmeAtomGridIndex; CudaArray pmeAtomGridIndex;
CudaArray* pmeEnergyBuffer; CudaArray pmeEnergyBuffer;
CudaSort* sort; CudaSort* sort;
Kernel cpuPme; Kernel cpuPme;
PmeIO* pmeio; PmeIO* pmeio;
...@@ -737,7 +730,7 @@ private: ...@@ -737,7 +730,7 @@ private:
class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel { class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
public: public:
CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomNonbondedForceKernel(name, platform), CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomNonbondedForceKernel(name, platform),
cu(cu), params(NULL), globals(NULL), interactionGroupData(NULL), forceCopy(NULL), system(system), hasInitializedKernel(false) { cu(cu), params(NULL), forceCopy(NULL), system(system), hasInitializedKernel(false) {
} }
~CudaCalcCustomNonbondedForceKernel(); ~CudaCalcCustomNonbondedForceKernel();
/** /**
...@@ -769,13 +762,13 @@ private: ...@@ -769,13 +762,13 @@ private:
CudaContext& cu; CudaContext& cu;
ForceInfo* info; ForceInfo* info;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray globals;
CudaArray* interactionGroupData; CudaArray interactionGroupData;
CUfunction interactionGroupKernel; CUfunction interactionGroupKernel;
std::vector<void*> interactionGroupArgs; std::vector<void*> interactionGroupArgs;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions; std::vector<CudaArray> tabulatedFunctions;
double longRangeCoefficient; double longRangeCoefficient;
std::vector<double> longRangeCoefficientDerivs; std::vector<double> longRangeCoefficientDerivs;
bool hasInitializedLongRangeCorrection, hasInitializedKernel, hasParamDerivs; bool hasInitializedLongRangeCorrection, hasInitializedKernel, hasParamDerivs;
...@@ -790,9 +783,8 @@ private: ...@@ -790,9 +783,8 @@ private:
class CudaCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel { class CudaCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel {
public: public:
CudaCalcGBSAOBCForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcGBSAOBCForceKernel(name, platform), cu(cu), CudaCalcGBSAOBCForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcGBSAOBCForceKernel(name, platform), cu(cu),
hasCreatedKernels(false), params(NULL), bornSum(NULL), bornRadii(NULL), bornForce(NULL), obcChain(NULL) { hasCreatedKernels(false) {
} }
~CudaCalcGBSAOBCForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -823,11 +815,11 @@ private: ...@@ -823,11 +815,11 @@ private:
int maxTiles; int maxTiles;
CudaContext& cu; CudaContext& cu;
ForceInfo* info; ForceInfo* info;
CudaArray* params; CudaArray params;
CudaArray* bornSum; CudaArray bornSum;
CudaArray* bornRadii; CudaArray bornRadii;
CudaArray* bornForce; CudaArray bornForce;
CudaArray* obcChain; CudaArray obcChain;
CUfunction computeBornSumKernel; CUfunction computeBornSumKernel;
CUfunction reduceBornSumKernel; CUfunction reduceBornSumKernel;
CUfunction force1Kernel; CUfunction force1Kernel;
...@@ -841,8 +833,7 @@ private: ...@@ -841,8 +833,7 @@ private:
class CudaCalcCustomGBForceKernel : public CalcCustomGBForceKernel { class CudaCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
public: public:
CudaCalcCustomGBForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomGBForceKernel(name, platform), CudaCalcCustomGBForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomGBForceKernel(name, platform),
hasInitializedKernels(false), cu(cu), params(NULL), computedValues(NULL), energyDerivs(NULL), energyDerivChain(NULL), longEnergyDerivs(NULL), globals(NULL), hasInitializedKernels(false), cu(cu), params(NULL), computedValues(NULL), energyDerivs(NULL), energyDerivChain(NULL), system(system) {
valueBuffers(NULL), system(system) {
} }
~CudaCalcCustomGBForceKernel(); ~CudaCalcCustomGBForceKernel();
/** /**
...@@ -880,13 +871,13 @@ private: ...@@ -880,13 +871,13 @@ private:
CudaParameterSet* energyDerivs; CudaParameterSet* energyDerivs;
CudaParameterSet* energyDerivChain; CudaParameterSet* energyDerivChain;
std::vector<CudaParameterSet*> dValuedParam; std::vector<CudaParameterSet*> dValuedParam;
std::vector<CudaArray*> dValue0dParam; std::vector<CudaArray> dValue0dParam;
CudaArray* longEnergyDerivs; CudaArray longEnergyDerivs;
CudaArray* globals; CudaArray globals;
CudaArray* valueBuffers; CudaArray valueBuffers;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions; std::vector<CudaArray> tabulatedFunctions;
std::vector<bool> pairValueUsesParam, pairEnergyUsesParam, pairEnergyUsesValue; std::vector<bool> pairValueUsesParam, pairEnergyUsesParam, pairEnergyUsesValue;
const System& system; const System& system;
CUfunction pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel; CUfunction pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel;
...@@ -901,7 +892,7 @@ private: ...@@ -901,7 +892,7 @@ private:
class CudaCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel { class CudaCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel {
public: public:
CudaCalcCustomExternalForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomExternalForceKernel(name, platform), CudaCalcCustomExternalForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomExternalForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) { hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
} }
~CudaCalcCustomExternalForceKernel(); ~CudaCalcCustomExternalForceKernel();
/** /**
...@@ -935,7 +926,7 @@ private: ...@@ -935,7 +926,7 @@ private:
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
}; };
...@@ -946,8 +937,7 @@ private: ...@@ -946,8 +937,7 @@ private:
class CudaCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel { class CudaCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
public: public:
CudaCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomHbondForceKernel(name, platform), CudaCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomHbondForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), donorParams(NULL), acceptorParams(NULL), donors(NULL), acceptors(NULL), hasInitializedKernel(false), cu(cu), donorParams(NULL), acceptorParams(NULL), system(system) {
globals(NULL), donorExclusions(NULL), acceptorExclusions(NULL), system(system) {
} }
~CudaCalcCustomHbondForceKernel(); ~CudaCalcCustomHbondForceKernel();
/** /**
...@@ -981,14 +971,14 @@ private: ...@@ -981,14 +971,14 @@ private:
ForceInfo* info; ForceInfo* info;
CudaParameterSet* donorParams; CudaParameterSet* donorParams;
CudaParameterSet* acceptorParams; CudaParameterSet* acceptorParams;
CudaArray* globals; CudaArray globals;
CudaArray* donors; CudaArray donors;
CudaArray* acceptors; CudaArray acceptors;
CudaArray* donorExclusions; CudaArray donorExclusions;
CudaArray* acceptorExclusions; CudaArray acceptorExclusions;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions; std::vector<CudaArray> tabulatedFunctions;
std::vector<void*> donorArgs, acceptorArgs; std::vector<void*> donorArgs, acceptorArgs;
const System& system; const System& system;
CUfunction donorKernel, acceptorKernel; CUfunction donorKernel, acceptorKernel;
...@@ -1000,7 +990,7 @@ private: ...@@ -1000,7 +990,7 @@ private:
class CudaCalcCustomCentroidBondForceKernel : public CalcCustomCentroidBondForceKernel { class CudaCalcCustomCentroidBondForceKernel : public CalcCustomCentroidBondForceKernel {
public: public:
CudaCalcCustomCentroidBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomCentroidBondForceKernel(name, platform), CudaCalcCustomCentroidBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomCentroidBondForceKernel(name, platform),
cu(cu), params(NULL), globals(NULL), groupParticles(NULL), groupWeights(NULL), groupOffsets(NULL), groupForces(NULL), bondGroups(NULL), centerPositions(NULL), system(system) { cu(cu), params(NULL), system(system) {
} }
~CudaCalcCustomCentroidBondForceKernel(); ~CudaCalcCustomCentroidBondForceKernel();
/** /**
...@@ -1034,16 +1024,16 @@ private: ...@@ -1034,16 +1024,16 @@ private:
CudaContext& cu; CudaContext& cu;
ForceInfo* info; ForceInfo* info;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray globals;
CudaArray* groupParticles; CudaArray groupParticles;
CudaArray* groupWeights; CudaArray groupWeights;
CudaArray* groupOffsets; CudaArray groupOffsets;
CudaArray* groupForces; CudaArray groupForces;
CudaArray* bondGroups; CudaArray bondGroups;
CudaArray* centerPositions; CudaArray centerPositions;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions; std::vector<CudaArray> tabulatedFunctions;
std::vector<void*> groupForcesArgs; std::vector<void*> groupForcesArgs;
CUfunction computeCentersKernel, groupForcesKernel, applyForcesKernel; CUfunction computeCentersKernel, groupForcesKernel, applyForcesKernel;
const System& system; const System& system;
...@@ -1055,7 +1045,7 @@ private: ...@@ -1055,7 +1045,7 @@ private:
class CudaCalcCustomCompoundBondForceKernel : public CalcCustomCompoundBondForceKernel { class CudaCalcCustomCompoundBondForceKernel : public CalcCustomCompoundBondForceKernel {
public: public:
CudaCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomCompoundBondForceKernel(name, platform), CudaCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomCompoundBondForceKernel(name, platform),
cu(cu), params(NULL), globals(NULL), system(system) { cu(cu), params(NULL), system(system) {
} }
~CudaCalcCustomCompoundBondForceKernel(); ~CudaCalcCustomCompoundBondForceKernel();
/** /**
...@@ -1088,10 +1078,10 @@ private: ...@@ -1088,10 +1078,10 @@ private:
CudaContext& cu; CudaContext& cu;
ForceInfo* info; ForceInfo* info;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions; std::vector<CudaArray> tabulatedFunctions;
const System& system; const System& system;
}; };
...@@ -1101,9 +1091,7 @@ private: ...@@ -1101,9 +1091,7 @@ private:
class CudaCalcCustomManyParticleForceKernel : public CalcCustomManyParticleForceKernel { class CudaCalcCustomManyParticleForceKernel : public CalcCustomManyParticleForceKernel {
public: public:
CudaCalcCustomManyParticleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomManyParticleForceKernel(name, platform), CudaCalcCustomManyParticleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomManyParticleForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), params(NULL), particleTypes(NULL), orderIndex(NULL), particleOrder(NULL), exclusions(NULL), hasInitializedKernel(false), cu(cu), params(NULL), system(system) {
exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighborPairs(NULL), numNeighborPairs(NULL), neighborStartIndex(NULL),
numNeighborsForAtom(NULL), neighbors(NULL), system(system) {
} }
~CudaCalcCustomManyParticleForceKernel(); ~CudaCalcCustomManyParticleForceKernel();
/** /**
...@@ -1138,21 +1126,21 @@ private: ...@@ -1138,21 +1126,21 @@ private:
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize; int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* particleTypes; CudaArray particleTypes;
CudaArray* orderIndex; CudaArray orderIndex;
CudaArray* particleOrder; CudaArray particleOrder;
CudaArray* exclusions; CudaArray exclusions;
CudaArray* exclusionStartIndex; CudaArray exclusionStartIndex;
CudaArray* blockCenter; CudaArray blockCenter;
CudaArray* blockBoundingBox; CudaArray blockBoundingBox;
CudaArray* neighborPairs; CudaArray neighborPairs;
CudaArray* numNeighborPairs; CudaArray numNeighborPairs;
CudaArray* neighborStartIndex; CudaArray neighborStartIndex;
CudaArray* numNeighborsForAtom; CudaArray numNeighborsForAtom;
CudaArray* neighbors; CudaArray neighbors;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions; std::vector<CudaArray> tabulatedFunctions;
std::vector<void*> forceArgs, blockBoundsArgs, neighborsArgs, startIndicesArgs, copyPairsArgs; std::vector<void*> forceArgs, blockBoundsArgs, neighborsArgs, startIndicesArgs, copyPairsArgs;
const System& system; const System& system;
CUfunction forceKernel, blockBoundsKernel, neighborsKernel, startIndicesKernel, copyPairsKernel; CUfunction forceKernel, blockBoundsKernel, neighborsKernel, startIndicesKernel, copyPairsKernel;
......
...@@ -84,8 +84,10 @@ void CudaBondedUtilities::initialize(const System& system) { ...@@ -84,8 +84,10 @@ void CudaBondedUtilities::initialize(const System& system) {
for (int i = 0; i < numForces; i++) { for (int i = 0; i < numForces; i++) {
int numBonds = forceAtoms[i].size(); int numBonds = forceAtoms[i].size();
int numAtoms = forceAtoms[i][0].size(); int numAtoms = forceAtoms[i][0].size();
int numArrays = (numAtoms+3)/4;
int startAtom = 0; int startAtom = 0;
while (startAtom < numAtoms) { atomIndices[i].resize(numArrays);
for (int j = 0; j < numArrays; j++) {
int width = min(numAtoms-startAtom, 4); int width = min(numAtoms-startAtom, 4);
int paddedWidth = (width == 3 ? 4 : width); int paddedWidth = (width == 3 ? 4 : width);
vector<unsigned int> indexVec(paddedWidth*numBonds); vector<unsigned int> indexVec(paddedWidth*numBonds);
...@@ -93,9 +95,8 @@ void CudaBondedUtilities::initialize(const System& system) { ...@@ -93,9 +95,8 @@ void CudaBondedUtilities::initialize(const System& system) {
for (int atom = 0; atom < width; atom++) for (int atom = 0; atom < width; atom++)
indexVec[bond*paddedWidth+atom] = forceAtoms[i][bond][startAtom+atom]; indexVec[bond*paddedWidth+atom] = forceAtoms[i][bond][startAtom+atom];
} }
atomIndices[i].push_back(CudaArray()); atomIndices[i][j].initialize(context, numBonds, 4*paddedWidth, "bondedIndices");
atomIndices[i].back().initialize(context, numBonds, 4*paddedWidth, "bondedIndices"); atomIndices[i][j].upload(&indexVec[0]);
atomIndices[i].back().upload(&indexVec[0]);
startAtom += width; startAtom += width;
} }
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2017 Stanford University and the Authors. * * Portions copyright (c) 2008-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -505,12 +505,6 @@ private: ...@@ -505,12 +505,6 @@ private:
const HarmonicBondForce& force; const HarmonicBondForce& force;
}; };
CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) { void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
cu.setAsCurrent(); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
...@@ -520,18 +514,18 @@ void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const Har ...@@ -520,18 +514,18 @@ void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const Har
if (numBonds == 0) if (numBonds == 0)
return; return;
vector<vector<int> > atoms(numBonds, vector<int>(2)); vector<vector<int> > atoms(numBonds, vector<int>(2));
params = CudaArray::create<float2>(cu, numBonds, "bondParams"); params.initialize<float2>(cu, numBonds, "bondParams");
vector<float2> paramVector(numBonds); vector<float2> paramVector(numBonds);
for (int i = 0; i < numBonds; i++) { for (int i = 0; i < numBonds; i++) {
double length, k; double length, k;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k); force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
paramVector[i] = make_float2((float) length, (float) k); paramVector[i] = make_float2((float) length, (float) k);
} }
params->upload(paramVector); params.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicBondForce; replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicBondForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2"); replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params.getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup()); cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
info = new ForceInfo(force); info = new ForceInfo(force);
cu.addForce(info); cu.addForce(info);
...@@ -560,7 +554,7 @@ void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& conte ...@@ -560,7 +554,7 @@ void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& conte
force.getBondParameters(startIndex+i, atom1, atom2, length, k); force.getBondParameters(startIndex+i, atom1, atom2, length, k);
paramVector[i] = make_float2((float) length, (float) k); paramVector[i] = make_float2((float) length, (float) k);
} }
params->upload(paramVector); params.upload(paramVector);
// Mark that the current reordering may be invalid. // Mark that the current reordering may be invalid.
...@@ -600,8 +594,6 @@ CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() { ...@@ -600,8 +594,6 @@ CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
cu.setAsCurrent(); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
} }
void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) { void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
...@@ -649,9 +641,9 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo ...@@ -649,9 +641,9 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
variables[name] = "bondParams"+params->getParameterSuffix(i); variables[name] = "bondParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customBondGlobals"); globals.initialize<float>(cu, force.getNumGlobalParameters(), "customBondGlobals");
globals->upload(globalParamValues); globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float"); string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]"; string value = argName+"["+cu.intToString(i)+"]";
...@@ -680,7 +672,7 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo ...@@ -680,7 +672,7 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
} }
double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]); float value = (float) context.getParameter(globalParamNames[i]);
...@@ -689,7 +681,7 @@ double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool include ...@@ -689,7 +681,7 @@ double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool include
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
return 0.0; return 0.0;
} }
...@@ -749,12 +741,6 @@ private: ...@@ -749,12 +741,6 @@ private:
const HarmonicAngleForce& force; const HarmonicAngleForce& force;
}; };
CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) { void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
cu.setAsCurrent(); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
...@@ -764,7 +750,7 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha ...@@ -764,7 +750,7 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
if (numAngles == 0) if (numAngles == 0)
return; return;
vector<vector<int> > atoms(numAngles, vector<int>(3)); vector<vector<int> > atoms(numAngles, vector<int>(3));
params = CudaArray::create<float2>(cu, numAngles, "angleParams"); params.initialize<float2>(cu, numAngles, "angleParams");
vector<float2> paramVector(numAngles); vector<float2> paramVector(numAngles);
for (int i = 0; i < numAngles; i++) { for (int i = 0; i < numAngles; i++) {
double angle, k; double angle, k;
...@@ -772,11 +758,11 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha ...@@ -772,11 +758,11 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
paramVector[i] = make_float2((float) angle, (float) k); paramVector[i] = make_float2((float) angle, (float) k);
} }
params->upload(paramVector); params.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicAngleForce; replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicAngleForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2"); replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params.getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup()); cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
info = new ForceInfo(force); info = new ForceInfo(force);
cu.addForce(info); cu.addForce(info);
...@@ -805,7 +791,7 @@ void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& cont ...@@ -805,7 +791,7 @@ void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& cont
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k); force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
paramVector[i] = make_float2((float) angle, (float) k); paramVector[i] = make_float2((float) angle, (float) k);
} }
params->upload(paramVector); params.upload(paramVector);
// Mark that the current reordering may be invalid. // Mark that the current reordering may be invalid.
...@@ -846,8 +832,6 @@ CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() { ...@@ -846,8 +832,6 @@ CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
cu.setAsCurrent(); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
} }
void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) { void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
...@@ -895,9 +879,9 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust ...@@ -895,9 +879,9 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
variables[name] = "angleParams"+params->getParameterSuffix(i); variables[name] = "angleParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customAngleGlobals"); globals.initialize<float>(cu, force.getNumGlobalParameters(), "customAngleGlobals");
globals->upload(globalParamValues); globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float"); string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]"; string value = argName+"["+cu.intToString(i)+"]";
...@@ -926,7 +910,7 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust ...@@ -926,7 +910,7 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
} }
double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]); float value = (float) context.getParameter(globalParamNames[i]);
...@@ -935,7 +919,7 @@ double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includ ...@@ -935,7 +919,7 @@ double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includ
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
return 0.0; return 0.0;
} }
...@@ -996,12 +980,6 @@ private: ...@@ -996,12 +980,6 @@ private:
const PeriodicTorsionForce& force; const PeriodicTorsionForce& force;
}; };
CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) { void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
cu.setAsCurrent(); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
...@@ -1011,7 +989,7 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const ...@@ -1011,7 +989,7 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
if (numTorsions == 0) if (numTorsions == 0)
return; return;
vector<vector<int> > atoms(numTorsions, vector<int>(4)); vector<vector<int> > atoms(numTorsions, vector<int>(4));
params = CudaArray::create<float4>(cu, numTorsions, "periodicTorsionParams"); params.initialize<float4>(cu, numTorsions, "periodicTorsionParams");
vector<float4> paramVector(numTorsions); vector<float4> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) { for (int i = 0; i < numTorsions; i++) {
int periodicity; int periodicity;
...@@ -1019,11 +997,11 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const ...@@ -1019,11 +997,11 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k); force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f); paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
} }
params->upload(paramVector); params.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::periodicTorsionForce; replacements["COMPUTE_FORCE"] = CudaKernelSources::periodicTorsionForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float4"); replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup()); cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force); info = new ForceInfo(force);
cu.addForce(info); cu.addForce(info);
...@@ -1052,7 +1030,7 @@ void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& co ...@@ -1052,7 +1030,7 @@ void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& co
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k); force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f); paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
} }
params->upload(paramVector); params.upload(paramVector);
// Mark that the current reordering may be invalid. // Mark that the current reordering may be invalid.
...@@ -1087,14 +1065,6 @@ private: ...@@ -1087,14 +1065,6 @@ private:
const RBTorsionForce& force; const RBTorsionForce& force;
}; };
CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
cu.setAsCurrent();
if (params1 != NULL)
delete params1;
if (params2 != NULL)
delete params2;
}
void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) { void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
cu.setAsCurrent(); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
...@@ -1104,8 +1074,8 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors ...@@ -1104,8 +1074,8 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
if (numTorsions == 0) if (numTorsions == 0)
return; return;
vector<vector<int> > atoms(numTorsions, vector<int>(4)); vector<vector<int> > atoms(numTorsions, vector<int>(4));
params1 = CudaArray::create<float4>(cu, numTorsions, "rbTorsionParams1"); params1.initialize<float4>(cu, numTorsions, "rbTorsionParams1");
params2 = CudaArray::create<float2>(cu, numTorsions, "rbTorsionParams2"); params2.initialize<float2>(cu, numTorsions, "rbTorsionParams2");
vector<float4> paramVector1(numTorsions); vector<float4> paramVector1(numTorsions);
vector<float2> paramVector2(numTorsions); vector<float2> paramVector2(numTorsions);
for (int i = 0; i < numTorsions; i++) { for (int i = 0; i < numTorsions; i++) {
...@@ -1115,13 +1085,13 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors ...@@ -1115,13 +1085,13 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
paramVector2[i] = make_float2((float) c4, (float) c5); paramVector2[i] = make_float2((float) c4, (float) c5);
} }
params1->upload(paramVector1); params1.upload(paramVector1);
params2->upload(paramVector2); params2.upload(paramVector2);
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::rbTorsionForce; replacements["COMPUTE_FORCE"] = CudaKernelSources::rbTorsionForce;
replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1->getDevicePointer(), "float4"); replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1.getDevicePointer(), "float4");
replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2->getDevicePointer(), "float2"); replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2.getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup()); cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force); info = new ForceInfo(force);
cu.addForce(info); cu.addForce(info);
...@@ -1152,8 +1122,8 @@ void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1152,8 +1122,8 @@ void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context,
paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3); paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3);
paramVector2[i] = make_float2((float) c4, (float) c5); paramVector2[i] = make_float2((float) c4, (float) c5);
} }
params1->upload(paramVector1); params1.upload(paramVector1);
params2->upload(paramVector2); params2.upload(paramVector2);
// Mark that the current reordering may be invalid. // Mark that the current reordering may be invalid.
...@@ -1190,15 +1160,6 @@ private: ...@@ -1190,15 +1160,6 @@ private:
const CMAPTorsionForce& force; const CMAPTorsionForce& force;
}; };
CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
if (coefficients != NULL)
delete coefficients;
if (mapPositions != NULL)
delete mapPositions;
if (torsionMaps != NULL)
delete torsionMaps;
}
void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) { void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
cu.setAsCurrent(); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
...@@ -1230,17 +1191,17 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP ...@@ -1230,17 +1191,17 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP
vector<int> torsionMapsVec(numTorsions); vector<int> torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++) for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]); force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients = CudaArray::create<float4>(cu, coeffVec.size(), "cmapTorsionCoefficients"); coefficients.initialize<float4>(cu, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions = CudaArray::create<int2>(cu, numMaps, "cmapTorsionMapPositions"); mapPositions.initialize<int2>(cu, numMaps, "cmapTorsionMapPositions");
torsionMaps = CudaArray::create<int>(cu, numTorsions, "cmapTorsionMaps"); torsionMaps.initialize<int>(cu, numTorsions, "cmapTorsionMaps");
coefficients->upload(coeffVec); coefficients.upload(coeffVec);
mapPositions->upload(mapPositionsVec); mapPositions.upload(mapPositionsVec);
torsionMaps->upload(torsionMapsVec); torsionMaps.upload(torsionMapsVec);
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients->getDevicePointer(), "float4"); replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients.getDevicePointer(), "float4");
replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions->getDevicePointer(), "int2"); replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions.getDevicePointer(), "int2");
replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps->getDevicePointer(), "int"); replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps.getDevicePointer(), "int");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::cmapTorsionForce, replacements), force.getForceGroup()); cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force); info = new ForceInfo(force);
cu.addForce(info); cu.addForce(info);
...@@ -1256,9 +1217,9 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex ...@@ -1256,9 +1217,9 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts; int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex; numTorsions = endIndex-startIndex;
if (mapPositions->getSize() != numMaps) if (mapPositions.getSize() != numMaps)
throw OpenMMException("updateParametersInContext: The number of maps has changed"); throw OpenMMException("updateParametersInContext: The number of maps has changed");
if (torsionMaps->getSize() != numTorsions) if (torsionMaps.getSize() != numTorsions)
throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed"); throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");
// Update the maps. // Update the maps.
...@@ -1281,7 +1242,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex ...@@ -1281,7 +1242,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
coeffVec.push_back(make_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15])); coeffVec.push_back(make_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
} }
} }
coefficients->upload(coeffVec); coefficients.upload(coeffVec);
// Update the indices. // Update the indices.
...@@ -1290,7 +1251,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex ...@@ -1290,7 +1251,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
int index[8]; int index[8];
force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]); force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
} }
torsionMaps->upload(torsionMapsVec); torsionMaps.upload(torsionMapsVec);
} }
class CudaCalcCustomTorsionForceKernel::ForceInfo : public CudaForceInfo { class CudaCalcCustomTorsionForceKernel::ForceInfo : public CudaForceInfo {
...@@ -1327,8 +1288,6 @@ private: ...@@ -1327,8 +1288,6 @@ private:
CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() { CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
} }
void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) { void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
...@@ -1376,9 +1335,9 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu ...@@ -1376,9 +1335,9 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
variables[name] = "torsionParams"+params->getParameterSuffix(i); variables[name] = "torsionParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customTorsionGlobals"); globals.initialize<float>(cu, force.getNumGlobalParameters(), "customTorsionGlobals");
globals->upload(globalParamValues); globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float"); string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]"; string value = argName+"["+cu.intToString(i)+"]";
...@@ -1407,7 +1366,7 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu ...@@ -1407,7 +1366,7 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
} }
double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]); float value = (float) context.getParameter(globalParamNames[i]);
...@@ -1416,7 +1375,7 @@ double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool incl ...@@ -1416,7 +1375,7 @@ double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool incl
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
return 0.0; return 0.0;
} }
...@@ -1576,34 +1535,6 @@ private: ...@@ -1576,34 +1535,6 @@ private:
CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() { CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
cu.setAsCurrent(); cu.setAsCurrent();
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
if (exceptionParams != NULL)
delete exceptionParams;
if (cosSinSums != NULL)
delete cosSinSums;
if (directPmeGrid != NULL)
delete directPmeGrid;
if (reciprocalPmeGrid != NULL)
delete reciprocalPmeGrid;
if (pmeBsplineModuliX != NULL)
delete pmeBsplineModuliX;
if (pmeBsplineModuliY != NULL)
delete pmeBsplineModuliY;
if (pmeBsplineModuliZ != NULL)
delete pmeBsplineModuliZ;
if (pmeDispersionBsplineModuliX != NULL)
delete pmeDispersionBsplineModuliX;
if (pmeDispersionBsplineModuliY != NULL)
delete pmeDispersionBsplineModuliY;
if (pmeDispersionBsplineModuliZ != NULL)
delete pmeDispersionBsplineModuliZ;
if (pmeAtomRange != NULL)
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
if (pmeEnergyBuffer != NULL)
delete pmeEnergyBuffer;
if (sort != NULL) if (sort != NULL)
delete sort; delete sort;
if (fft != NULL) if (fft != NULL)
...@@ -1647,7 +1578,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1647,7 +1578,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions. // Initialize nonbonded interactions.
int numParticles = force.getNumParticles(); int numParticles = force.getNumParticles();
sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon"); sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
CudaArray& posq = cu.getPosq(); CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize()); vector<double4> temp(posq.getSize());
float4* posqf = (float4*) &temp[0]; float4* posqf = (float4*) &temp[0];
...@@ -1682,7 +1613,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1682,7 +1613,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
exclusionList[exclusion.second].push_back(exclusion.first); exclusionList[exclusion.second].push_back(exclusion.first);
} }
posq.upload(&temp[0]); posq.upload(&temp[0]);
sigmaEpsilon->upload(sigmaEpsilonVector); sigmaEpsilon.upload(sigmaEpsilonVector);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod()); nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff); bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic); bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
...@@ -1740,7 +1671,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1740,7 +1671,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums"); ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces"); ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums"); cosSinSums.initialize(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
} }
} }
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) { else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
...@@ -1844,22 +1775,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1844,22 +1775,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int gridElements = gridSizeX*gridSizeY*gridSizeZ; int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME) if (doLJPME)
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ); gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
directPmeGrid = new CudaArray(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid"); directPmeGrid.initialize(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid = new CudaArray(cu, gridElements, 2*elementSize, "reciprocalPmeGrid"); reciprocalPmeGrid.initialize(cu, gridElements, 2*elementSize, "reciprocalPmeGrid");
cu.addAutoclearBuffer(*directPmeGrid); cu.addAutoclearBuffer(directPmeGrid);
pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX"); pmeBsplineModuliX.initialize(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY"); pmeBsplineModuliY.initialize(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ"); pmeBsplineModuliZ.initialize(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) { if (doLJPME) {
pmeDispersionBsplineModuliX = new CudaArray(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX"); pmeDispersionBsplineModuliX.initialize(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY = new CudaArray(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY"); pmeDispersionBsplineModuliY.initialize(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ = new CudaArray(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ"); pmeDispersionBsplineModuliZ.initialize(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
} }
pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange"); pmeAtomRange.initialize<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex"); pmeAtomGridIndex.initialize<int2>(cu, numParticles, "pmeAtomGridIndex");
int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float)); int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer = new CudaArray(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer"); pmeEnergyBuffer.initialize(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cu.clearBuffer(*pmeEnergyBuffer); cu.clearBuffer(pmeEnergyBuffer);
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms()); sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
int cufftVersion; int cufftVersion;
cufftGetVersion(&cufftVersion); cufftGetVersion(&cufftVersion);
...@@ -1903,7 +1834,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1903,7 +1834,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (recipForceGroup < 0) if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup(); recipForceGroup = force.getForceGroup();
cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup)); cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), *pmeEnergyBuffer, recipForceGroup)); cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), pmeEnergyBuffer, recipForceGroup));
} }
hasInitializedFFT = true; hasInitializedFFT = true;
...@@ -1916,9 +1847,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1916,9 +1847,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
xsize = gridSizeX; xsize = gridSizeX;
ysize = gridSizeY; ysize = gridSizeY;
zsize = gridSizeZ; zsize = gridSizeZ;
xmoduli = pmeBsplineModuliX; xmoduli = &pmeBsplineModuliX;
ymoduli = pmeBsplineModuliY; ymoduli = &pmeBsplineModuliY;
zmoduli = pmeBsplineModuliZ; zmoduli = &pmeBsplineModuliZ;
} }
else { else {
if (!doLJPME) if (!doLJPME)
...@@ -1926,9 +1857,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1926,9 +1857,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
xsize = dispersionGridSizeX; xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY; ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ; zsize = dispersionGridSizeZ;
xmoduli = pmeDispersionBsplineModuliX; xmoduli = &pmeDispersionBsplineModuliX;
ymoduli = pmeDispersionBsplineModuliY; ymoduli = &pmeDispersionBsplineModuliY;
zmoduli = pmeDispersionBsplineModuliZ; zmoduli = &pmeDispersionBsplineModuliZ;
} }
int maxSize = max(max(xsize, ysize), zsize); int maxSize = max(max(xsize, ysize), zsize);
vector<double> data(PmeOrder); vector<double> data(PmeOrder);
...@@ -2008,7 +1939,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -2008,7 +1939,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true); cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
if (hasLJ) if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
sizeof(float2), sigmaEpsilon->getDevicePointer())); sizeof(float2), sigmaEpsilon.getDevicePointer()));
// Initialize the exceptions. // Initialize the exceptions.
...@@ -2019,7 +1950,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -2019,7 +1950,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (numExceptions > 0) { if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions); exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2)); vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams = CudaArray::create<float4>(cu, numExceptions, "exceptionParams"); exceptionParams.initialize<float4>(cu, numExceptions, "exceptionParams");
vector<float4> exceptionParamsVector(numExceptions); vector<float4> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) { for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon; double chargeProd, sigma, epsilon;
...@@ -2027,9 +1958,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -2027,9 +1958,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f); exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]); exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
} }
exceptionParams->upload(exceptionParamsVector); exceptionParams.upload(exceptionParamsVector);
map<string, string> replacements; map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams->getDevicePointer(), "float4"); replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup()); cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
} }
info = new ForceInfo(force); info = new ForceInfo(force);
...@@ -2037,13 +1968,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -2037,13 +1968,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
} }
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) { double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
if (cosSinSums != NULL && includeReciprocal) { if (cosSinSums.isInitialized() && includeReciprocal) {
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums->getDevicePointer(), cu.getPeriodicBoxSizePointer()}; void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums->getSize()); cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums.getSize());
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums->getDevicePointer(), cu.getPeriodicBoxSizePointer()}; void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms()); cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
} }
if (directPmeGrid != NULL && includeReciprocal) { if (directPmeGrid.isInitialized()&& includeReciprocal) {
if (usePmeStream) if (usePmeStream)
cu.setCurrentStream(pmeStream); cu.setCurrentStream(pmeStream);
...@@ -2075,118 +2006,118 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2075,118 +2006,118 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
// Execute the reciprocal space kernels. // Execute the reciprocal space kernels.
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(), void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(), cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]}; recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms()); cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(*pmeAtomGridIndex); sort->sort(pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(), void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(), cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()}; recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128); cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) { if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()}; void* finishSpreadArgs[] = {&directPmeGrid.getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, gridSizeX*gridSizeY*gridSizeZ, 256); cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
} }
if (useCudaFFT) { if (useCudaFFT) {
if (cu.getUseDoublePrecision()) if (cu.getUseDoublePrecision())
cufftExecD2Z(fftForward, (double*) directPmeGrid->getDevicePointer(), (double2*) reciprocalPmeGrid->getDevicePointer()); cufftExecD2Z(fftForward, (double*) directPmeGrid.getDevicePointer(), (double2*) reciprocalPmeGrid.getDevicePointer());
else else
cufftExecR2C(fftForward, (float*) directPmeGrid->getDevicePointer(), (float2*) reciprocalPmeGrid->getDevicePointer()); cufftExecR2C(fftForward, (float*) directPmeGrid.getDevicePointer(), (float2*) reciprocalPmeGrid.getDevicePointer());
} }
else { else {
fft->execFFT(*directPmeGrid, *reciprocalPmeGrid, true); fft->execFFT(directPmeGrid, reciprocalPmeGrid, true);
} }
if (includeEnergy) { if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocalPmeGrid->getDevicePointer(), usePmeStream ? &pmeEnergyBuffer->getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(), void* computeEnergyArgs[] = {&reciprocalPmeGrid.getDevicePointer(), usePmeStream ? &pmeEnergyBuffer.getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(), &pmeBsplineModuliX.getDevicePointer(), &pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]}; cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, gridSizeX*gridSizeY*gridSizeZ); cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, gridSizeX*gridSizeY*gridSizeZ);
} }
void* convolutionArgs[] = {&reciprocalPmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), void* convolutionArgs[] = {&reciprocalPmeGrid.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(), &pmeBsplineModuliX.getDevicePointer(), &pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]}; cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeConvolutionKernel, convolutionArgs, gridSizeX*gridSizeY*gridSizeZ, 256); cu.executeKernel(pmeConvolutionKernel, convolutionArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
if (useCudaFFT) { if (useCudaFFT) {
if (cu.getUseDoublePrecision()) if (cu.getUseDoublePrecision())
cufftExecZ2D(fftBackward, (double2*) reciprocalPmeGrid->getDevicePointer(), (double*) directPmeGrid->getDevicePointer()); cufftExecZ2D(fftBackward, (double2*) reciprocalPmeGrid.getDevicePointer(), (double*) directPmeGrid.getDevicePointer());
else else
cufftExecC2R(fftBackward, (float2*) reciprocalPmeGrid->getDevicePointer(), (float*) directPmeGrid->getDevicePointer()); cufftExecC2R(fftBackward, (float2*) reciprocalPmeGrid.getDevicePointer(), (float*) directPmeGrid.getDevicePointer());
} }
else { else {
fft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false); fft->execFFT(reciprocalPmeGrid, directPmeGrid, false);
} }
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(), void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(), cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()}; recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128); cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
// As written, we check only the Electrostatic grid pointer to get here. We could separate them out, but for // As written, we check only the Electrostatic grid pointer to get here. We could separate them out, but for
// now we assume that LJPME can only be used if electrostatic PME is also active. // now we assume that LJPME can only be used if electrostatic PME is also active.
if (doLJPME) { if (doLJPME) {
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(), void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(), cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]}; recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms()); cu.executeKernel(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(*pmeAtomGridIndex); sort->sort(pmeAtomGridIndex);
cu.clearBuffer(*directPmeGrid); cu.clearBuffer(directPmeGrid);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(), void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(), cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&sigmaEpsilon->getDevicePointer()}; &sigmaEpsilon.getDevicePointer()};
cu.executeKernel(pmeDispersionSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128); cu.executeKernel(pmeDispersionSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) { if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()}; void* finishSpreadArgs[] = {&directPmeGrid.getDevicePointer()};
cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256); cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
} }
if (useCudaFFT) { if (useCudaFFT) {
if (cu.getUseDoublePrecision()) if (cu.getUseDoublePrecision())
cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid->getDevicePointer(), (double2*) reciprocalPmeGrid->getDevicePointer()); cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid.getDevicePointer(), (double2*) reciprocalPmeGrid.getDevicePointer());
else else
cufftExecR2C(dispersionFftForward, (float*) directPmeGrid->getDevicePointer(), (float2*) reciprocalPmeGrid->getDevicePointer()); cufftExecR2C(dispersionFftForward, (float*) directPmeGrid.getDevicePointer(), (float2*) reciprocalPmeGrid.getDevicePointer());
} }
else { else {
dispersionFft->execFFT(*directPmeGrid, *reciprocalPmeGrid, true); dispersionFft->execFFT(directPmeGrid, reciprocalPmeGrid, true);
} }
if (includeEnergy) { if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocalPmeGrid->getDevicePointer(), usePmeStream ? &pmeEnergyBuffer->getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(), void* computeEnergyArgs[] = {&reciprocalPmeGrid.getDevicePointer(), usePmeStream ? &pmeEnergyBuffer.getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX->getDevicePointer(), &pmeDispersionBsplineModuliY->getDevicePointer(), &pmeDispersionBsplineModuliZ->getDevicePointer(), &pmeDispersionBsplineModuliX.getDevicePointer(), &pmeDispersionBsplineModuliY.getDevicePointer(), &pmeDispersionBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]}; cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ); cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
} }
void* convolutionArgs[] = {&reciprocalPmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), void* convolutionArgs[] = {&reciprocalPmeGrid.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX->getDevicePointer(), &pmeDispersionBsplineModuliY->getDevicePointer(), &pmeDispersionBsplineModuliZ->getDevicePointer(), &pmeDispersionBsplineModuliX.getDevicePointer(), &pmeDispersionBsplineModuliY.getDevicePointer(), &pmeDispersionBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]}; cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256); cu.executeKernel(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
if (useCudaFFT) { if (useCudaFFT) {
if (cu.getUseDoublePrecision()) if (cu.getUseDoublePrecision())
cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid->getDevicePointer(), (double*) directPmeGrid->getDevicePointer()); cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid.getDevicePointer(), (double*) directPmeGrid.getDevicePointer());
else else
cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid->getDevicePointer(), (float*) directPmeGrid->getDevicePointer()); cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid.getDevicePointer(), (float*) directPmeGrid.getDevicePointer());
} }
else { else {
dispersionFft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false); dispersionFft->execFFT(reciprocalPmeGrid, directPmeGrid, false);
} }
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(), void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(), cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&sigmaEpsilon->getDevicePointer()}; &sigmaEpsilon.getDevicePointer()};
cu.executeKernel(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128); cu.executeKernel(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
} }
if (usePmeStream) { if (usePmeStream) {
...@@ -2253,7 +2184,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -2253,7 +2184,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
sumSquaredCharges += charge*charge; sumSquaredCharges += charge*charge;
} }
cu.setCharges(chargeVector); cu.setCharges(chargeVector);
sigmaEpsilon->upload(sigmaEpsilonVector); sigmaEpsilon.upload(sigmaEpsilonVector);
// Record the exceptions. // Record the exceptions.
...@@ -2265,7 +2196,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -2265,7 +2196,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon); force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f); exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
} }
exceptionParams->upload(exceptionParamsVector); exceptionParams.upload(exceptionParamsVector);
} }
// Compute other values. // Compute other values.
...@@ -2355,12 +2286,6 @@ CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() { ...@@ -2355,12 +2286,6 @@ CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
cu.setAsCurrent(); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
if (interactionGroupData != NULL)
delete interactionGroupData;
for (auto function : tabulatedFunctions)
delete function;
if (forceCopy != NULL) if (forceCopy != NULL)
delete forceCopy; delete forceCopy;
} }
...@@ -2377,7 +2302,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const ...@@ -2377,7 +2302,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
int numParticles = force.getNumParticles(); int numParticles = force.getNumParticles();
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters"); params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals"); globals.initialize<float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals");
vector<vector<float> > paramVector(numParticles); vector<vector<float> > paramVector(numParticles);
vector<vector<int> > exclusionList(numParticles); vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
...@@ -2402,6 +2327,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const ...@@ -2402,6 +2327,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
vector<pair<string, string> > functionDefinitions; vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
vector<string> tableTypes; vector<string> tableTypes;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) { for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
...@@ -2410,9 +2336,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const ...@@ -2410,9 +2336,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction")); tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[i].upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer())); cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[i].getDevicePointer()));
if (width == 1) if (width == 1)
tableTypes.push_back("float"); tableTypes.push_back("float");
else else
...@@ -2427,8 +2353,8 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const ...@@ -2427,8 +2353,8 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
globalParamNames[i] = force.getGlobalParameterName(i); globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i); globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
} }
if (globals != NULL) if (globals.isInitialized())
globals->upload(globalParamValues); globals.upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff); bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic); bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize(); Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
...@@ -2482,9 +2408,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const ...@@ -2482,9 +2408,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory())); cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
} }
if (globals != NULL) { if (globals.isInitialized()) {
globals->upload(globalParamValues); globals.upload(globalParamValues);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer())); cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals.getDevicePointer()));
} }
} }
info = new ForceInfo(force); info = new ForceInfo(force);
...@@ -2660,8 +2586,8 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo ...@@ -2660,8 +2586,8 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
for (; indexInTileSet < 32; indexInTileSet++) for (; indexInTileSet < 32; indexInTileSet++)
groupData.push_back(make_int4(0, 0, 0, 0)); groupData.push_back(make_int4(0, 0, 0, 0));
} }
interactionGroupData = CudaArray::create<int4>(cu, groupData.size(), "interactionGroupData"); interactionGroupData.initialize<int4>(cu, groupData.size(), "interactionGroupData");
interactionGroupData->upload(groupData); interactionGroupData.upload(groupData);
// Create the kernel. // Create the kernel.
...@@ -2687,7 +2613,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo ...@@ -2687,7 +2613,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
args<<", const "<<buffers[i].getType()<<"* __restrict__ global_params"<<(i+1); args<<", const "<<buffers[i].getType()<<"* __restrict__ global_params"<<(i+1);
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
args << ", const " << tableTypes[i]<< "* __restrict__ table" << i; args << ", const " << tableTypes[i]<< "* __restrict__ table" << i;
if (globals != NULL) if (globals.isInitialized())
args<<", const float* __restrict__ globals"; args<<", const float* __restrict__ globals";
if (hasParamDerivs) if (hasParamDerivs)
args << ", mixed* __restrict__ energyParamDerivs"; args << ", mixed* __restrict__ energyParamDerivs";
...@@ -2758,7 +2684,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo ...@@ -2758,7 +2684,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
} }
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]); float value = (float) context.getParameter(globalParamNames[i]);
...@@ -2767,7 +2693,7 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in ...@@ -2767,7 +2693,7 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) { if (changed) {
globals->upload(globalParamValues); globals.upload(globalParamValues);
if (forceCopy != NULL) { if (forceCopy != NULL) {
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs); CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true; hasInitializedLongRangeCorrection = true;
...@@ -2778,13 +2704,13 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in ...@@ -2778,13 +2704,13 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs); CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true; hasInitializedLongRangeCorrection = true;
} }
if (interactionGroupData != NULL) { if (interactionGroupData.isInitialized()) {
if (!hasInitializedKernel) { if (!hasInitializedKernel) {
hasInitializedKernel = true; hasInitializedKernel = true;
interactionGroupArgs.push_back(&cu.getForce().getDevicePointer()); interactionGroupArgs.push_back(&cu.getForce().getDevicePointer());
interactionGroupArgs.push_back(&cu.getEnergyBuffer().getDevicePointer()); interactionGroupArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
interactionGroupArgs.push_back(&cu.getPosq().getDevicePointer()); interactionGroupArgs.push_back(&cu.getPosq().getDevicePointer());
interactionGroupArgs.push_back(&interactionGroupData->getDevicePointer()); interactionGroupArgs.push_back(&interactionGroupData.getDevicePointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxSizePointer()); interactionGroupArgs.push_back(cu.getPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getInvPeriodicBoxSizePointer()); interactionGroupArgs.push_back(cu.getInvPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxVecXPointer()); interactionGroupArgs.push_back(cu.getPeriodicBoxVecXPointer());
...@@ -2792,10 +2718,10 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in ...@@ -2792,10 +2718,10 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
interactionGroupArgs.push_back(cu.getPeriodicBoxVecZPointer()); interactionGroupArgs.push_back(cu.getPeriodicBoxVecZPointer());
for (auto& buffer : params->getBuffers()) for (auto& buffer : params->getBuffers())
interactionGroupArgs.push_back(&buffer.getMemory()); interactionGroupArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
interactionGroupArgs.push_back(&function->getDevicePointer()); interactionGroupArgs.push_back(&function.getDevicePointer());
if (globals != NULL) if (globals.isInitialized())
interactionGroupArgs.push_back(&globals->getDevicePointer()); interactionGroupArgs.push_back(&globals.getDevicePointer());
if (hasParamDerivs) if (hasParamDerivs)
interactionGroupArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer()); interactionGroupArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
} }
...@@ -2855,38 +2781,24 @@ private: ...@@ -2855,38 +2781,24 @@ private:
const GBSAOBCForce& force; const GBSAOBCForce& force;
}; };
CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (bornSum != NULL)
delete bornSum;
if (bornRadii != NULL)
delete bornRadii;
if (bornForce != NULL)
delete bornForce;
if (obcChain != NULL)
delete obcChain;
}
void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) { void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
cu.setAsCurrent(); cu.setAsCurrent();
if (cu.getPlatformData().contexts.size() > 1) if (cu.getPlatformData().contexts.size() > 1)
throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices"); throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities(); CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
params = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams"); params.initialize<float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
if (cu.getUseDoublePrecision()) { if (cu.getUseDoublePrecision()) {
bornRadii = CudaArray::create<double>(cu, cu.getPaddedNumAtoms(), "bornRadii"); bornRadii.initialize<double>(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain = CudaArray::create<double>(cu, cu.getPaddedNumAtoms(), "obcChain"); obcChain.initialize<double>(cu, cu.getPaddedNumAtoms(), "obcChain");
} }
else { else {
bornRadii = CudaArray::create<float>(cu, cu.getPaddedNumAtoms(), "bornRadii"); bornRadii.initialize<float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain = CudaArray::create<float>(cu, cu.getPaddedNumAtoms(), "obcChain"); obcChain.initialize<float>(cu, cu.getPaddedNumAtoms(), "obcChain");
} }
bornSum = CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "bornSum"); bornSum.initialize<long long>(cu, cu.getPaddedNumAtoms(), "bornSum");
bornForce = CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "bornForce"); bornForce.initialize<long long>(cu, cu.getPaddedNumAtoms(), "bornForce");
cu.addAutoclearBuffer(*bornSum); cu.addAutoclearBuffer(bornSum);
cu.addAutoclearBuffer(*bornForce); cu.addAutoclearBuffer(bornForce);
CudaArray& posq = cu.getPosq(); CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize()); vector<double4> temp(posq.getSize());
float4* posqf = (float4*) &temp[0]; float4* posqf = (float4*) &temp[0];
...@@ -2904,7 +2816,7 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF ...@@ -2904,7 +2816,7 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
posqf[i] = make_float4(0, 0, 0, (float) charge); posqf[i] = make_float4(0, 0, 0, (float) charge);
} }
posq.upload(&temp[0]); posq.upload(&temp[0]);
params->upload(paramsVector); params.upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric())); prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy(); surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff); bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
...@@ -2912,8 +2824,8 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF ...@@ -2912,8 +2824,8 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
cutoff = force.getCutoffDistance(); cutoff = force.getCutoffDistance();
string source = CudaKernelSources::gbsaObc2; string source = CudaKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup()); nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params->getDevicePointer())); nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params.getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce->getDevicePointer())); nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce.getDevicePointer()));
info = new ForceInfo(force); info = new ForceInfo(force);
cu.addForce(info); cu.addForce(info);
} }
...@@ -2951,9 +2863,9 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor ...@@ -2951,9 +2863,9 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
map<string, string> replacements; map<string, string> replacements;
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::gbsaObc1, replacements), defines); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::gbsaObc1, replacements), defines);
computeBornSumKernel = cu.getKernel(module, "computeBornSum"); computeBornSumKernel = cu.getKernel(module, "computeBornSum");
computeSumArgs.push_back(&bornSum->getDevicePointer()); computeSumArgs.push_back(&bornSum.getDevicePointer());
computeSumArgs.push_back(&cu.getPosq().getDevicePointer()); computeSumArgs.push_back(&cu.getPosq().getDevicePointer());
computeSumArgs.push_back(&params->getDevicePointer()); computeSumArgs.push_back(&params.getDevicePointer());
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
computeSumArgs.push_back(&nb.getInteractingTiles().getDevicePointer()); computeSumArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
computeSumArgs.push_back(&nb.getInteractionCount().getDevicePointer()); computeSumArgs.push_back(&nb.getInteractionCount().getDevicePointer());
...@@ -2972,10 +2884,10 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor ...@@ -2972,10 +2884,10 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
computeSumArgs.push_back(&nb.getExclusionTiles().getDevicePointer()); computeSumArgs.push_back(&nb.getExclusionTiles().getDevicePointer());
force1Kernel = cu.getKernel(module, "computeGBSAForce1"); force1Kernel = cu.getKernel(module, "computeGBSAForce1");
force1Args.push_back(&cu.getForce().getDevicePointer()); force1Args.push_back(&cu.getForce().getDevicePointer());
force1Args.push_back(&bornForce->getDevicePointer()); force1Args.push_back(&bornForce.getDevicePointer());
force1Args.push_back(&cu.getEnergyBuffer().getDevicePointer()); force1Args.push_back(&cu.getEnergyBuffer().getDevicePointer());
force1Args.push_back(&cu.getPosq().getDevicePointer()); force1Args.push_back(&cu.getPosq().getDevicePointer());
force1Args.push_back(&bornRadii->getDevicePointer()); force1Args.push_back(&bornRadii.getDevicePointer());
force1Args.push_back(NULL); force1Args.push_back(NULL);
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
force1Args.push_back(&nb.getInteractingTiles().getDevicePointer()); force1Args.push_back(&nb.getInteractingTiles().getDevicePointer());
...@@ -3008,12 +2920,12 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor ...@@ -3008,12 +2920,12 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
} }
cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize()); cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
float alpha = 1.0f, beta = 0.8f, gamma = 4.85f; float alpha = 1.0f, beta = 0.8f, gamma = 4.85f;
void* reduceSumArgs[] = {&alpha, &beta, &gamma, &bornSum->getDevicePointer(), &params->getDevicePointer(), void* reduceSumArgs[] = {&alpha, &beta, &gamma, &bornSum.getDevicePointer(), &params.getDevicePointer(),
&bornRadii->getDevicePointer(), &obcChain->getDevicePointer()}; &bornRadii.getDevicePointer(), &obcChain.getDevicePointer()};
cu.executeKernel(reduceBornSumKernel, reduceSumArgs, cu.getPaddedNumAtoms()); cu.executeKernel(reduceBornSumKernel, reduceSumArgs, cu.getPaddedNumAtoms());
cu.executeKernel(force1Kernel, &force1Args[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize()); cu.executeKernel(force1Kernel, &force1Args[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
void* reduceForceArgs[] = {&bornForce->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), &params->getDevicePointer(), void* reduceForceArgs[] = {&bornForce.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), &params.getDevicePointer(),
&bornRadii->getDevicePointer(), &obcChain->getDevicePointer()}; &bornRadii.getDevicePointer(), &obcChain.getDevicePointer()};
cu.executeKernel(reduceBornForceKernel, &reduceForceArgs[0], cu.getPaddedNumAtoms()); cu.executeKernel(reduceBornForceKernel, &reduceForceArgs[0], cu.getPaddedNumAtoms());
return 0.0; return 0.0;
} }
...@@ -3039,7 +2951,7 @@ void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, c ...@@ -3039,7 +2951,7 @@ void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, c
paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius)); paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
} }
cu.setCharges(chargeVector); cu.setCharges(chargeVector);
params->upload(paramsVector); params.upload(paramsVector);
// Mark that the current reordering may be invalid. // Mark that the current reordering may be invalid.
...@@ -3087,16 +2999,6 @@ CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() { ...@@ -3087,16 +2999,6 @@ CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
delete energyDerivs; delete energyDerivs;
if (energyDerivChain != NULL) if (energyDerivChain != NULL)
delete energyDerivChain; delete energyDerivChain;
if (longEnergyDerivs != NULL)
delete longEnergyDerivs;
if (globals != NULL)
delete globals;
if (valueBuffers != NULL)
delete valueBuffers;
for (auto function : tabulatedFunctions)
delete function;
for (auto d : dValue0dParam)
delete d;
for (auto d : dValuedParam) for (auto d : dValuedParam)
delete d; delete d;
} }
...@@ -3135,7 +3037,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3135,7 +3037,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true); params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true);
computedValues = new CudaParameterSet(cu, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cu.getUseDoublePrecision()); computedValues = new CudaParameterSet(cu, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cu.getUseDoublePrecision());
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customGBGlobals"); globals.initialize<float>(cu, force.getNumGlobalParameters(), "customGBGlobals");
vector<vector<float> > paramVector(paddedNumParticles, vector<float>(numParams, 0)); vector<vector<float> > paramVector(paddedNumParticles, vector<float>(numParams, 0));
vector<vector<int> > exclusionList(numParticles); vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
...@@ -3159,6 +3061,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3159,6 +3061,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
vector<pair<string, string> > functionDefinitions; vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
stringstream tableArgs; stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) { for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
...@@ -3167,9 +3070,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3167,9 +3070,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction")); tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[i].upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer())); cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[i].getDevicePointer()));
tableArgs << ", const float"; tableArgs << ", const float";
if (width > 1) if (width > 1)
tableArgs << width; tableArgs << width;
...@@ -3184,8 +3087,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3184,8 +3087,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
globalParamNames[i] = force.getGlobalParameterName(i); globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i); globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
} }
if (globals != NULL) if (globals.isInitialized())
globals->upload(globalParamValues); globals.upload(globalParamValues);
// Record derivatives of expressions needed for the chain rule terms. // Record derivatives of expressions needed for the chain rule terms.
...@@ -3233,15 +3136,16 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3233,15 +3136,16 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++) for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++)
energyParamDerivExpressions[i].push_back(ex.differentiate(force.getEnergyParameterDerivativeName(j)).optimize()); energyParamDerivExpressions[i].push_back(ex.differentiate(force.getEnergyParameterDerivativeName(j)).optimize());
} }
longEnergyDerivs = CudaArray::create<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives"); longEnergyDerivs.initialize<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivatives", true); energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
energyDerivChain = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true); energyDerivChain = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0); needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0);
dValue0dParam.resize(force.getNumEnergyParameterDerivatives());
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) { for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
dValuedParam.push_back(new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "dValuedParam", true, cu.getUseDoublePrecision())); dValuedParam.push_back(new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "dValuedParam", true, cu.getUseDoublePrecision()));
dValue0dParam.push_back(CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "dValue0dParam")); dValue0dParam[i].initialize<long long>(cu, cu.getPaddedNumAtoms(), "dValue0dParam");
cu.addAutoclearBuffer(*dValue0dParam.back()); cu.addAutoclearBuffer(dValue0dParam[i]);
string name = force.getEnergyParameterDerivativeName(i); string name = force.getEnergyParameterDerivativeName(i);
cu.addEnergyParameterDerivative(name); cu.addEnergyParameterDerivative(name);
} }
...@@ -3826,9 +3730,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3826,9 +3730,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory())); parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
} }
} }
if (globals != NULL) { if (globals.isInitialized()) {
globals->upload(globalParamValues); globals.upload(globalParamValues);
arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer())); arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals.getDevicePointer()));
} }
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup()); cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
for (auto param : parameters) for (auto param : parameters)
...@@ -3838,7 +3742,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG ...@@ -3838,7 +3742,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
} }
info = new ForceInfo(force); info = new ForceInfo(force);
cu.addForce(info); cu.addForce(info);
cu.addAutoclearBuffer(*longEnergyDerivs); cu.addAutoclearBuffer(longEnergyDerivs);
} }
double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
...@@ -3881,13 +3785,13 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -3881,13 +3785,13 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
// Set arguments for kernels. // Set arguments for kernels.
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2); maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
valueBuffers = CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "customGBValueBuffers"); valueBuffers.initialize<long long>(cu, cu.getPaddedNumAtoms(), "customGBValueBuffers");
cu.addAutoclearBuffer(*valueBuffers); cu.addAutoclearBuffer(valueBuffers);
cu.clearBuffer(valueBuffers->getDevicePointer(), sizeof(long long)*valueBuffers->getSize()); cu.clearBuffer(valueBuffers.getDevicePointer(), sizeof(long long)*valueBuffers.getSize());
pairValueArgs.push_back(&cu.getPosq().getDevicePointer()); pairValueArgs.push_back(&cu.getPosq().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer()); pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusionTiles().getDevicePointer()); pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusionTiles().getDevicePointer());
pairValueArgs.push_back(&valueBuffers->getDevicePointer()); pairValueArgs.push_back(&valueBuffers.getDevicePointer());
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
pairValueArgs.push_back(&nb.getInteractingTiles().getDevicePointer()); pairValueArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
pairValueArgs.push_back(&nb.getInteractionCount().getDevicePointer()); pairValueArgs.push_back(&nb.getInteractionCount().getDevicePointer());
...@@ -3903,31 +3807,31 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -3903,31 +3807,31 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
} }
else else
pairValueArgs.push_back(&maxTiles); pairValueArgs.push_back(&maxTiles);
if (globals != NULL) if (globals.isInitialized())
pairValueArgs.push_back(&globals->getDevicePointer()); pairValueArgs.push_back(&globals.getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairValueUsesParam[i]) if (pairValueUsesParam[i])
pairValueArgs.push_back(&params->getBuffers()[i].getMemory()); pairValueArgs.push_back(&params->getBuffers()[i].getMemory());
} }
for (auto d : dValue0dParam) for (auto& d : dValue0dParam)
pairValueArgs.push_back(&d->getDevicePointer()); pairValueArgs.push_back(&d.getDevicePointer());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
pairValueArgs.push_back(&function->getDevicePointer()); pairValueArgs.push_back(&function.getDevicePointer());
perParticleValueArgs.push_back(&cu.getPosq().getDevicePointer()); perParticleValueArgs.push_back(&cu.getPosq().getDevicePointer());
perParticleValueArgs.push_back(&valueBuffers->getDevicePointer()); perParticleValueArgs.push_back(&valueBuffers.getDevicePointer());
if (globals != NULL) if (globals.isInitialized())
perParticleValueArgs.push_back(&globals->getDevicePointer()); perParticleValueArgs.push_back(&globals.getDevicePointer());
for (auto& buffer : params->getBuffers()) for (auto& buffer : params->getBuffers())
perParticleValueArgs.push_back(&buffer.getMemory()); perParticleValueArgs.push_back(&buffer.getMemory());
for (auto& buffer : computedValues->getBuffers()) for (auto& buffer : computedValues->getBuffers())
perParticleValueArgs.push_back(&buffer.getMemory()); perParticleValueArgs.push_back(&buffer.getMemory());
for (int i = 0; i < dValuedParam.size(); i++) { for (int i = 0; i < dValuedParam.size(); i++) {
perParticleValueArgs.push_back(&dValue0dParam[i]->getDevicePointer()); perParticleValueArgs.push_back(&dValue0dParam[i].getDevicePointer());
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++) for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
perParticleValueArgs.push_back(&dValuedParam[i]->getBuffers()[j].getMemory()); perParticleValueArgs.push_back(&dValuedParam[i]->getBuffers()[j].getMemory());
} }
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
perParticleValueArgs.push_back(&function->getDevicePointer()); perParticleValueArgs.push_back(&function.getDevicePointer());
pairEnergyArgs.push_back(&cu.getForce().getDevicePointer()); pairEnergyArgs.push_back(&cu.getForce().getDevicePointer());
pairEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer()); pairEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
pairEnergyArgs.push_back(&cu.getPosq().getDevicePointer()); pairEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
...@@ -3949,8 +3853,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -3949,8 +3853,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
} }
else else
pairEnergyArgs.push_back(&maxTiles); pairEnergyArgs.push_back(&maxTiles);
if (globals != NULL) if (globals.isInitialized())
pairEnergyArgs.push_back(&globals->getDevicePointer()); pairEnergyArgs.push_back(&globals.getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairEnergyUsesParam[i]) if (pairEnergyUsesParam[i])
pairEnergyArgs.push_back(&params->getBuffers()[i].getMemory()); pairEnergyArgs.push_back(&params->getBuffers()[i].getMemory());
...@@ -3959,16 +3863,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -3959,16 +3863,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
if (pairEnergyUsesValue[i]) if (pairEnergyUsesValue[i])
pairEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory()); pairEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory());
} }
pairEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer()); pairEnergyArgs.push_back(&longEnergyDerivs.getDevicePointer());
if (needEnergyParamDerivs) if (needEnergyParamDerivs)
pairEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer()); pairEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
pairEnergyArgs.push_back(&function->getDevicePointer()); pairEnergyArgs.push_back(&function.getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getForce().getDevicePointer()); perParticleEnergyArgs.push_back(&cu.getForce().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer()); perParticleEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getPosq().getDevicePointer()); perParticleEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
if (globals != NULL) if (globals.isInitialized())
perParticleEnergyArgs.push_back(&globals->getDevicePointer()); perParticleEnergyArgs.push_back(&globals.getDevicePointer());
for (auto& buffer : params->getBuffers()) for (auto& buffer : params->getBuffers())
perParticleEnergyArgs.push_back(&buffer.getMemory()); perParticleEnergyArgs.push_back(&buffer.getMemory());
for (auto& buffer : computedValues->getBuffers()) for (auto& buffer : computedValues->getBuffers())
...@@ -3977,16 +3881,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -3977,16 +3881,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
perParticleEnergyArgs.push_back(&buffer.getMemory()); perParticleEnergyArgs.push_back(&buffer.getMemory());
for (auto& buffer : energyDerivChain->getBuffers()) for (auto& buffer : energyDerivChain->getBuffers())
perParticleEnergyArgs.push_back(&buffer.getMemory()); perParticleEnergyArgs.push_back(&buffer.getMemory());
perParticleEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer()); perParticleEnergyArgs.push_back(&longEnergyDerivs.getDevicePointer());
if (needEnergyParamDerivs) if (needEnergyParamDerivs)
perParticleEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer()); perParticleEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
perParticleEnergyArgs.push_back(&function->getDevicePointer()); perParticleEnergyArgs.push_back(&function.getDevicePointer());
if (needParameterGradient || needEnergyParamDerivs) { if (needParameterGradient || needEnergyParamDerivs) {
gradientChainRuleArgs.push_back(&cu.getForce().getDevicePointer()); gradientChainRuleArgs.push_back(&cu.getForce().getDevicePointer());
gradientChainRuleArgs.push_back(&cu.getPosq().getDevicePointer()); gradientChainRuleArgs.push_back(&cu.getPosq().getDevicePointer());
if (globals != NULL) if (globals.isInitialized())
gradientChainRuleArgs.push_back(&globals->getDevicePointer()); gradientChainRuleArgs.push_back(&globals.getDevicePointer());
for (auto& buffer : params->getBuffers()) for (auto& buffer : params->getBuffers())
gradientChainRuleArgs.push_back(&buffer.getMemory()); gradientChainRuleArgs.push_back(&buffer.getMemory());
for (auto& buffer : computedValues->getBuffers()) for (auto& buffer : computedValues->getBuffers())
...@@ -4001,7 +3905,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -4001,7 +3905,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
} }
} }
} }
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]); float value = (float) context.getParameter(globalParamNames[i]);
...@@ -4010,7 +3914,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -4010,7 +3914,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
pairEnergyArgs[5] = &includeEnergy; pairEnergyArgs[5] = &includeEnergy;
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
...@@ -4089,8 +3993,6 @@ CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() { ...@@ -4089,8 +3993,6 @@ CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
cu.setAsCurrent(); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
} }
void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) { void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
...@@ -4146,9 +4048,9 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C ...@@ -4146,9 +4048,9 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
variables[name] = "particleParams"+params->getParameterSuffix(i); variables[name] = "particleParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customExternalGlobals"); globals.initialize<float>(cu, force.getNumGlobalParameters(), "customExternalGlobals");
globals->upload(globalParamValues); globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float"); string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]"; string value = argName+"["+cu.intToString(i)+"]";
...@@ -4170,7 +4072,7 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C ...@@ -4170,7 +4072,7 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
} }
double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]); float value = (float) context.getParameter(globalParamNames[i]);
...@@ -4179,7 +4081,7 @@ double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool inc ...@@ -4179,7 +4081,7 @@ double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool inc
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
return 0.0; return 0.0;
} }
...@@ -4294,18 +4196,6 @@ CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() { ...@@ -4294,18 +4196,6 @@ CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() {
delete donorParams; delete donorParams;
if (acceptorParams != NULL) if (acceptorParams != NULL)
delete acceptorParams; delete acceptorParams;
if (donors != NULL)
delete donors;
if (acceptors != NULL)
delete acceptors;
if (globals != NULL)
delete globals;
if (donorExclusions != NULL)
delete donorExclusions;
if (acceptorExclusions != NULL)
delete acceptorExclusions;
for (auto function : tabulatedFunctions)
delete function;
} }
static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) { static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
...@@ -4333,12 +4223,12 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust ...@@ -4333,12 +4223,12 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
if (numDonors == 0 || numAcceptors == 0) if (numDonors == 0 || numAcceptors == 0)
return; return;
int numParticles = system.getNumParticles(); int numParticles = system.getNumParticles();
donors = CudaArray::create<int4>(cu, numDonors, "customHbondDonors"); donors.initialize<int4>(cu, numDonors, "customHbondDonors");
acceptors = CudaArray::create<int4>(cu, numAcceptors, "customHbondAcceptors"); acceptors.initialize<int4>(cu, numAcceptors, "customHbondAcceptors");
donorParams = new CudaParameterSet(cu, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters"); donorParams = new CudaParameterSet(cu, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
acceptorParams = new CudaParameterSet(cu, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters"); acceptorParams = new CudaParameterSet(cu, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customHbondGlobals"); globals.initialize<float>(cu, force.getNumGlobalParameters(), "customHbondGlobals");
vector<vector<float> > donorParamVector(numDonors); vector<vector<float> > donorParamVector(numDonors);
vector<int4> donorVector(numDonors); vector<int4> donorVector(numDonors);
for (int i = 0; i < numDonors; i++) { for (int i = 0; i < numDonors; i++) {
...@@ -4348,7 +4238,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust ...@@ -4348,7 +4238,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
for (int j = 0; j < (int) parameters.size(); j++) for (int j = 0; j < (int) parameters.size(); j++)
donorParamVector[i][j] = (float) parameters[j]; donorParamVector[i][j] = (float) parameters[j];
} }
donors->upload(donorVector); donors.upload(donorVector);
donorParams->setParameterValues(donorParamVector); donorParams->setParameterValues(donorParamVector);
vector<vector<float> > acceptorParamVector(numAcceptors); vector<vector<float> > acceptorParamVector(numAcceptors);
vector<int4> acceptorVector(numAcceptors); vector<int4> acceptorVector(numAcceptors);
...@@ -4359,7 +4249,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust ...@@ -4359,7 +4249,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
for (int j = 0; j < (int) parameters.size(); j++) for (int j = 0; j < (int) parameters.size(); j++)
acceptorParamVector[i][j] = (float) parameters[j]; acceptorParamVector[i][j] = (float) parameters[j];
} }
acceptors->upload(acceptorVector); acceptors.upload(acceptorVector);
acceptorParams->setParameterValues(acceptorParamVector); acceptorParams->setParameterValues(acceptorParamVector);
info = new ForceInfo(force); info = new ForceInfo(force);
cu.addForce(info); cu.addForce(info);
...@@ -4395,10 +4285,10 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust ...@@ -4395,10 +4285,10 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
else else
throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor"); throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor");
} }
donorExclusions = CudaArray::create<int4>(cu, numDonors, "customHbondDonorExclusions"); donorExclusions.initialize<int4>(cu, numDonors, "customHbondDonorExclusions");
acceptorExclusions = CudaArray::create<int4>(cu, numAcceptors, "customHbondAcceptorExclusions"); acceptorExclusions.initialize<int4>(cu, numAcceptors, "customHbondAcceptorExclusions");
donorExclusions->upload(donorExclusionVector); donorExclusions.upload(donorExclusionVector);
acceptorExclusions->upload(acceptorExclusionVector); acceptorExclusions.upload(acceptorExclusionVector);
// Record the tabulated functions. // Record the tabulated functions.
...@@ -4406,6 +4296,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust ...@@ -4406,6 +4296,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
vector<pair<string, string> > functionDefinitions; vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
stringstream tableArgs; stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) { for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
...@@ -4414,8 +4305,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust ...@@ -4414,8 +4305,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction")); tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[i].upload(f);
tableArgs << ", const float"; tableArgs << ", const float";
if (width > 1) if (width > 1)
tableArgs << width; tableArgs << width;
...@@ -4430,8 +4321,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust ...@@ -4430,8 +4321,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
globalParamNames[i] = force.getGlobalParameterName(i); globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i); globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
} }
if (globals != NULL) if (globals.isInitialized())
globals->upload(globalParamValues); globals.upload(globalParamValues);
map<string, string> variables; map<string, string> variables;
for (int i = 0; i < force.getNumPerDonorParameters(); i++) { for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
const string& name = force.getPerDonorParameterName(i); const string& name = force.getPerDonorParameterName(i);
...@@ -4623,7 +4514,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust ...@@ -4623,7 +4514,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numDonors == 0 || numAcceptors == 0) if (numDonors == 0 || numAcceptors == 0)
return 0.0; return 0.0;
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]); float value = (float) context.getParameter(globalParamNames[i]);
...@@ -4632,7 +4523,7 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ ...@@ -4632,7 +4523,7 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
if (!hasInitializedKernel) { if (!hasInitializedKernel) {
hasInitializedKernel = true; hasInitializedKernel = true;
...@@ -4640,42 +4531,42 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ ...@@ -4640,42 +4531,42 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
donorArgs.push_back(&cu.getForce().getDevicePointer()); donorArgs.push_back(&cu.getForce().getDevicePointer());
donorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer()); donorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
donorArgs.push_back(&cu.getPosq().getDevicePointer()); donorArgs.push_back(&cu.getPosq().getDevicePointer());
donorArgs.push_back(&donorExclusions->getDevicePointer()); donorArgs.push_back(&donorExclusions.getDevicePointer());
donorArgs.push_back(&donors->getDevicePointer()); donorArgs.push_back(&donors.getDevicePointer());
donorArgs.push_back(&acceptors->getDevicePointer()); donorArgs.push_back(&acceptors.getDevicePointer());
donorArgs.push_back(cu.getPeriodicBoxSizePointer()); donorArgs.push_back(cu.getPeriodicBoxSizePointer());
donorArgs.push_back(cu.getInvPeriodicBoxSizePointer()); donorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
donorArgs.push_back(cu.getPeriodicBoxVecXPointer()); donorArgs.push_back(cu.getPeriodicBoxVecXPointer());
donorArgs.push_back(cu.getPeriodicBoxVecYPointer()); donorArgs.push_back(cu.getPeriodicBoxVecYPointer());
donorArgs.push_back(cu.getPeriodicBoxVecZPointer()); donorArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (globals != NULL) if (globals.isInitialized())
donorArgs.push_back(&globals->getDevicePointer()); donorArgs.push_back(&globals.getDevicePointer());
for (auto& buffer : donorParams->getBuffers()) for (auto& buffer : donorParams->getBuffers())
donorArgs.push_back(&buffer.getMemory()); donorArgs.push_back(&buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers()) for (auto& buffer : acceptorParams->getBuffers())
donorArgs.push_back(&buffer.getMemory()); donorArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
donorArgs.push_back(&function->getDevicePointer()); donorArgs.push_back(&function.getDevicePointer());
index = 0; index = 0;
acceptorArgs.push_back(&cu.getForce().getDevicePointer()); acceptorArgs.push_back(&cu.getForce().getDevicePointer());
acceptorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer()); acceptorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
acceptorArgs.push_back(&cu.getPosq().getDevicePointer()); acceptorArgs.push_back(&cu.getPosq().getDevicePointer());
acceptorArgs.push_back(&acceptorExclusions->getDevicePointer()); acceptorArgs.push_back(&acceptorExclusions.getDevicePointer());
acceptorArgs.push_back(&donors->getDevicePointer()); acceptorArgs.push_back(&donors.getDevicePointer());
acceptorArgs.push_back(&acceptors->getDevicePointer()); acceptorArgs.push_back(&acceptors.getDevicePointer());
acceptorArgs.push_back(cu.getPeriodicBoxSizePointer()); acceptorArgs.push_back(cu.getPeriodicBoxSizePointer());
acceptorArgs.push_back(cu.getInvPeriodicBoxSizePointer()); acceptorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecXPointer()); acceptorArgs.push_back(cu.getPeriodicBoxVecXPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecYPointer()); acceptorArgs.push_back(cu.getPeriodicBoxVecYPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecZPointer()); acceptorArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (globals != NULL) if (globals.isInitialized())
acceptorArgs.push_back(&globals->getDevicePointer()); acceptorArgs.push_back(&globals.getDevicePointer());
for (auto& buffer : donorParams->getBuffers()) for (auto& buffer : donorParams->getBuffers())
acceptorArgs.push_back(&buffer.getMemory()); acceptorArgs.push_back(&buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers()) for (auto& buffer : acceptorParams->getBuffers())
acceptorArgs.push_back(&buffer.getMemory()); acceptorArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
acceptorArgs.push_back(&function->getDevicePointer()); acceptorArgs.push_back(&function.getDevicePointer());
} }
int sharedMemorySize = 3*CudaContext::ThreadBlockSize*sizeof(float4); int sharedMemorySize = 3*CudaContext::ThreadBlockSize*sizeof(float4);
cu.executeKernel(donorKernel, &donorArgs[0], max(numDonors, numAcceptors), CudaContext::ThreadBlockSize, sharedMemorySize); cu.executeKernel(donorKernel, &donorArgs[0], max(numDonors, numAcceptors), CudaContext::ThreadBlockSize, sharedMemorySize);
...@@ -4775,22 +4666,6 @@ CudaCalcCustomCentroidBondForceKernel::~CudaCalcCustomCentroidBondForceKernel() ...@@ -4775,22 +4666,6 @@ CudaCalcCustomCentroidBondForceKernel::~CudaCalcCustomCentroidBondForceKernel()
cu.setAsCurrent(); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
if (groupParticles != NULL)
delete groupParticles;
if (groupWeights != NULL)
delete groupWeights;
if (groupOffsets != NULL)
delete groupOffsets;
if (groupForces != NULL)
delete groupForces;
if (bondGroups != NULL)
delete bondGroups;
if (centerPositions != NULL)
delete centerPositions;
for (auto function : tabulatedFunctions)
delete function;
} }
void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) { void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
...@@ -4827,22 +4702,22 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con ...@@ -4827,22 +4702,22 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
for (int j = 0; j < normalizedWeights[i].size(); j++) for (int j = 0; j < normalizedWeights[i].size(); j++)
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]); groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
} }
groupParticles = CudaArray::create<int>(cu, groupParticleVec.size(), "groupParticles"); groupParticles.initialize<int>(cu, groupParticleVec.size(), "groupParticles");
groupParticles->upload(groupParticleVec); groupParticles.upload(groupParticleVec);
if (cu.getUseDoublePrecision()) { if (cu.getUseDoublePrecision()) {
groupWeights = CudaArray::create<double>(cu, groupParticleVec.size(), "groupWeights"); groupWeights.initialize<double>(cu, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecDouble); groupWeights.upload(groupWeightVecDouble);
centerPositions = CudaArray::create<double4>(cu, numGroups, "centerPositions"); centerPositions.initialize<double4>(cu, numGroups, "centerPositions");
} }
else { else {
groupWeights = CudaArray::create<float>(cu, groupParticleVec.size(), "groupWeights"); groupWeights.initialize<float>(cu, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecFloat); groupWeights.upload(groupWeightVecFloat);
centerPositions = CudaArray::create<float4>(cu, numGroups, "centerPositions"); centerPositions.initialize<float4>(cu, numGroups, "centerPositions");
} }
groupOffsets = CudaArray::create<int>(cu, groupOffsetVec.size(), "groupOffsets"); groupOffsets.initialize<int>(cu, groupOffsetVec.size(), "groupOffsets");
groupOffsets->upload(groupOffsetVec); groupOffsets.upload(groupOffsetVec);
groupForces = CudaArray::create<long long>(cu, numGroups*3, "groupForces"); groupForces.initialize<long long>(cu, numGroups*3, "groupForces");
cu.addAutoclearBuffer(*groupForces); cu.addAutoclearBuffer(groupForces);
// Record the bonds. // Record the bonds.
...@@ -4861,15 +4736,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con ...@@ -4861,15 +4736,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
paramVector[i][j] = (float) parameters[j]; paramVector[i][j] = (float) parameters[j];
} }
params->setParameterValues(paramVector); params->setParameterValues(paramVector);
bondGroups = CudaArray::create<int>(cu, bondGroupVec.size(), "bondGroups"); bondGroups.initialize<int>(cu, bondGroupVec.size(), "bondGroups");
bondGroups->upload(bondGroupVec); bondGroups.upload(bondGroupVec);
// Record the arguments to the force kernel. // Record the arguments to the force kernel.
groupForcesArgs.push_back(&groupForces->getDevicePointer()); groupForcesArgs.push_back(&groupForces.getDevicePointer());
groupForcesArgs.push_back(NULL); // Energy buffer hasn't been created yet groupForcesArgs.push_back(NULL); // Energy buffer hasn't been created yet
groupForcesArgs.push_back(&centerPositions->getDevicePointer()); groupForcesArgs.push_back(&centerPositions.getDevicePointer());
groupForcesArgs.push_back(&bondGroups->getDevicePointer()); groupForcesArgs.push_back(&bondGroups.getDevicePointer());
groupForcesArgs.push_back(cu.getPeriodicBoxSizePointer()); groupForcesArgs.push_back(cu.getPeriodicBoxSizePointer());
groupForcesArgs.push_back(cu.getInvPeriodicBoxSizePointer()); groupForcesArgs.push_back(cu.getInvPeriodicBoxSizePointer());
groupForcesArgs.push_back(cu.getPeriodicBoxVecXPointer()); groupForcesArgs.push_back(cu.getPeriodicBoxVecXPointer());
...@@ -4885,6 +4760,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con ...@@ -4885,6 +4760,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
vector<pair<string, string> > functionDefinitions; vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
stringstream extraArgs; stringstream extraArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) { for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
...@@ -4893,13 +4769,13 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con ...@@ -4893,13 +4769,13 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction")); tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions.back()->upload(f); tabulatedFunctions[i].upload(f);
extraArgs << ", const float"; extraArgs << ", const float";
if (width > 1) if (width > 1)
extraArgs << width; extraArgs << width;
extraArgs << "* __restrict__ " << arrayName; extraArgs << "* __restrict__ " << arrayName;
groupForcesArgs.push_back(&tabulatedFunctions.back()->getDevicePointer()); groupForcesArgs.push_back(&tabulatedFunctions[i].getDevicePointer());
} }
// Record information about parameters. // Record information about parameters.
...@@ -4924,15 +4800,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con ...@@ -4924,15 +4800,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
if (needEnergyParamDerivs) if (needEnergyParamDerivs)
extraArgs << ", mixed* __restrict__ energyParamDerivs"; extraArgs << ", mixed* __restrict__ energyParamDerivs";
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customCentroidBondGlobals"); globals.initialize<float>(cu, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals->upload(globalParamValues); globals.upload(globalParamValues);
extraArgs << ", const float* __restrict__ globals"; extraArgs << ", const float* __restrict__ globals";
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]"; string value = "globals["+cu.intToString(i)+"]";
variables[name] = value; variables[name] = value;
} }
groupForcesArgs.push_back(&globals->getDevicePointer()); groupForcesArgs.push_back(&globals.getDevicePointer());
} }
// Now to generate the kernel. First, it needs to calculate all distances, angles, // Now to generate the kernel. First, it needs to calculate all distances, angles,
...@@ -5144,7 +5020,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con ...@@ -5144,7 +5020,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0) if (numBonds == 0)
return 0.0; return 0.0;
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]); float value = (float) context.getParameter(globalParamNames[i]);
...@@ -5153,17 +5029,17 @@ double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool ...@@ -5153,17 +5029,17 @@ double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
void* computeCentersArgs[] = {&cu.getPosq().getDevicePointer(), &groupParticles->getDevicePointer(), &groupWeights->getDevicePointer(), void* computeCentersArgs[] = {&cu.getPosq().getDevicePointer(), &groupParticles.getDevicePointer(), &groupWeights.getDevicePointer(),
&groupOffsets->getDevicePointer(), &centerPositions->getDevicePointer()}; &groupOffsets.getDevicePointer(), &centerPositions.getDevicePointer()};
cu.executeKernel(computeCentersKernel, computeCentersArgs, CudaContext::TileSize*numGroups); cu.executeKernel(computeCentersKernel, computeCentersArgs, CudaContext::TileSize*numGroups);
groupForcesArgs[1] = &cu.getEnergyBuffer().getDevicePointer(); groupForcesArgs[1] = &cu.getEnergyBuffer().getDevicePointer();
if (needEnergyParamDerivs) if (needEnergyParamDerivs)
groupForcesArgs[9] = &cu.getEnergyParamDerivBuffer().getDevicePointer(); groupForcesArgs[9] = &cu.getEnergyParamDerivBuffer().getDevicePointer();
cu.executeKernel(groupForcesKernel, &groupForcesArgs[0], numBonds); cu.executeKernel(groupForcesKernel, &groupForcesArgs[0], numBonds);
void* applyForcesArgs[] = {&groupParticles->getDevicePointer(), &groupWeights->getDevicePointer(), &groupOffsets->getDevicePointer(), void* applyForcesArgs[] = {&groupParticles.getDevicePointer(), &groupWeights.getDevicePointer(), &groupOffsets.getDevicePointer(),
&groupForces->getDevicePointer(), &cu.getForce().getDevicePointer()}; &groupForces.getDevicePointer(), &cu.getForce().getDevicePointer()};
cu.executeKernel(applyForcesKernel, applyForcesArgs, CudaContext::TileSize*numGroups); cu.executeKernel(applyForcesKernel, applyForcesArgs, CudaContext::TileSize*numGroups);
return 0.0; return 0.0;
} }
...@@ -5222,10 +5098,6 @@ CudaCalcCustomCompoundBondForceKernel::~CudaCalcCustomCompoundBondForceKernel() ...@@ -5222,10 +5098,6 @@ CudaCalcCustomCompoundBondForceKernel::~CudaCalcCustomCompoundBondForceKernel()
cu.setAsCurrent(); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
for (auto function : tabulatedFunctions)
delete function;
} }
void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) { void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
...@@ -5256,16 +5128,16 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con ...@@ -5256,16 +5128,16 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
map<string, Lepton::CustomFunction*> functions; map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions; vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) { for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
CudaArray* array = CudaArray::create<float>(cu, f.size(), "TabulatedFunction"); tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions.push_back(array); tabulatedFunctions[i].upload(f);
array->upload(f); string arrayName = cu.getBondedUtilities().addArgument(tabulatedFunctions[i].getDevicePointer(), width == 1 ? "float" : "float"+cu.intToString(width));
string arrayName = cu.getBondedUtilities().addArgument(array->getDevicePointer(), width == 1 ? "float" : "float"+cu.intToString(width));
functionDefinitions.push_back(make_pair(name, arrayName)); functionDefinitions.push_back(make_pair(name, arrayName));
} }
...@@ -5289,9 +5161,9 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con ...@@ -5289,9 +5161,9 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
variables[name] = "bondParams"+params->getParameterSuffix(i); variables[name] = "bondParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customCompoundBondGlobals"); globals.initialize<float>(cu, force.getNumGlobalParameters(), "customCompoundBondGlobals");
globals->upload(globalParamValues); globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float"); string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]"; string value = argName+"["+cu.intToString(i)+"]";
...@@ -5474,7 +5346,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con ...@@ -5474,7 +5346,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
} }
double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]); float value = (float) context.getParameter(globalParamNames[i]);
...@@ -5483,7 +5355,7 @@ double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool ...@@ -5483,7 +5355,7 @@ double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
return 0.0; return 0.0;
} }
...@@ -5553,32 +5425,6 @@ CudaCalcCustomManyParticleForceKernel::~CudaCalcCustomManyParticleForceKernel() ...@@ -5553,32 +5425,6 @@ CudaCalcCustomManyParticleForceKernel::~CudaCalcCustomManyParticleForceKernel()
cu.setAsCurrent(); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
if (orderIndex != NULL)
delete orderIndex;
if (particleOrder != NULL)
delete particleOrder;
if (particleTypes != NULL)
delete particleTypes;
if (exclusions != NULL)
delete exclusions;
if (exclusionStartIndex != NULL)
delete exclusionStartIndex;
if (blockCenter != NULL)
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (neighborPairs != NULL)
delete neighborPairs;
if (numNeighborPairs != NULL)
delete numNeighborPairs;
if (neighborStartIndex != NULL)
delete neighborStartIndex;
if (neighbors != NULL)
delete neighbors;
if (numNeighborsForAtom != NULL)
delete numNeighborsForAtom;
for (auto function : tabulatedFunctions)
delete function;
} }
void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, const CustomManyParticleForce& force) { void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, const CustomManyParticleForce& force) {
...@@ -5612,6 +5458,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -5612,6 +5458,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
vector<pair<string, string> > functionDefinitions; vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
stringstream tableArgs; stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) { for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
...@@ -5620,8 +5467,8 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -5620,8 +5467,8 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction")); tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[i].upload(f);
tableArgs << ", const float"; tableArgs << ", const float";
if (width > 1) if (width > 1)
tableArgs << width; tableArgs << width;
...@@ -5667,16 +5514,16 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -5667,16 +5514,16 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypesVec, orderIndexVec, particleOrderVec); CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypesVec, orderIndexVec, particleOrderVec);
bool hasTypeFilters = (particleOrderVec.size() > 1); bool hasTypeFilters = (particleOrderVec.size() > 1);
if (hasTypeFilters) { if (hasTypeFilters) {
particleTypes = CudaArray::create<int>(cu, particleTypesVec.size(), "customManyParticleTypes"); particleTypes.initialize<int>(cu, particleTypesVec.size(), "customManyParticleTypes");
orderIndex = CudaArray::create<int>(cu, orderIndexVec.size(), "customManyParticleOrderIndex"); orderIndex.initialize<int>(cu, orderIndexVec.size(), "customManyParticleOrderIndex");
particleOrder = CudaArray::create<int>(cu, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder"); particleOrder.initialize<int>(cu, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
particleTypes->upload(particleTypesVec); particleTypes.upload(particleTypesVec);
orderIndex->upload(orderIndexVec); orderIndex.upload(orderIndexVec);
vector<int> flattenedOrder(particleOrder->getSize()); vector<int> flattenedOrder(particleOrder.getSize());
for (int i = 0; i < (int) particleOrderVec.size(); i++) for (int i = 0; i < (int) particleOrderVec.size(); i++)
for (int j = 0; j < particlesPerSet; j++) for (int j = 0; j < particlesPerSet; j++)
flattenedOrder[i*particlesPerSet+j] = particleOrderVec[i][j]; flattenedOrder[i*particlesPerSet+j] = particleOrderVec[i][j];
particleOrder->upload(flattenedOrder); particleOrder.upload(flattenedOrder);
} }
// Build data structures for exclusions. // Build data structures for exclusions.
...@@ -5697,10 +5544,10 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -5697,10 +5544,10 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
exclusionsVec.insert(exclusionsVec.end(), particleExclusions[i].begin(), particleExclusions[i].end()); exclusionsVec.insert(exclusionsVec.end(), particleExclusions[i].begin(), particleExclusions[i].end());
exclusionStartIndexVec[i+1] = exclusionsVec.size(); exclusionStartIndexVec[i+1] = exclusionsVec.size();
} }
exclusions = CudaArray::create<int>(cu, exclusionsVec.size(), "customManyParticleExclusions"); exclusions.initialize<int>(cu, exclusionsVec.size(), "customManyParticleExclusions");
exclusionStartIndex = CudaArray::create<int>(cu, exclusionStartIndexVec.size(), "customManyParticleExclusionStart"); exclusionStartIndex.initialize<int>(cu, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
exclusions->upload(exclusionsVec); exclusions.upload(exclusionsVec);
exclusionStartIndex->upload(exclusionStartIndexVec); exclusionStartIndex.upload(exclusionStartIndexVec);
} }
// Build data structures for the neighbor list. // Build data structures for the neighbor list.
...@@ -5708,19 +5555,19 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con ...@@ -5708,19 +5555,19 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
int numAtomBlocks = cu.getNumAtomBlocks(); int numAtomBlocks = cu.getNumAtomBlocks();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
blockCenter = new CudaArray(cu, numAtomBlocks, 4*elementSize, "blockCenter"); blockCenter.initialize(cu, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox = new CudaArray(cu, numAtomBlocks, 4*elementSize, "blockBoundingBox"); blockBoundingBox.initialize(cu, numAtomBlocks, 4*elementSize, "blockBoundingBox");
numNeighborPairs = CudaArray::create<int>(cu, 1, "customManyParticleNumNeighborPairs"); numNeighborPairs.initialize<int>(cu, 1, "customManyParticleNumNeighborPairs");
neighborStartIndex = CudaArray::create<int>(cu, numParticles+1, "customManyParticleNeighborStartIndex"); neighborStartIndex.initialize<int>(cu, numParticles+1, "customManyParticleNeighborStartIndex");
numNeighborsForAtom = CudaArray::create<int>(cu, numParticles, "customManyParticleNumNeighborsForAtom"); numNeighborsForAtom.initialize<int>(cu, numParticles, "customManyParticleNumNeighborsForAtom");
CHECK_RESULT(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING), "Error creating event for CustomManyParticleForce"); CHECK_RESULT(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING), "Error creating event for CustomManyParticleForce");
// Select a size for the array that holds the neighbor list. We have to make a fairly // Select a size for the array that holds the neighbor list. We have to make a fairly
// arbitrary guess, but if this turns out to be too small we'll increase it later. // arbitrary guess, but if this turns out to be too small we'll increase it later.
maxNeighborPairs = 150*numParticles; maxNeighborPairs = 150*numParticles;
neighborPairs = CudaArray::create<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs"); neighborPairs.initialize<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors = CudaArray::create<int>(cu, maxNeighborPairs, "customManyParticleNeighbors"); neighbors.initialize<int>(cu, maxNeighborPairs, "customManyParticleNeighbors");
} }
// Now to generate the kernel. First, it needs to calculate all distances, angles, // Now to generate the kernel. First, it needs to calculate all distances, angles,
...@@ -6045,22 +5892,22 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool ...@@ -6045,22 +5892,22 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
forceArgs.push_back(cu.getPeriodicBoxVecYPointer()); forceArgs.push_back(cu.getPeriodicBoxVecYPointer());
forceArgs.push_back(cu.getPeriodicBoxVecZPointer()); forceArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
forceArgs.push_back(&neighbors->getDevicePointer()); forceArgs.push_back(&neighbors.getDevicePointer());
forceArgs.push_back(&neighborStartIndex->getDevicePointer()); forceArgs.push_back(&neighborStartIndex.getDevicePointer());
} }
if (particleTypes != NULL) { if (particleTypes.isInitialized()) {
forceArgs.push_back(&particleTypes->getDevicePointer()); forceArgs.push_back(&particleTypes.getDevicePointer());
forceArgs.push_back(&orderIndex->getDevicePointer()); forceArgs.push_back(&orderIndex.getDevicePointer());
forceArgs.push_back(&particleOrder->getDevicePointer()); forceArgs.push_back(&particleOrder.getDevicePointer());
} }
if (exclusions != NULL) { if (exclusions.isInitialized()) {
forceArgs.push_back(&exclusions->getDevicePointer()); forceArgs.push_back(&exclusions.getDevicePointer());
forceArgs.push_back(&exclusionStartIndex->getDevicePointer()); forceArgs.push_back(&exclusionStartIndex.getDevicePointer());
} }
for (auto& buffer : params->getBuffers()) for (auto& buffer : params->getBuffers())
forceArgs.push_back(&buffer.getMemory()); forceArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
forceArgs.push_back(&function->getDevicePointer()); forceArgs.push_back(&function.getDevicePointer());
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
// Set arguments for the block bounds kernel. // Set arguments for the block bounds kernel.
...@@ -6071,9 +5918,9 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool ...@@ -6071,9 +5918,9 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
blockBoundsArgs.push_back(cu.getPeriodicBoxVecYPointer()); blockBoundsArgs.push_back(cu.getPeriodicBoxVecYPointer());
blockBoundsArgs.push_back(cu.getPeriodicBoxVecZPointer()); blockBoundsArgs.push_back(cu.getPeriodicBoxVecZPointer());
blockBoundsArgs.push_back(&cu.getPosq().getDevicePointer()); blockBoundsArgs.push_back(&cu.getPosq().getDevicePointer());
blockBoundsArgs.push_back(&blockCenter->getDevicePointer()); blockBoundsArgs.push_back(&blockCenter.getDevicePointer());
blockBoundsArgs.push_back(&blockBoundingBox->getDevicePointer()); blockBoundsArgs.push_back(&blockBoundingBox.getDevicePointer());
blockBoundsArgs.push_back(&numNeighborPairs->getDevicePointer()); blockBoundsArgs.push_back(&numNeighborPairs.getDevicePointer());
// Set arguments for the neighbor list kernel. // Set arguments for the neighbor list kernel.
...@@ -6083,32 +5930,32 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool ...@@ -6083,32 +5930,32 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
neighborsArgs.push_back(cu.getPeriodicBoxVecYPointer()); neighborsArgs.push_back(cu.getPeriodicBoxVecYPointer());
neighborsArgs.push_back(cu.getPeriodicBoxVecZPointer()); neighborsArgs.push_back(cu.getPeriodicBoxVecZPointer());
neighborsArgs.push_back(&cu.getPosq().getDevicePointer()); neighborsArgs.push_back(&cu.getPosq().getDevicePointer());
neighborsArgs.push_back(&blockCenter->getDevicePointer()); neighborsArgs.push_back(&blockCenter.getDevicePointer());
neighborsArgs.push_back(&blockBoundingBox->getDevicePointer()); neighborsArgs.push_back(&blockBoundingBox.getDevicePointer());
neighborsArgs.push_back(&neighborPairs->getDevicePointer()); neighborsArgs.push_back(&neighborPairs.getDevicePointer());
neighborsArgs.push_back(&numNeighborPairs->getDevicePointer()); neighborsArgs.push_back(&numNeighborPairs.getDevicePointer());
neighborsArgs.push_back(&numNeighborsForAtom->getDevicePointer()); neighborsArgs.push_back(&numNeighborsForAtom.getDevicePointer());
neighborsArgs.push_back(&maxNeighborPairs); neighborsArgs.push_back(&maxNeighborPairs);
if (exclusions != NULL) { if (exclusions.isInitialized()) {
neighborsArgs.push_back(&exclusions->getDevicePointer()); neighborsArgs.push_back(&exclusions.getDevicePointer());
neighborsArgs.push_back(&exclusionStartIndex->getDevicePointer()); neighborsArgs.push_back(&exclusionStartIndex.getDevicePointer());
} }
// Set arguments for the kernel to find neighbor list start indices. // Set arguments for the kernel to find neighbor list start indices.
startIndicesArgs.push_back(&numNeighborsForAtom->getDevicePointer()); startIndicesArgs.push_back(&numNeighborsForAtom.getDevicePointer());
startIndicesArgs.push_back(&neighborStartIndex->getDevicePointer()); startIndicesArgs.push_back(&neighborStartIndex.getDevicePointer());
startIndicesArgs.push_back(&numNeighborPairs->getDevicePointer()); startIndicesArgs.push_back(&numNeighborPairs.getDevicePointer());
startIndicesArgs.push_back(&maxNeighborPairs); startIndicesArgs.push_back(&maxNeighborPairs);
// Set arguments for the kernel to assemble the final neighbor list. // Set arguments for the kernel to assemble the final neighbor list.
copyPairsArgs.push_back(&neighborPairs->getDevicePointer()); copyPairsArgs.push_back(&neighborPairs.getDevicePointer());
copyPairsArgs.push_back(&neighbors->getDevicePointer()); copyPairsArgs.push_back(&neighbors.getDevicePointer());
copyPairsArgs.push_back(&numNeighborPairs->getDevicePointer()); copyPairsArgs.push_back(&numNeighborPairs.getDevicePointer());
copyPairsArgs.push_back(&maxNeighborPairs); copyPairsArgs.push_back(&maxNeighborPairs);
copyPairsArgs.push_back(&numNeighborsForAtom->getDevicePointer()); copyPairsArgs.push_back(&numNeighborsForAtom.getDevicePointer());
copyPairsArgs.push_back(&neighborStartIndex->getDevicePointer()); copyPairsArgs.push_back(&neighborStartIndex.getDevicePointer());
} }
} }
if (globalParamValues.size() > 0) { if (globalParamValues.size() > 0) {
...@@ -6131,7 +5978,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool ...@@ -6131,7 +5978,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
// We need to make sure there was enough memory for the neighbor list. Download the // We need to make sure there was enough memory for the neighbor list. Download the
// information asynchronously so kernels can be running at the same time. // information asynchronously so kernels can be running at the same time.
numNeighborPairs->download(numPairs, false); numNeighborPairs.download(numPairs, false);
CHECK_RESULT(cuEventRecord(event, 0), "Error recording event for CustomManyParticleForce"); CHECK_RESULT(cuEventRecord(event, 0), "Error recording event for CustomManyParticleForce");
cu.executeKernel(startIndicesKernel, &startIndicesArgs[0], 256, 256, 256*sizeof(int)); cu.executeKernel(startIndicesKernel, &startIndicesArgs[0], 256, 256, 256*sizeof(int));
cu.executeKernel(copyPairsKernel, &copyPairsArgs[0], maxNeighborPairs); cu.executeKernel(copyPairsKernel, &copyPairsArgs[0], maxNeighborPairs);
...@@ -6145,17 +5992,13 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool ...@@ -6145,17 +5992,13 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
if (*numPairs > maxNeighborPairs) { if (*numPairs > maxNeighborPairs) {
// Resize the arrays and run the calculation again. // Resize the arrays and run the calculation again.
delete neighborPairs;
neighborPairs = NULL;
delete neighbors;
neighbors = NULL;
maxNeighborPairs = (int) (1.1*(*numPairs)); maxNeighborPairs = (int) (1.1*(*numPairs));
neighborPairs = CudaArray::create<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs"); neighborPairs.resize(maxNeighborPairs);
neighbors = CudaArray::create<int>(cu, maxNeighborPairs, "customManyParticleNeighbors"); neighbors.resize(maxNeighborPairs);
forceArgs[5] = &neighbors->getDevicePointer(); forceArgs[5] = &neighbors.getDevicePointer();
neighborsArgs[5] = &neighborPairs->getDevicePointer(); neighborsArgs[5] = &neighborPairs.getDevicePointer();
copyPairsArgs[0] = &neighborPairs->getDevicePointer(); copyPairsArgs[0] = &neighborPairs.getDevicePointer();
copyPairsArgs[1] = &neighbors->getDevicePointer(); copyPairsArgs[1] = &neighbors.getDevicePointer();
continue; continue;
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment