Commit f15f591b authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to convert CudaArrays.

parent b8c86406
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Portions copyright (c) 2008-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -265,9 +265,8 @@ private:
class CudaCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel {
public:
CudaCalcHarmonicBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcHarmonicBondForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
hasInitializedKernel(false), cu(cu), system(system) {
}
~CudaCalcHarmonicBondForceKernel();
/**
* Initialize the kernel.
*
......@@ -298,7 +297,7 @@ private:
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaArray* params;
CudaArray params;
};
/**
......@@ -307,7 +306,7 @@ private:
class CudaCalcCustomBondForceKernel : public CalcCustomBondForceKernel {
public:
CudaCalcCustomBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomBondForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) {
hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
}
~CudaCalcCustomBondForceKernel();
/**
......@@ -341,7 +340,7 @@ private:
ForceInfo* info;
const System& system;
CudaParameterSet* params;
CudaArray* globals;
CudaArray globals;
std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues;
};
......@@ -352,9 +351,8 @@ private:
class CudaCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
public:
CudaCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcHarmonicAngleForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
hasInitializedKernel(false), cu(cu), system(system) {
}
~CudaCalcHarmonicAngleForceKernel();
/**
* Initialize the kernel.
*
......@@ -385,7 +383,7 @@ private:
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaArray* params;
CudaArray params;
};
/**
......@@ -394,7 +392,7 @@ private:
class CudaCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel {
public:
CudaCalcCustomAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomAngleForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) {
hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
}
~CudaCalcCustomAngleForceKernel();
/**
......@@ -428,7 +426,7 @@ private:
ForceInfo* info;
const System& system;
CudaParameterSet* params;
CudaArray* globals;
CudaArray globals;
std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues;
};
......@@ -439,9 +437,8 @@ private:
class CudaCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
public:
CudaCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcPeriodicTorsionForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
hasInitializedKernel(false), cu(cu), system(system) {
}
~CudaCalcPeriodicTorsionForceKernel();
/**
* Initialize the kernel.
*
......@@ -472,7 +469,7 @@ private:
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaArray* params;
CudaArray params;
};
/**
......@@ -481,9 +478,8 @@ private:
class CudaCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
public:
CudaCalcRBTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcRBTorsionForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params1(NULL), params2(NULL) {
hasInitializedKernel(false), cu(cu), system(system) {
}
~CudaCalcRBTorsionForceKernel();
/**
* Initialize the kernel.
*
......@@ -514,8 +510,8 @@ private:
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaArray* params1;
CudaArray* params2;
CudaArray params1;
CudaArray params2;
};
/**
......@@ -524,9 +520,8 @@ private:
class CudaCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
public:
CudaCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCMAPTorsionForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), coefficients(NULL), mapPositions(NULL), torsionMaps(NULL) {
hasInitializedKernel(false), cu(cu), system(system) {
}
~CudaCalcCMAPTorsionForceKernel();
/**
* Initialize the kernel.
*
......@@ -558,9 +553,9 @@ private:
ForceInfo* info;
const System& system;
std::vector<int2> mapPositionsVec;
CudaArray* coefficients;
CudaArray* mapPositions;
CudaArray* torsionMaps;
CudaArray coefficients;
CudaArray mapPositions;
CudaArray torsionMaps;
};
/**
......@@ -569,7 +564,7 @@ private:
class CudaCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel {
public:
CudaCalcCustomTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomTorsionForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) {
hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
}
~CudaCalcCustomTorsionForceKernel();
/**
......@@ -603,7 +598,7 @@ private:
ForceInfo* info;
const System& system;
CudaParameterSet* params;
CudaArray* globals;
CudaArray globals;
std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues;
};
......@@ -614,9 +609,7 @@ private:
class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeDispersionBsplineModuliX(NULL), pmeDispersionBsplineModuliY(NULL),
pmeDispersionBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), dispersionFft(NULL), fft(NULL), pmeio(NULL) {
cu(cu), hasInitializedFFT(false), sort(NULL), dispersionFft(NULL), fft(NULL), pmeio(NULL) {
}
~CudaCalcNonbondedForceKernel();
/**
......@@ -682,20 +675,20 @@ private:
CudaContext& cu;
ForceInfo* info;
bool hasInitializedFFT;
CudaArray* sigmaEpsilon;
CudaArray* exceptionParams;
CudaArray* cosSinSums;
CudaArray* directPmeGrid;
CudaArray* reciprocalPmeGrid;
CudaArray* pmeBsplineModuliX;
CudaArray* pmeBsplineModuliY;
CudaArray* pmeBsplineModuliZ;
CudaArray* pmeDispersionBsplineModuliX;
CudaArray* pmeDispersionBsplineModuliY;
CudaArray* pmeDispersionBsplineModuliZ;
CudaArray* pmeAtomRange;
CudaArray* pmeAtomGridIndex;
CudaArray* pmeEnergyBuffer;
CudaArray sigmaEpsilon;
CudaArray exceptionParams;
CudaArray cosSinSums;
CudaArray directPmeGrid;
CudaArray reciprocalPmeGrid;
CudaArray pmeBsplineModuliX;
CudaArray pmeBsplineModuliY;
CudaArray pmeBsplineModuliZ;
CudaArray pmeDispersionBsplineModuliX;
CudaArray pmeDispersionBsplineModuliY;
CudaArray pmeDispersionBsplineModuliZ;
CudaArray pmeAtomRange;
CudaArray pmeAtomGridIndex;
CudaArray pmeEnergyBuffer;
CudaSort* sort;
Kernel cpuPme;
PmeIO* pmeio;
......@@ -737,7 +730,7 @@ private:
class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
public:
CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomNonbondedForceKernel(name, platform),
cu(cu), params(NULL), globals(NULL), interactionGroupData(NULL), forceCopy(NULL), system(system), hasInitializedKernel(false) {
cu(cu), params(NULL), forceCopy(NULL), system(system), hasInitializedKernel(false) {
}
~CudaCalcCustomNonbondedForceKernel();
/**
......@@ -769,13 +762,13 @@ private:
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params;
CudaArray* globals;
CudaArray* interactionGroupData;
CudaArray globals;
CudaArray interactionGroupData;
CUfunction interactionGroupKernel;
std::vector<void*> interactionGroupArgs;
std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions;
std::vector<CudaArray> tabulatedFunctions;
double longRangeCoefficient;
std::vector<double> longRangeCoefficientDerivs;
bool hasInitializedLongRangeCorrection, hasInitializedKernel, hasParamDerivs;
......@@ -790,9 +783,8 @@ private:
class CudaCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel {
public:
CudaCalcGBSAOBCForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcGBSAOBCForceKernel(name, platform), cu(cu),
hasCreatedKernels(false), params(NULL), bornSum(NULL), bornRadii(NULL), bornForce(NULL), obcChain(NULL) {
hasCreatedKernels(false) {
}
~CudaCalcGBSAOBCForceKernel();
/**
* Initialize the kernel.
*
......@@ -823,11 +815,11 @@ private:
int maxTiles;
CudaContext& cu;
ForceInfo* info;
CudaArray* params;
CudaArray* bornSum;
CudaArray* bornRadii;
CudaArray* bornForce;
CudaArray* obcChain;
CudaArray params;
CudaArray bornSum;
CudaArray bornRadii;
CudaArray bornForce;
CudaArray obcChain;
CUfunction computeBornSumKernel;
CUfunction reduceBornSumKernel;
CUfunction force1Kernel;
......@@ -841,8 +833,7 @@ private:
class CudaCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
public:
CudaCalcCustomGBForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomGBForceKernel(name, platform),
hasInitializedKernels(false), cu(cu), params(NULL), computedValues(NULL), energyDerivs(NULL), energyDerivChain(NULL), longEnergyDerivs(NULL), globals(NULL),
valueBuffers(NULL), system(system) {
hasInitializedKernels(false), cu(cu), params(NULL), computedValues(NULL), energyDerivs(NULL), energyDerivChain(NULL), system(system) {
}
~CudaCalcCustomGBForceKernel();
/**
......@@ -880,13 +871,13 @@ private:
CudaParameterSet* energyDerivs;
CudaParameterSet* energyDerivChain;
std::vector<CudaParameterSet*> dValuedParam;
std::vector<CudaArray*> dValue0dParam;
CudaArray* longEnergyDerivs;
CudaArray* globals;
CudaArray* valueBuffers;
std::vector<CudaArray> dValue0dParam;
CudaArray longEnergyDerivs;
CudaArray globals;
CudaArray valueBuffers;
std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions;
std::vector<CudaArray> tabulatedFunctions;
std::vector<bool> pairValueUsesParam, pairEnergyUsesParam, pairEnergyUsesValue;
const System& system;
CUfunction pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel;
......@@ -901,7 +892,7 @@ private:
class CudaCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel {
public:
CudaCalcCustomExternalForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomExternalForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) {
hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
}
~CudaCalcCustomExternalForceKernel();
/**
......@@ -935,7 +926,7 @@ private:
ForceInfo* info;
const System& system;
CudaParameterSet* params;
CudaArray* globals;
CudaArray globals;
std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues;
};
......@@ -946,8 +937,7 @@ private:
class CudaCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
public:
CudaCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomHbondForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), donorParams(NULL), acceptorParams(NULL), donors(NULL), acceptors(NULL),
globals(NULL), donorExclusions(NULL), acceptorExclusions(NULL), system(system) {
hasInitializedKernel(false), cu(cu), donorParams(NULL), acceptorParams(NULL), system(system) {
}
~CudaCalcCustomHbondForceKernel();
/**
......@@ -981,14 +971,14 @@ private:
ForceInfo* info;
CudaParameterSet* donorParams;
CudaParameterSet* acceptorParams;
CudaArray* globals;
CudaArray* donors;
CudaArray* acceptors;
CudaArray* donorExclusions;
CudaArray* acceptorExclusions;
CudaArray globals;
CudaArray donors;
CudaArray acceptors;
CudaArray donorExclusions;
CudaArray acceptorExclusions;
std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions;
std::vector<CudaArray> tabulatedFunctions;
std::vector<void*> donorArgs, acceptorArgs;
const System& system;
CUfunction donorKernel, acceptorKernel;
......@@ -1000,7 +990,7 @@ private:
class CudaCalcCustomCentroidBondForceKernel : public CalcCustomCentroidBondForceKernel {
public:
CudaCalcCustomCentroidBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomCentroidBondForceKernel(name, platform),
cu(cu), params(NULL), globals(NULL), groupParticles(NULL), groupWeights(NULL), groupOffsets(NULL), groupForces(NULL), bondGroups(NULL), centerPositions(NULL), system(system) {
cu(cu), params(NULL), system(system) {
}
~CudaCalcCustomCentroidBondForceKernel();
/**
......@@ -1034,16 +1024,16 @@ private:
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params;
CudaArray* globals;
CudaArray* groupParticles;
CudaArray* groupWeights;
CudaArray* groupOffsets;
CudaArray* groupForces;
CudaArray* bondGroups;
CudaArray* centerPositions;
CudaArray globals;
CudaArray groupParticles;
CudaArray groupWeights;
CudaArray groupOffsets;
CudaArray groupForces;
CudaArray bondGroups;
CudaArray centerPositions;
std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions;
std::vector<CudaArray> tabulatedFunctions;
std::vector<void*> groupForcesArgs;
CUfunction computeCentersKernel, groupForcesKernel, applyForcesKernel;
const System& system;
......@@ -1055,7 +1045,7 @@ private:
class CudaCalcCustomCompoundBondForceKernel : public CalcCustomCompoundBondForceKernel {
public:
CudaCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomCompoundBondForceKernel(name, platform),
cu(cu), params(NULL), globals(NULL), system(system) {
cu(cu), params(NULL), system(system) {
}
~CudaCalcCustomCompoundBondForceKernel();
/**
......@@ -1088,10 +1078,10 @@ private:
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params;
CudaArray* globals;
CudaArray globals;
std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions;
std::vector<CudaArray> tabulatedFunctions;
const System& system;
};
......@@ -1101,9 +1091,7 @@ private:
class CudaCalcCustomManyParticleForceKernel : public CalcCustomManyParticleForceKernel {
public:
CudaCalcCustomManyParticleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomManyParticleForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), params(NULL), particleTypes(NULL), orderIndex(NULL), particleOrder(NULL), exclusions(NULL),
exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighborPairs(NULL), numNeighborPairs(NULL), neighborStartIndex(NULL),
numNeighborsForAtom(NULL), neighbors(NULL), system(system) {
hasInitializedKernel(false), cu(cu), params(NULL), system(system) {
}
~CudaCalcCustomManyParticleForceKernel();
/**
......@@ -1138,21 +1126,21 @@ private:
NonbondedMethod nonbondedMethod;
int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize;
CudaParameterSet* params;
CudaArray* particleTypes;
CudaArray* orderIndex;
CudaArray* particleOrder;
CudaArray* exclusions;
CudaArray* exclusionStartIndex;
CudaArray* blockCenter;
CudaArray* blockBoundingBox;
CudaArray* neighborPairs;
CudaArray* numNeighborPairs;
CudaArray* neighborStartIndex;
CudaArray* numNeighborsForAtom;
CudaArray* neighbors;
CudaArray particleTypes;
CudaArray orderIndex;
CudaArray particleOrder;
CudaArray exclusions;
CudaArray exclusionStartIndex;
CudaArray blockCenter;
CudaArray blockBoundingBox;
CudaArray neighborPairs;
CudaArray numNeighborPairs;
CudaArray neighborStartIndex;
CudaArray numNeighborsForAtom;
CudaArray neighbors;
std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions;
std::vector<CudaArray> tabulatedFunctions;
std::vector<void*> forceArgs, blockBoundsArgs, neighborsArgs, startIndicesArgs, copyPairsArgs;
const System& system;
CUfunction forceKernel, blockBoundsKernel, neighborsKernel, startIndicesKernel, copyPairsKernel;
......
......@@ -84,8 +84,10 @@ void CudaBondedUtilities::initialize(const System& system) {
for (int i = 0; i < numForces; i++) {
int numBonds = forceAtoms[i].size();
int numAtoms = forceAtoms[i][0].size();
int numArrays = (numAtoms+3)/4;
int startAtom = 0;
while (startAtom < numAtoms) {
atomIndices[i].resize(numArrays);
for (int j = 0; j < numArrays; j++) {
int width = min(numAtoms-startAtom, 4);
int paddedWidth = (width == 3 ? 4 : width);
vector<unsigned int> indexVec(paddedWidth*numBonds);
......@@ -93,9 +95,8 @@ void CudaBondedUtilities::initialize(const System& system) {
for (int atom = 0; atom < width; atom++)
indexVec[bond*paddedWidth+atom] = forceAtoms[i][bond][startAtom+atom];
}
atomIndices[i].push_back(CudaArray());
atomIndices[i].back().initialize(context, numBonds, 4*paddedWidth, "bondedIndices");
atomIndices[i].back().upload(&indexVec[0]);
atomIndices[i][j].initialize(context, numBonds, 4*paddedWidth, "bondedIndices");
atomIndices[i][j].upload(&indexVec[0]);
startAtom += width;
}
}
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Portions copyright (c) 2008-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -505,12 +505,6 @@ private:
const HarmonicBondForce& force;
};
CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
......@@ -520,18 +514,18 @@ void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const Har
if (numBonds == 0)
return;
vector<vector<int> > atoms(numBonds, vector<int>(2));
params = CudaArray::create<float2>(cu, numBonds, "bondParams");
params.initialize<float2>(cu, numBonds, "bondParams");
vector<float2> paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
double length, k;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
paramVector[i] = make_float2((float) length, (float) k);
}
params->upload(paramVector);
params.upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicBondForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params.getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
......@@ -560,7 +554,7 @@ void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& conte
force.getBondParameters(startIndex+i, atom1, atom2, length, k);
paramVector[i] = make_float2((float) length, (float) k);
}
params->upload(paramVector);
params.upload(paramVector);
// Mark that the current reordering may be invalid.
......@@ -600,8 +594,6 @@ CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
......@@ -649,9 +641,9 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customBondGlobals");
globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
globals.initialize<float>(cu, force.getNumGlobalParameters(), "customBondGlobals");
globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
......@@ -680,7 +672,7 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
}
double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
......@@ -689,7 +681,7 @@ double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool include
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
globals.upload(globalParamValues);
}
return 0.0;
}
......@@ -749,12 +741,6 @@ private:
const HarmonicAngleForce& force;
};
CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
......@@ -764,7 +750,7 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
if (numAngles == 0)
return;
vector<vector<int> > atoms(numAngles, vector<int>(3));
params = CudaArray::create<float2>(cu, numAngles, "angleParams");
params.initialize<float2>(cu, numAngles, "angleParams");
vector<float2> paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
double angle, k;
......@@ -772,11 +758,11 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
paramVector[i] = make_float2((float) angle, (float) k);
}
params->upload(paramVector);
params.upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicAngleForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params.getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
......@@ -805,7 +791,7 @@ void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& cont
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
paramVector[i] = make_float2((float) angle, (float) k);
}
params->upload(paramVector);
params.upload(paramVector);
// Mark that the current reordering may be invalid.
......@@ -846,8 +832,6 @@ CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
......@@ -895,9 +879,9 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
variables[name] = "angleParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customAngleGlobals");
globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
globals.initialize<float>(cu, force.getNumGlobalParameters(), "customAngleGlobals");
globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
......@@ -926,7 +910,7 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
}
double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
......@@ -935,7 +919,7 @@ double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includ
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
globals.upload(globalParamValues);
}
return 0.0;
}
......@@ -996,12 +980,6 @@ private:
const PeriodicTorsionForce& force;
};
CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
......@@ -1011,7 +989,7 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
if (numTorsions == 0)
return;
vector<vector<int> > atoms(numTorsions, vector<int>(4));
params = CudaArray::create<float4>(cu, numTorsions, "periodicTorsionParams");
params.initialize<float4>(cu, numTorsions, "periodicTorsionParams");
vector<float4> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int periodicity;
......@@ -1019,11 +997,11 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
}
params->upload(paramVector);
params.upload(paramVector);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::periodicTorsionForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float4");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
......@@ -1052,7 +1030,7 @@ void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& co
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
}
params->upload(paramVector);
params.upload(paramVector);
// Mark that the current reordering may be invalid.
......@@ -1087,14 +1065,6 @@ private:
const RBTorsionForce& force;
};
CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
cu.setAsCurrent();
if (params1 != NULL)
delete params1;
if (params2 != NULL)
delete params2;
}
void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
......@@ -1104,8 +1074,8 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
if (numTorsions == 0)
return;
vector<vector<int> > atoms(numTorsions, vector<int>(4));
params1 = CudaArray::create<float4>(cu, numTorsions, "rbTorsionParams1");
params2 = CudaArray::create<float2>(cu, numTorsions, "rbTorsionParams2");
params1.initialize<float4>(cu, numTorsions, "rbTorsionParams1");
params2.initialize<float2>(cu, numTorsions, "rbTorsionParams2");
vector<float4> paramVector1(numTorsions);
vector<float2> paramVector2(numTorsions);
for (int i = 0; i < numTorsions; i++) {
......@@ -1115,13 +1085,13 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
paramVector2[i] = make_float2((float) c4, (float) c5);
}
params1->upload(paramVector1);
params2->upload(paramVector2);
params1.upload(paramVector1);
params2.upload(paramVector2);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::rbTorsionForce;
replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1->getDevicePointer(), "float4");
replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2->getDevicePointer(), "float2");
replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1.getDevicePointer(), "float4");
replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2.getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
......@@ -1152,8 +1122,8 @@ void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context,
paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3);
paramVector2[i] = make_float2((float) c4, (float) c5);
}
params1->upload(paramVector1);
params2->upload(paramVector2);
params1.upload(paramVector1);
params2.upload(paramVector2);
// Mark that the current reordering may be invalid.
......@@ -1190,15 +1160,6 @@ private:
const CMAPTorsionForce& force;
};
CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
if (coefficients != NULL)
delete coefficients;
if (mapPositions != NULL)
delete mapPositions;
if (torsionMaps != NULL)
delete torsionMaps;
}
void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
......@@ -1230,17 +1191,17 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP
vector<int> torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients = CudaArray::create<float4>(cu, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions = CudaArray::create<int2>(cu, numMaps, "cmapTorsionMapPositions");
torsionMaps = CudaArray::create<int>(cu, numTorsions, "cmapTorsionMaps");
coefficients->upload(coeffVec);
mapPositions->upload(mapPositionsVec);
torsionMaps->upload(torsionMapsVec);
coefficients.initialize<float4>(cu, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions.initialize<int2>(cu, numMaps, "cmapTorsionMapPositions");
torsionMaps.initialize<int>(cu, numTorsions, "cmapTorsionMaps");
coefficients.upload(coeffVec);
mapPositions.upload(mapPositionsVec);
torsionMaps.upload(torsionMapsVec);
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients->getDevicePointer(), "float4");
replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions->getDevicePointer(), "int2");
replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps->getDevicePointer(), "int");
replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients.getDevicePointer(), "float4");
replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions.getDevicePointer(), "int2");
replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps.getDevicePointer(), "int");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
......@@ -1256,9 +1217,9 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (mapPositions->getSize() != numMaps)
if (mapPositions.getSize() != numMaps)
throw OpenMMException("updateParametersInContext: The number of maps has changed");
if (torsionMaps->getSize() != numTorsions)
if (torsionMaps.getSize() != numTorsions)
throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");
// Update the maps.
......@@ -1281,7 +1242,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
coeffVec.push_back(make_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
coefficients->upload(coeffVec);
coefficients.upload(coeffVec);
// Update the indices.
......@@ -1290,7 +1251,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
int index[8];
force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
}
torsionMaps->upload(torsionMapsVec);
torsionMaps.upload(torsionMapsVec);
}
class CudaCalcCustomTorsionForceKernel::ForceInfo : public CudaForceInfo {
......@@ -1327,8 +1288,6 @@ private:
CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
......@@ -1376,9 +1335,9 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
variables[name] = "torsionParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customTorsionGlobals");
globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
globals.initialize<float>(cu, force.getNumGlobalParameters(), "customTorsionGlobals");
globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
......@@ -1407,7 +1366,7 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
}
double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
......@@ -1416,7 +1375,7 @@ double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool incl
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
globals.upload(globalParamValues);
}
return 0.0;
}
......@@ -1576,34 +1535,6 @@ private:
CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
cu.setAsCurrent();
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
if (exceptionParams != NULL)
delete exceptionParams;
if (cosSinSums != NULL)
delete cosSinSums;
if (directPmeGrid != NULL)
delete directPmeGrid;
if (reciprocalPmeGrid != NULL)
delete reciprocalPmeGrid;
if (pmeBsplineModuliX != NULL)
delete pmeBsplineModuliX;
if (pmeBsplineModuliY != NULL)
delete pmeBsplineModuliY;
if (pmeBsplineModuliZ != NULL)
delete pmeBsplineModuliZ;
if (pmeDispersionBsplineModuliX != NULL)
delete pmeDispersionBsplineModuliX;
if (pmeDispersionBsplineModuliY != NULL)
delete pmeDispersionBsplineModuliY;
if (pmeDispersionBsplineModuliZ != NULL)
delete pmeDispersionBsplineModuliZ;
if (pmeAtomRange != NULL)
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
if (pmeEnergyBuffer != NULL)
delete pmeEnergyBuffer;
if (sort != NULL)
delete sort;
if (fft != NULL)
......@@ -1647,7 +1578,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize());
float4* posqf = (float4*) &temp[0];
......@@ -1682,7 +1613,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
exclusionList[exclusion.second].push_back(exclusion.first);
}
posq.upload(&temp[0]);
sigmaEpsilon->upload(sigmaEpsilonVector);
sigmaEpsilon.upload(sigmaEpsilonVector);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
......@@ -1740,7 +1671,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
cosSinSums.initialize(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
......@@ -1844,22 +1775,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME)
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
directPmeGrid = new CudaArray(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid = new CudaArray(cu, gridElements, 2*elementSize, "reciprocalPmeGrid");
cu.addAutoclearBuffer(*directPmeGrid);
pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
directPmeGrid.initialize(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid.initialize(cu, gridElements, 2*elementSize, "reciprocalPmeGrid");
cu.addAutoclearBuffer(directPmeGrid);
pmeBsplineModuliX.initialize(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY.initialize(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ.initialize(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) {
pmeDispersionBsplineModuliX = new CudaArray(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY = new CudaArray(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ = new CudaArray(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
pmeDispersionBsplineModuliX.initialize(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY.initialize(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ.initialize(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
}
pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
pmeAtomRange.initialize<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex.initialize<int2>(cu, numParticles, "pmeAtomGridIndex");
int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer = new CudaArray(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cu.clearBuffer(*pmeEnergyBuffer);
pmeEnergyBuffer.initialize(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cu.clearBuffer(pmeEnergyBuffer);
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
int cufftVersion;
cufftGetVersion(&cufftVersion);
......@@ -1903,7 +1834,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), *pmeEnergyBuffer, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), pmeEnergyBuffer, recipForceGroup));
}
hasInitializedFFT = true;
......@@ -1916,9 +1847,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
xsize = gridSizeX;
ysize = gridSizeY;
zsize = gridSizeZ;
xmoduli = pmeBsplineModuliX;
ymoduli = pmeBsplineModuliY;
zmoduli = pmeBsplineModuliZ;
xmoduli = &pmeBsplineModuliX;
ymoduli = &pmeBsplineModuliY;
zmoduli = &pmeBsplineModuliZ;
}
else {
if (!doLJPME)
......@@ -1926,9 +1857,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ;
xmoduli = pmeDispersionBsplineModuliX;
ymoduli = pmeDispersionBsplineModuliY;
zmoduli = pmeDispersionBsplineModuliZ;
xmoduli = &pmeDispersionBsplineModuliX;
ymoduli = &pmeDispersionBsplineModuliY;
zmoduli = &pmeDispersionBsplineModuliZ;
}
int maxSize = max(max(xsize, ysize), zsize);
vector<double> data(PmeOrder);
......@@ -2008,7 +1939,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
sizeof(float2), sigmaEpsilon->getDevicePointer()));
sizeof(float2), sigmaEpsilon.getDevicePointer()));
// Initialize the exceptions.
......@@ -2019,7 +1950,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams = CudaArray::create<float4>(cu, numExceptions, "exceptionParams");
exceptionParams.initialize<float4>(cu, numExceptions, "exceptionParams");
vector<float4> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
......@@ -2027,9 +1958,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
exceptionParams->upload(exceptionParamsVector);
exceptionParams.upload(exceptionParamsVector);
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams->getDevicePointer(), "float4");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
info = new ForceInfo(force);
......@@ -2037,13 +1968,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
if (cosSinSums != NULL && includeReciprocal) {
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums->getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums->getSize());
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums->getDevicePointer(), cu.getPeriodicBoxSizePointer()};
if (cosSinSums.isInitialized() && includeReciprocal) {
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums.getSize());
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
}
if (directPmeGrid != NULL && includeReciprocal) {
if (directPmeGrid.isInitialized()&& includeReciprocal) {
if (usePmeStream)
cu.setCurrentStream(pmeStream);
......@@ -2075,118 +2006,118 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
// Execute the reciprocal space kernels.
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(*pmeAtomGridIndex);
sort->sort(pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()};
void* finishSpreadArgs[] = {&directPmeGrid.getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
}
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecD2Z(fftForward, (double*) directPmeGrid->getDevicePointer(), (double2*) reciprocalPmeGrid->getDevicePointer());
cufftExecD2Z(fftForward, (double*) directPmeGrid.getDevicePointer(), (double2*) reciprocalPmeGrid.getDevicePointer());
else
cufftExecR2C(fftForward, (float*) directPmeGrid->getDevicePointer(), (float2*) reciprocalPmeGrid->getDevicePointer());
cufftExecR2C(fftForward, (float*) directPmeGrid.getDevicePointer(), (float2*) reciprocalPmeGrid.getDevicePointer());
}
else {
fft->execFFT(*directPmeGrid, *reciprocalPmeGrid, true);
fft->execFFT(directPmeGrid, reciprocalPmeGrid, true);
}
if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocalPmeGrid->getDevicePointer(), usePmeStream ? &pmeEnergyBuffer->getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(),
void* computeEnergyArgs[] = {&reciprocalPmeGrid.getDevicePointer(), usePmeStream ? &pmeEnergyBuffer.getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX.getDevicePointer(), &pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, gridSizeX*gridSizeY*gridSizeZ);
}
void* convolutionArgs[] = {&reciprocalPmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(),
void* convolutionArgs[] = {&reciprocalPmeGrid.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX.getDevicePointer(), &pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeConvolutionKernel, convolutionArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecZ2D(fftBackward, (double2*) reciprocalPmeGrid->getDevicePointer(), (double*) directPmeGrid->getDevicePointer());
cufftExecZ2D(fftBackward, (double2*) reciprocalPmeGrid.getDevicePointer(), (double*) directPmeGrid.getDevicePointer());
else
cufftExecC2R(fftBackward, (float2*) reciprocalPmeGrid->getDevicePointer(), (float*) directPmeGrid->getDevicePointer());
cufftExecC2R(fftBackward, (float2*) reciprocalPmeGrid.getDevicePointer(), (float*) directPmeGrid.getDevicePointer());
}
else {
fft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false);
fft->execFFT(reciprocalPmeGrid, directPmeGrid, false);
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
// As written, we check only the Electrostatic grid pointer to get here. We could separate them out, but for
// now we assume that LJPME can only be used if electrostatic PME is also active.
if (doLJPME) {
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(*pmeAtomGridIndex);
sort->sort(pmeAtomGridIndex);
cu.clearBuffer(*directPmeGrid);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.clearBuffer(directPmeGrid);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer(),
&sigmaEpsilon->getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&sigmaEpsilon.getDevicePointer()};
cu.executeKernel(pmeDispersionSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()};
void* finishSpreadArgs[] = {&directPmeGrid.getDevicePointer()};
cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
}
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid->getDevicePointer(), (double2*) reciprocalPmeGrid->getDevicePointer());
cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid.getDevicePointer(), (double2*) reciprocalPmeGrid.getDevicePointer());
else
cufftExecR2C(dispersionFftForward, (float*) directPmeGrid->getDevicePointer(), (float2*) reciprocalPmeGrid->getDevicePointer());
cufftExecR2C(dispersionFftForward, (float*) directPmeGrid.getDevicePointer(), (float2*) reciprocalPmeGrid.getDevicePointer());
}
else {
dispersionFft->execFFT(*directPmeGrid, *reciprocalPmeGrid, true);
dispersionFft->execFFT(directPmeGrid, reciprocalPmeGrid, true);
}
if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocalPmeGrid->getDevicePointer(), usePmeStream ? &pmeEnergyBuffer->getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX->getDevicePointer(), &pmeDispersionBsplineModuliY->getDevicePointer(), &pmeDispersionBsplineModuliZ->getDevicePointer(),
void* computeEnergyArgs[] = {&reciprocalPmeGrid.getDevicePointer(), usePmeStream ? &pmeEnergyBuffer.getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX.getDevicePointer(), &pmeDispersionBsplineModuliY.getDevicePointer(), &pmeDispersionBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
}
void* convolutionArgs[] = {&reciprocalPmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX->getDevicePointer(), &pmeDispersionBsplineModuliY->getDevicePointer(), &pmeDispersionBsplineModuliZ->getDevicePointer(),
void* convolutionArgs[] = {&reciprocalPmeGrid.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX.getDevicePointer(), &pmeDispersionBsplineModuliY.getDevicePointer(), &pmeDispersionBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid->getDevicePointer(), (double*) directPmeGrid->getDevicePointer());
cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid.getDevicePointer(), (double*) directPmeGrid.getDevicePointer());
else
cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid->getDevicePointer(), (float*) directPmeGrid->getDevicePointer());
cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid.getDevicePointer(), (float*) directPmeGrid.getDevicePointer());
}
else {
dispersionFft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false);
dispersionFft->execFFT(reciprocalPmeGrid, directPmeGrid, false);
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer(),
&sigmaEpsilon->getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&sigmaEpsilon.getDevicePointer()};
cu.executeKernel(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
}
if (usePmeStream) {
......@@ -2253,7 +2184,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
sumSquaredCharges += charge*charge;
}
cu.setCharges(chargeVector);
sigmaEpsilon->upload(sigmaEpsilonVector);
sigmaEpsilon.upload(sigmaEpsilonVector);
// Record the exceptions.
......@@ -2265,7 +2196,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
}
exceptionParams->upload(exceptionParamsVector);
exceptionParams.upload(exceptionParamsVector);
}
// Compute other values.
......@@ -2355,12 +2286,6 @@ CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (interactionGroupData != NULL)
delete interactionGroupData;
for (auto function : tabulatedFunctions)
delete function;
if (forceCopy != NULL)
delete forceCopy;
}
......@@ -2377,7 +2302,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
int numParticles = force.getNumParticles();
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0)
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals");
globals.initialize<float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals");
vector<vector<float> > paramVector(numParticles);
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
......@@ -2402,6 +2327,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<string> tableTypes;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
......@@ -2410,9 +2336,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer()));
tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[i].upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[i].getDevicePointer()));
if (width == 1)
tableTypes.push_back("float");
else
......@@ -2427,8 +2353,8 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
if (globals.isInitialized())
globals.upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
......@@ -2482,9 +2408,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
if (globals != NULL) {
globals->upload(globalParamValues);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer()));
if (globals.isInitialized()) {
globals.upload(globalParamValues);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals.getDevicePointer()));
}
}
info = new ForceInfo(force);
......@@ -2660,8 +2586,8 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
for (; indexInTileSet < 32; indexInTileSet++)
groupData.push_back(make_int4(0, 0, 0, 0));
}
interactionGroupData = CudaArray::create<int4>(cu, groupData.size(), "interactionGroupData");
interactionGroupData->upload(groupData);
interactionGroupData.initialize<int4>(cu, groupData.size(), "interactionGroupData");
interactionGroupData.upload(groupData);
// Create the kernel.
......@@ -2687,7 +2613,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
args<<", const "<<buffers[i].getType()<<"* __restrict__ global_params"<<(i+1);
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
args << ", const " << tableTypes[i]<< "* __restrict__ table" << i;
if (globals != NULL)
if (globals.isInitialized())
args<<", const float* __restrict__ globals";
if (hasParamDerivs)
args << ", mixed* __restrict__ energyParamDerivs";
......@@ -2758,7 +2684,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
}
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
......@@ -2767,7 +2693,7 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
globalParamValues[i] = value;
}
if (changed) {
globals->upload(globalParamValues);
globals.upload(globalParamValues);
if (forceCopy != NULL) {
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
......@@ -2778,13 +2704,13 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
}
if (interactionGroupData != NULL) {
if (interactionGroupData.isInitialized()) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
interactionGroupArgs.push_back(&cu.getForce().getDevicePointer());
interactionGroupArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
interactionGroupArgs.push_back(&cu.getPosq().getDevicePointer());
interactionGroupArgs.push_back(&interactionGroupData->getDevicePointer());
interactionGroupArgs.push_back(&interactionGroupData.getDevicePointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getInvPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxVecXPointer());
......@@ -2792,10 +2718,10 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
interactionGroupArgs.push_back(cu.getPeriodicBoxVecZPointer());
for (auto& buffer : params->getBuffers())
interactionGroupArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions)
interactionGroupArgs.push_back(&function->getDevicePointer());
if (globals != NULL)
interactionGroupArgs.push_back(&globals->getDevicePointer());
for (auto& function : tabulatedFunctions)
interactionGroupArgs.push_back(&function.getDevicePointer());
if (globals.isInitialized())
interactionGroupArgs.push_back(&globals.getDevicePointer());
if (hasParamDerivs)
interactionGroupArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
}
......@@ -2855,38 +2781,24 @@ private:
const GBSAOBCForce& force;
};
CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (bornSum != NULL)
delete bornSum;
if (bornRadii != NULL)
delete bornRadii;
if (bornForce != NULL)
delete bornForce;
if (obcChain != NULL)
delete obcChain;
}
void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
cu.setAsCurrent();
if (cu.getPlatformData().contexts.size() > 1)
throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
params = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
params.initialize<float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
if (cu.getUseDoublePrecision()) {
bornRadii = CudaArray::create<double>(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain = CudaArray::create<double>(cu, cu.getPaddedNumAtoms(), "obcChain");
bornRadii.initialize<double>(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain.initialize<double>(cu, cu.getPaddedNumAtoms(), "obcChain");
}
else {
bornRadii = CudaArray::create<float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain = CudaArray::create<float>(cu, cu.getPaddedNumAtoms(), "obcChain");
bornRadii.initialize<float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain.initialize<float>(cu, cu.getPaddedNumAtoms(), "obcChain");
}
bornSum = CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "bornSum");
bornForce = CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "bornForce");
cu.addAutoclearBuffer(*bornSum);
cu.addAutoclearBuffer(*bornForce);
bornSum.initialize<long long>(cu, cu.getPaddedNumAtoms(), "bornSum");
bornForce.initialize<long long>(cu, cu.getPaddedNumAtoms(), "bornForce");
cu.addAutoclearBuffer(bornSum);
cu.addAutoclearBuffer(bornForce);
CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize());
float4* posqf = (float4*) &temp[0];
......@@ -2904,7 +2816,7 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
posqf[i] = make_float4(0, 0, 0, (float) charge);
}
posq.upload(&temp[0]);
params->upload(paramsVector);
params.upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
......@@ -2912,8 +2824,8 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
cutoff = force.getCutoffDistance();
string source = CudaKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params->getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce->getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params.getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce.getDevicePointer()));
info = new ForceInfo(force);
cu.addForce(info);
}
......@@ -2951,9 +2863,9 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
map<string, string> replacements;
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::gbsaObc1, replacements), defines);
computeBornSumKernel = cu.getKernel(module, "computeBornSum");
computeSumArgs.push_back(&bornSum->getDevicePointer());
computeSumArgs.push_back(&bornSum.getDevicePointer());
computeSumArgs.push_back(&cu.getPosq().getDevicePointer());
computeSumArgs.push_back(&params->getDevicePointer());
computeSumArgs.push_back(&params.getDevicePointer());
if (nb.getUseCutoff()) {
computeSumArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
computeSumArgs.push_back(&nb.getInteractionCount().getDevicePointer());
......@@ -2972,10 +2884,10 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
computeSumArgs.push_back(&nb.getExclusionTiles().getDevicePointer());
force1Kernel = cu.getKernel(module, "computeGBSAForce1");
force1Args.push_back(&cu.getForce().getDevicePointer());
force1Args.push_back(&bornForce->getDevicePointer());
force1Args.push_back(&bornForce.getDevicePointer());
force1Args.push_back(&cu.getEnergyBuffer().getDevicePointer());
force1Args.push_back(&cu.getPosq().getDevicePointer());
force1Args.push_back(&bornRadii->getDevicePointer());
force1Args.push_back(&bornRadii.getDevicePointer());
force1Args.push_back(NULL);
if (nb.getUseCutoff()) {
force1Args.push_back(&nb.getInteractingTiles().getDevicePointer());
......@@ -3008,12 +2920,12 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
}
cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
float alpha = 1.0f, beta = 0.8f, gamma = 4.85f;
void* reduceSumArgs[] = {&alpha, &beta, &gamma, &bornSum->getDevicePointer(), &params->getDevicePointer(),
&bornRadii->getDevicePointer(), &obcChain->getDevicePointer()};
void* reduceSumArgs[] = {&alpha, &beta, &gamma, &bornSum.getDevicePointer(), &params.getDevicePointer(),
&bornRadii.getDevicePointer(), &obcChain.getDevicePointer()};
cu.executeKernel(reduceBornSumKernel, reduceSumArgs, cu.getPaddedNumAtoms());
cu.executeKernel(force1Kernel, &force1Args[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
void* reduceForceArgs[] = {&bornForce->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), &params->getDevicePointer(),
&bornRadii->getDevicePointer(), &obcChain->getDevicePointer()};
void* reduceForceArgs[] = {&bornForce.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), &params.getDevicePointer(),
&bornRadii.getDevicePointer(), &obcChain.getDevicePointer()};
cu.executeKernel(reduceBornForceKernel, &reduceForceArgs[0], cu.getPaddedNumAtoms());
return 0.0;
}
......@@ -3039,7 +2951,7 @@ void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, c
paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
}
cu.setCharges(chargeVector);
params->upload(paramsVector);
params.upload(paramsVector);
// Mark that the current reordering may be invalid.
......@@ -3087,16 +2999,6 @@ CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
delete energyDerivs;
if (energyDerivChain != NULL)
delete energyDerivChain;
if (longEnergyDerivs != NULL)
delete longEnergyDerivs;
if (globals != NULL)
delete globals;
if (valueBuffers != NULL)
delete valueBuffers;
for (auto function : tabulatedFunctions)
delete function;
for (auto d : dValue0dParam)
delete d;
for (auto d : dValuedParam)
delete d;
}
......@@ -3135,7 +3037,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true);
computedValues = new CudaParameterSet(cu, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cu.getUseDoublePrecision());
if (force.getNumGlobalParameters() > 0)
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customGBGlobals");
globals.initialize<float>(cu, force.getNumGlobalParameters(), "customGBGlobals");
vector<vector<float> > paramVector(paddedNumParticles, vector<float>(numParams, 0));
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
......@@ -3159,6 +3061,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
......@@ -3167,9 +3070,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer()));
tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[i].upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[i].getDevicePointer()));
tableArgs << ", const float";
if (width > 1)
tableArgs << width;
......@@ -3184,8 +3087,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
if (globals.isInitialized())
globals.upload(globalParamValues);
// Record derivatives of expressions needed for the chain rule terms.
......@@ -3233,15 +3136,16 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++)
energyParamDerivExpressions[i].push_back(ex.differentiate(force.getEnergyParameterDerivativeName(j)).optimize());
}
longEnergyDerivs = CudaArray::create<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
longEnergyDerivs.initialize<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
energyDerivChain = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0);
dValue0dParam.resize(force.getNumEnergyParameterDerivatives());
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
dValuedParam.push_back(new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "dValuedParam", true, cu.getUseDoublePrecision()));
dValue0dParam.push_back(CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "dValue0dParam"));
cu.addAutoclearBuffer(*dValue0dParam.back());
dValue0dParam[i].initialize<long long>(cu, cu.getPaddedNumAtoms(), "dValue0dParam");
cu.addAutoclearBuffer(dValue0dParam[i]);
string name = force.getEnergyParameterDerivativeName(i);
cu.addEnergyParameterDerivative(name);
}
......@@ -3826,9 +3730,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
}
if (globals != NULL) {
globals->upload(globalParamValues);
arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer()));
if (globals.isInitialized()) {
globals.upload(globalParamValues);
arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals.getDevicePointer()));
}
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
for (auto param : parameters)
......@@ -3838,7 +3742,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
}
info = new ForceInfo(force);
cu.addForce(info);
cu.addAutoclearBuffer(*longEnergyDerivs);
cu.addAutoclearBuffer(longEnergyDerivs);
}
double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
......@@ -3881,13 +3785,13 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
// Set arguments for kernels.
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
valueBuffers = CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "customGBValueBuffers");
cu.addAutoclearBuffer(*valueBuffers);
cu.clearBuffer(valueBuffers->getDevicePointer(), sizeof(long long)*valueBuffers->getSize());
valueBuffers.initialize<long long>(cu, cu.getPaddedNumAtoms(), "customGBValueBuffers");
cu.addAutoclearBuffer(valueBuffers);
cu.clearBuffer(valueBuffers.getDevicePointer(), sizeof(long long)*valueBuffers.getSize());
pairValueArgs.push_back(&cu.getPosq().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusionTiles().getDevicePointer());
pairValueArgs.push_back(&valueBuffers->getDevicePointer());
pairValueArgs.push_back(&valueBuffers.getDevicePointer());
if (nb.getUseCutoff()) {
pairValueArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
pairValueArgs.push_back(&nb.getInteractionCount().getDevicePointer());
......@@ -3903,31 +3807,31 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
}
else
pairValueArgs.push_back(&maxTiles);
if (globals != NULL)
pairValueArgs.push_back(&globals->getDevicePointer());
if (globals.isInitialized())
pairValueArgs.push_back(&globals.getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairValueUsesParam[i])
pairValueArgs.push_back(&params->getBuffers()[i].getMemory());
}
for (auto d : dValue0dParam)
pairValueArgs.push_back(&d->getDevicePointer());
for (auto function : tabulatedFunctions)
pairValueArgs.push_back(&function->getDevicePointer());
for (auto& d : dValue0dParam)
pairValueArgs.push_back(&d.getDevicePointer());
for (auto& function : tabulatedFunctions)
pairValueArgs.push_back(&function.getDevicePointer());
perParticleValueArgs.push_back(&cu.getPosq().getDevicePointer());
perParticleValueArgs.push_back(&valueBuffers->getDevicePointer());
if (globals != NULL)
perParticleValueArgs.push_back(&globals->getDevicePointer());
perParticleValueArgs.push_back(&valueBuffers.getDevicePointer());
if (globals.isInitialized())
perParticleValueArgs.push_back(&globals.getDevicePointer());
for (auto& buffer : params->getBuffers())
perParticleValueArgs.push_back(&buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
perParticleValueArgs.push_back(&buffer.getMemory());
for (int i = 0; i < dValuedParam.size(); i++) {
perParticleValueArgs.push_back(&dValue0dParam[i]->getDevicePointer());
perParticleValueArgs.push_back(&dValue0dParam[i].getDevicePointer());
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
perParticleValueArgs.push_back(&dValuedParam[i]->getBuffers()[j].getMemory());
}
for (auto function : tabulatedFunctions)
perParticleValueArgs.push_back(&function->getDevicePointer());
for (auto& function : tabulatedFunctions)
perParticleValueArgs.push_back(&function.getDevicePointer());
pairEnergyArgs.push_back(&cu.getForce().getDevicePointer());
pairEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
pairEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
......@@ -3949,8 +3853,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
}
else
pairEnergyArgs.push_back(&maxTiles);
if (globals != NULL)
pairEnergyArgs.push_back(&globals->getDevicePointer());
if (globals.isInitialized())
pairEnergyArgs.push_back(&globals.getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairEnergyUsesParam[i])
pairEnergyArgs.push_back(&params->getBuffers()[i].getMemory());
......@@ -3959,16 +3863,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
if (pairEnergyUsesValue[i])
pairEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory());
}
pairEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer());
pairEnergyArgs.push_back(&longEnergyDerivs.getDevicePointer());
if (needEnergyParamDerivs)
pairEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
for (auto function : tabulatedFunctions)
pairEnergyArgs.push_back(&function->getDevicePointer());
for (auto& function : tabulatedFunctions)
pairEnergyArgs.push_back(&function.getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getForce().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
if (globals != NULL)
perParticleEnergyArgs.push_back(&globals->getDevicePointer());
if (globals.isInitialized())
perParticleEnergyArgs.push_back(&globals.getDevicePointer());
for (auto& buffer : params->getBuffers())
perParticleEnergyArgs.push_back(&buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
......@@ -3977,16 +3881,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
perParticleEnergyArgs.push_back(&buffer.getMemory());
for (auto& buffer : energyDerivChain->getBuffers())
perParticleEnergyArgs.push_back(&buffer.getMemory());
perParticleEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer());
perParticleEnergyArgs.push_back(&longEnergyDerivs.getDevicePointer());
if (needEnergyParamDerivs)
perParticleEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
for (auto function : tabulatedFunctions)
perParticleEnergyArgs.push_back(&function->getDevicePointer());
for (auto& function : tabulatedFunctions)
perParticleEnergyArgs.push_back(&function.getDevicePointer());
if (needParameterGradient || needEnergyParamDerivs) {
gradientChainRuleArgs.push_back(&cu.getForce().getDevicePointer());
gradientChainRuleArgs.push_back(&cu.getPosq().getDevicePointer());
if (globals != NULL)
gradientChainRuleArgs.push_back(&globals->getDevicePointer());
if (globals.isInitialized())
gradientChainRuleArgs.push_back(&globals.getDevicePointer());
for (auto& buffer : params->getBuffers())
gradientChainRuleArgs.push_back(&buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
......@@ -4001,7 +3905,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
}
}
}
if (globals != NULL) {
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
......@@ -4010,7 +3914,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
globals.upload(globalParamValues);
}
pairEnergyArgs[5] = &includeEnergy;
if (nb.getUseCutoff()) {
......@@ -4089,8 +3993,6 @@ CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
......@@ -4146,9 +4048,9 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
variables[name] = "particleParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customExternalGlobals");
globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
globals.initialize<float>(cu, force.getNumGlobalParameters(), "customExternalGlobals");
globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
......@@ -4170,7 +4072,7 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
}
double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
......@@ -4179,7 +4081,7 @@ double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool inc
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
globals.upload(globalParamValues);
}
return 0.0;
}
......@@ -4294,18 +4196,6 @@ CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() {
delete donorParams;
if (acceptorParams != NULL)
delete acceptorParams;
if (donors != NULL)
delete donors;
if (acceptors != NULL)
delete acceptors;
if (globals != NULL)
delete globals;
if (donorExclusions != NULL)
delete donorExclusions;
if (acceptorExclusions != NULL)
delete acceptorExclusions;
for (auto function : tabulatedFunctions)
delete function;
}
static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
......@@ -4333,12 +4223,12 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
if (numDonors == 0 || numAcceptors == 0)
return;
int numParticles = system.getNumParticles();
donors = CudaArray::create<int4>(cu, numDonors, "customHbondDonors");
acceptors = CudaArray::create<int4>(cu, numAcceptors, "customHbondAcceptors");
donors.initialize<int4>(cu, numDonors, "customHbondDonors");
acceptors.initialize<int4>(cu, numAcceptors, "customHbondAcceptors");
donorParams = new CudaParameterSet(cu, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
acceptorParams = new CudaParameterSet(cu, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
if (force.getNumGlobalParameters() > 0)
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customHbondGlobals");
globals.initialize<float>(cu, force.getNumGlobalParameters(), "customHbondGlobals");
vector<vector<float> > donorParamVector(numDonors);
vector<int4> donorVector(numDonors);
for (int i = 0; i < numDonors; i++) {
......@@ -4348,7 +4238,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
for (int j = 0; j < (int) parameters.size(); j++)
donorParamVector[i][j] = (float) parameters[j];
}
donors->upload(donorVector);
donors.upload(donorVector);
donorParams->setParameterValues(donorParamVector);
vector<vector<float> > acceptorParamVector(numAcceptors);
vector<int4> acceptorVector(numAcceptors);
......@@ -4359,7 +4249,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
for (int j = 0; j < (int) parameters.size(); j++)
acceptorParamVector[i][j] = (float) parameters[j];
}
acceptors->upload(acceptorVector);
acceptors.upload(acceptorVector);
acceptorParams->setParameterValues(acceptorParamVector);
info = new ForceInfo(force);
cu.addForce(info);
......@@ -4395,10 +4285,10 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
else
throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor");
}
donorExclusions = CudaArray::create<int4>(cu, numDonors, "customHbondDonorExclusions");
acceptorExclusions = CudaArray::create<int4>(cu, numAcceptors, "customHbondAcceptorExclusions");
donorExclusions->upload(donorExclusionVector);
acceptorExclusions->upload(acceptorExclusionVector);
donorExclusions.initialize<int4>(cu, numDonors, "customHbondDonorExclusions");
acceptorExclusions.initialize<int4>(cu, numAcceptors, "customHbondAcceptorExclusions");
donorExclusions.upload(donorExclusionVector);
acceptorExclusions.upload(acceptorExclusionVector);
// Record the tabulated functions.
......@@ -4406,6 +4296,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
......@@ -4414,8 +4305,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[i].upload(f);
tableArgs << ", const float";
if (width > 1)
tableArgs << width;
......@@ -4430,8 +4321,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
if (globals.isInitialized())
globals.upload(globalParamValues);
map<string, string> variables;
for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
const string& name = force.getPerDonorParameterName(i);
......@@ -4623,7 +4514,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numDonors == 0 || numAcceptors == 0)
return 0.0;
if (globals != NULL) {
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
......@@ -4632,7 +4523,7 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
globals.upload(globalParamValues);
}
if (!hasInitializedKernel) {
hasInitializedKernel = true;
......@@ -4640,42 +4531,42 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
donorArgs.push_back(&cu.getForce().getDevicePointer());
donorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
donorArgs.push_back(&cu.getPosq().getDevicePointer());
donorArgs.push_back(&donorExclusions->getDevicePointer());
donorArgs.push_back(&donors->getDevicePointer());
donorArgs.push_back(&acceptors->getDevicePointer());
donorArgs.push_back(&donorExclusions.getDevicePointer());
donorArgs.push_back(&donors.getDevicePointer());
donorArgs.push_back(&acceptors.getDevicePointer());
donorArgs.push_back(cu.getPeriodicBoxSizePointer());
donorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
donorArgs.push_back(cu.getPeriodicBoxVecXPointer());
donorArgs.push_back(cu.getPeriodicBoxVecYPointer());
donorArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (globals != NULL)
donorArgs.push_back(&globals->getDevicePointer());
if (globals.isInitialized())
donorArgs.push_back(&globals.getDevicePointer());
for (auto& buffer : donorParams->getBuffers())
donorArgs.push_back(&buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers())
donorArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions)
donorArgs.push_back(&function->getDevicePointer());
for (auto& function : tabulatedFunctions)
donorArgs.push_back(&function.getDevicePointer());
index = 0;
acceptorArgs.push_back(&cu.getForce().getDevicePointer());
acceptorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
acceptorArgs.push_back(&cu.getPosq().getDevicePointer());
acceptorArgs.push_back(&acceptorExclusions->getDevicePointer());
acceptorArgs.push_back(&donors->getDevicePointer());
acceptorArgs.push_back(&acceptors->getDevicePointer());
acceptorArgs.push_back(&acceptorExclusions.getDevicePointer());
acceptorArgs.push_back(&donors.getDevicePointer());
acceptorArgs.push_back(&acceptors.getDevicePointer());
acceptorArgs.push_back(cu.getPeriodicBoxSizePointer());
acceptorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecXPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecYPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (globals != NULL)
acceptorArgs.push_back(&globals->getDevicePointer());
if (globals.isInitialized())
acceptorArgs.push_back(&globals.getDevicePointer());
for (auto& buffer : donorParams->getBuffers())
acceptorArgs.push_back(&buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers())
acceptorArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions)
acceptorArgs.push_back(&function->getDevicePointer());
for (auto& function : tabulatedFunctions)
acceptorArgs.push_back(&function.getDevicePointer());
}
int sharedMemorySize = 3*CudaContext::ThreadBlockSize*sizeof(float4);
cu.executeKernel(donorKernel, &donorArgs[0], max(numDonors, numAcceptors), CudaContext::ThreadBlockSize, sharedMemorySize);
......@@ -4775,22 +4666,6 @@ CudaCalcCustomCentroidBondForceKernel::~CudaCalcCustomCentroidBondForceKernel()
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (groupParticles != NULL)
delete groupParticles;
if (groupWeights != NULL)
delete groupWeights;
if (groupOffsets != NULL)
delete groupOffsets;
if (groupForces != NULL)
delete groupForces;
if (bondGroups != NULL)
delete bondGroups;
if (centerPositions != NULL)
delete centerPositions;
for (auto function : tabulatedFunctions)
delete function;
}
void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
......@@ -4827,22 +4702,22 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
for (int j = 0; j < normalizedWeights[i].size(); j++)
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
}
groupParticles = CudaArray::create<int>(cu, groupParticleVec.size(), "groupParticles");
groupParticles->upload(groupParticleVec);
groupParticles.initialize<int>(cu, groupParticleVec.size(), "groupParticles");
groupParticles.upload(groupParticleVec);
if (cu.getUseDoublePrecision()) {
groupWeights = CudaArray::create<double>(cu, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecDouble);
centerPositions = CudaArray::create<double4>(cu, numGroups, "centerPositions");
groupWeights.initialize<double>(cu, groupParticleVec.size(), "groupWeights");
groupWeights.upload(groupWeightVecDouble);
centerPositions.initialize<double4>(cu, numGroups, "centerPositions");
}
else {
groupWeights = CudaArray::create<float>(cu, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecFloat);
centerPositions = CudaArray::create<float4>(cu, numGroups, "centerPositions");
groupWeights.initialize<float>(cu, groupParticleVec.size(), "groupWeights");
groupWeights.upload(groupWeightVecFloat);
centerPositions.initialize<float4>(cu, numGroups, "centerPositions");
}
groupOffsets = CudaArray::create<int>(cu, groupOffsetVec.size(), "groupOffsets");
groupOffsets->upload(groupOffsetVec);
groupForces = CudaArray::create<long long>(cu, numGroups*3, "groupForces");
cu.addAutoclearBuffer(*groupForces);
groupOffsets.initialize<int>(cu, groupOffsetVec.size(), "groupOffsets");
groupOffsets.upload(groupOffsetVec);
groupForces.initialize<long long>(cu, numGroups*3, "groupForces");
cu.addAutoclearBuffer(groupForces);
// Record the bonds.
......@@ -4861,15 +4736,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
bondGroups = CudaArray::create<int>(cu, bondGroupVec.size(), "bondGroups");
bondGroups->upload(bondGroupVec);
bondGroups.initialize<int>(cu, bondGroupVec.size(), "bondGroups");
bondGroups.upload(bondGroupVec);
// Record the arguments to the force kernel.
groupForcesArgs.push_back(&groupForces->getDevicePointer());
groupForcesArgs.push_back(&groupForces.getDevicePointer());
groupForcesArgs.push_back(NULL); // Energy buffer hasn't been created yet
groupForcesArgs.push_back(&centerPositions->getDevicePointer());
groupForcesArgs.push_back(&bondGroups->getDevicePointer());
groupForcesArgs.push_back(&centerPositions.getDevicePointer());
groupForcesArgs.push_back(&bondGroups.getDevicePointer());
groupForcesArgs.push_back(cu.getPeriodicBoxSizePointer());
groupForcesArgs.push_back(cu.getInvPeriodicBoxSizePointer());
groupForcesArgs.push_back(cu.getPeriodicBoxVecXPointer());
......@@ -4885,6 +4760,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream extraArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
......@@ -4893,13 +4769,13 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
tabulatedFunctions.back()->upload(f);
tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[i].upload(f);
extraArgs << ", const float";
if (width > 1)
extraArgs << width;
extraArgs << "* __restrict__ " << arrayName;
groupForcesArgs.push_back(&tabulatedFunctions.back()->getDevicePointer());
groupForcesArgs.push_back(&tabulatedFunctions[i].getDevicePointer());
}
// Record information about parameters.
......@@ -4924,15 +4800,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
if (needEnergyParamDerivs)
extraArgs << ", mixed* __restrict__ energyParamDerivs";
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals->upload(globalParamValues);
globals.initialize<float>(cu, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals.upload(globalParamValues);
extraArgs << ", const float* __restrict__ globals";
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]";
variables[name] = value;
}
groupForcesArgs.push_back(&globals->getDevicePointer());
groupForcesArgs.push_back(&globals.getDevicePointer());
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
......@@ -5144,7 +5020,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0)
return 0.0;
if (globals != NULL) {
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
......@@ -5153,17 +5029,17 @@ double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
globals.upload(globalParamValues);
}
void* computeCentersArgs[] = {&cu.getPosq().getDevicePointer(), &groupParticles->getDevicePointer(), &groupWeights->getDevicePointer(),
&groupOffsets->getDevicePointer(), &centerPositions->getDevicePointer()};
void* computeCentersArgs[] = {&cu.getPosq().getDevicePointer(), &groupParticles.getDevicePointer(), &groupWeights.getDevicePointer(),
&groupOffsets.getDevicePointer(), &centerPositions.getDevicePointer()};
cu.executeKernel(computeCentersKernel, computeCentersArgs, CudaContext::TileSize*numGroups);
groupForcesArgs[1] = &cu.getEnergyBuffer().getDevicePointer();
if (needEnergyParamDerivs)
groupForcesArgs[9] = &cu.getEnergyParamDerivBuffer().getDevicePointer();
cu.executeKernel(groupForcesKernel, &groupForcesArgs[0], numBonds);
void* applyForcesArgs[] = {&groupParticles->getDevicePointer(), &groupWeights->getDevicePointer(), &groupOffsets->getDevicePointer(),
&groupForces->getDevicePointer(), &cu.getForce().getDevicePointer()};
void* applyForcesArgs[] = {&groupParticles.getDevicePointer(), &groupWeights.getDevicePointer(), &groupOffsets.getDevicePointer(),
&groupForces.getDevicePointer(), &cu.getForce().getDevicePointer()};
cu.executeKernel(applyForcesKernel, applyForcesArgs, CudaContext::TileSize*numGroups);
return 0.0;
}
......@@ -5222,10 +5098,6 @@ CudaCalcCustomCompoundBondForceKernel::~CudaCalcCustomCompoundBondForceKernel()
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
for (auto function : tabulatedFunctions)
delete function;
}
void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
......@@ -5256,16 +5128,16 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
CudaArray* array = CudaArray::create<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions.push_back(array);
array->upload(f);
string arrayName = cu.getBondedUtilities().addArgument(array->getDevicePointer(), width == 1 ? "float" : "float"+cu.intToString(width));
tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[i].upload(f);
string arrayName = cu.getBondedUtilities().addArgument(tabulatedFunctions[i].getDevicePointer(), width == 1 ? "float" : "float"+cu.intToString(width));
functionDefinitions.push_back(make_pair(name, arrayName));
}
......@@ -5289,9 +5161,9 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customCompoundBondGlobals");
globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
globals.initialize<float>(cu, force.getNumGlobalParameters(), "customCompoundBondGlobals");
globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
......@@ -5474,7 +5346,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
}
double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
......@@ -5483,7 +5355,7 @@ double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
globals.upload(globalParamValues);
}
return 0.0;
}
......@@ -5553,32 +5425,6 @@ CudaCalcCustomManyParticleForceKernel::~CudaCalcCustomManyParticleForceKernel()
cu.setAsCurrent();
if (params != NULL)
delete params;
if (orderIndex != NULL)
delete orderIndex;
if (particleOrder != NULL)
delete particleOrder;
if (particleTypes != NULL)
delete particleTypes;
if (exclusions != NULL)
delete exclusions;
if (exclusionStartIndex != NULL)
delete exclusionStartIndex;
if (blockCenter != NULL)
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (neighborPairs != NULL)
delete neighborPairs;
if (numNeighborPairs != NULL)
delete numNeighborPairs;
if (neighborStartIndex != NULL)
delete neighborStartIndex;
if (neighbors != NULL)
delete neighbors;
if (numNeighborsForAtom != NULL)
delete numNeighborsForAtom;
for (auto function : tabulatedFunctions)
delete function;
}
void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, const CustomManyParticleForce& force) {
......@@ -5612,6 +5458,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
......@@ -5620,8 +5467,8 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[i].upload(f);
tableArgs << ", const float";
if (width > 1)
tableArgs << width;
......@@ -5667,16 +5514,16 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypesVec, orderIndexVec, particleOrderVec);
bool hasTypeFilters = (particleOrderVec.size() > 1);
if (hasTypeFilters) {
particleTypes = CudaArray::create<int>(cu, particleTypesVec.size(), "customManyParticleTypes");
orderIndex = CudaArray::create<int>(cu, orderIndexVec.size(), "customManyParticleOrderIndex");
particleOrder = CudaArray::create<int>(cu, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
particleTypes->upload(particleTypesVec);
orderIndex->upload(orderIndexVec);
vector<int> flattenedOrder(particleOrder->getSize());
particleTypes.initialize<int>(cu, particleTypesVec.size(), "customManyParticleTypes");
orderIndex.initialize<int>(cu, orderIndexVec.size(), "customManyParticleOrderIndex");
particleOrder.initialize<int>(cu, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
particleTypes.upload(particleTypesVec);
orderIndex.upload(orderIndexVec);
vector<int> flattenedOrder(particleOrder.getSize());
for (int i = 0; i < (int) particleOrderVec.size(); i++)
for (int j = 0; j < particlesPerSet; j++)
flattenedOrder[i*particlesPerSet+j] = particleOrderVec[i][j];
particleOrder->upload(flattenedOrder);
particleOrder.upload(flattenedOrder);
}
// Build data structures for exclusions.
......@@ -5697,10 +5544,10 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
exclusionsVec.insert(exclusionsVec.end(), particleExclusions[i].begin(), particleExclusions[i].end());
exclusionStartIndexVec[i+1] = exclusionsVec.size();
}
exclusions = CudaArray::create<int>(cu, exclusionsVec.size(), "customManyParticleExclusions");
exclusionStartIndex = CudaArray::create<int>(cu, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
exclusions->upload(exclusionsVec);
exclusionStartIndex->upload(exclusionStartIndexVec);
exclusions.initialize<int>(cu, exclusionsVec.size(), "customManyParticleExclusions");
exclusionStartIndex.initialize<int>(cu, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
exclusions.upload(exclusionsVec);
exclusionStartIndex.upload(exclusionStartIndexVec);
}
// Build data structures for the neighbor list.
......@@ -5708,19 +5555,19 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
if (nonbondedMethod != NoCutoff) {
int numAtomBlocks = cu.getNumAtomBlocks();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
blockCenter = new CudaArray(cu, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox = new CudaArray(cu, numAtomBlocks, 4*elementSize, "blockBoundingBox");
numNeighborPairs = CudaArray::create<int>(cu, 1, "customManyParticleNumNeighborPairs");
neighborStartIndex = CudaArray::create<int>(cu, numParticles+1, "customManyParticleNeighborStartIndex");
numNeighborsForAtom = CudaArray::create<int>(cu, numParticles, "customManyParticleNumNeighborsForAtom");
blockCenter.initialize(cu, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox.initialize(cu, numAtomBlocks, 4*elementSize, "blockBoundingBox");
numNeighborPairs.initialize<int>(cu, 1, "customManyParticleNumNeighborPairs");
neighborStartIndex.initialize<int>(cu, numParticles+1, "customManyParticleNeighborStartIndex");
numNeighborsForAtom.initialize<int>(cu, numParticles, "customManyParticleNumNeighborsForAtom");
CHECK_RESULT(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING), "Error creating event for CustomManyParticleForce");
// Select a size for the array that holds the neighbor list. We have to make a fairly
// arbitrary guess, but if this turns out to be too small we'll increase it later.
maxNeighborPairs = 150*numParticles;
neighborPairs = CudaArray::create<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors = CudaArray::create<int>(cu, maxNeighborPairs, "customManyParticleNeighbors");
neighborPairs.initialize<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors.initialize<int>(cu, maxNeighborPairs, "customManyParticleNeighbors");
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
......@@ -6045,22 +5892,22 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
forceArgs.push_back(cu.getPeriodicBoxVecYPointer());
forceArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (nonbondedMethod != NoCutoff) {
forceArgs.push_back(&neighbors->getDevicePointer());
forceArgs.push_back(&neighborStartIndex->getDevicePointer());
forceArgs.push_back(&neighbors.getDevicePointer());
forceArgs.push_back(&neighborStartIndex.getDevicePointer());
}
if (particleTypes != NULL) {
forceArgs.push_back(&particleTypes->getDevicePointer());
forceArgs.push_back(&orderIndex->getDevicePointer());
forceArgs.push_back(&particleOrder->getDevicePointer());
if (particleTypes.isInitialized()) {
forceArgs.push_back(&particleTypes.getDevicePointer());
forceArgs.push_back(&orderIndex.getDevicePointer());
forceArgs.push_back(&particleOrder.getDevicePointer());
}
if (exclusions != NULL) {
forceArgs.push_back(&exclusions->getDevicePointer());
forceArgs.push_back(&exclusionStartIndex->getDevicePointer());
if (exclusions.isInitialized()) {
forceArgs.push_back(&exclusions.getDevicePointer());
forceArgs.push_back(&exclusionStartIndex.getDevicePointer());
}
for (auto& buffer : params->getBuffers())
forceArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions)
forceArgs.push_back(&function->getDevicePointer());
for (auto& function : tabulatedFunctions)
forceArgs.push_back(&function.getDevicePointer());
if (nonbondedMethod != NoCutoff) {
// Set arguments for the block bounds kernel.
......@@ -6071,9 +5918,9 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
blockBoundsArgs.push_back(cu.getPeriodicBoxVecYPointer());
blockBoundsArgs.push_back(cu.getPeriodicBoxVecZPointer());
blockBoundsArgs.push_back(&cu.getPosq().getDevicePointer());
blockBoundsArgs.push_back(&blockCenter->getDevicePointer());
blockBoundsArgs.push_back(&blockBoundingBox->getDevicePointer());
blockBoundsArgs.push_back(&numNeighborPairs->getDevicePointer());
blockBoundsArgs.push_back(&blockCenter.getDevicePointer());
blockBoundsArgs.push_back(&blockBoundingBox.getDevicePointer());
blockBoundsArgs.push_back(&numNeighborPairs.getDevicePointer());
// Set arguments for the neighbor list kernel.
......@@ -6083,32 +5930,32 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
neighborsArgs.push_back(cu.getPeriodicBoxVecYPointer());
neighborsArgs.push_back(cu.getPeriodicBoxVecZPointer());
neighborsArgs.push_back(&cu.getPosq().getDevicePointer());
neighborsArgs.push_back(&blockCenter->getDevicePointer());
neighborsArgs.push_back(&blockBoundingBox->getDevicePointer());
neighborsArgs.push_back(&neighborPairs->getDevicePointer());
neighborsArgs.push_back(&numNeighborPairs->getDevicePointer());
neighborsArgs.push_back(&numNeighborsForAtom->getDevicePointer());
neighborsArgs.push_back(&blockCenter.getDevicePointer());
neighborsArgs.push_back(&blockBoundingBox.getDevicePointer());
neighborsArgs.push_back(&neighborPairs.getDevicePointer());
neighborsArgs.push_back(&numNeighborPairs.getDevicePointer());
neighborsArgs.push_back(&numNeighborsForAtom.getDevicePointer());
neighborsArgs.push_back(&maxNeighborPairs);
if (exclusions != NULL) {
neighborsArgs.push_back(&exclusions->getDevicePointer());
neighborsArgs.push_back(&exclusionStartIndex->getDevicePointer());
if (exclusions.isInitialized()) {
neighborsArgs.push_back(&exclusions.getDevicePointer());
neighborsArgs.push_back(&exclusionStartIndex.getDevicePointer());
}
// Set arguments for the kernel to find neighbor list start indices.
startIndicesArgs.push_back(&numNeighborsForAtom->getDevicePointer());
startIndicesArgs.push_back(&neighborStartIndex->getDevicePointer());
startIndicesArgs.push_back(&numNeighborPairs->getDevicePointer());
startIndicesArgs.push_back(&numNeighborsForAtom.getDevicePointer());
startIndicesArgs.push_back(&neighborStartIndex.getDevicePointer());
startIndicesArgs.push_back(&numNeighborPairs.getDevicePointer());
startIndicesArgs.push_back(&maxNeighborPairs);
// Set arguments for the kernel to assemble the final neighbor list.
copyPairsArgs.push_back(&neighborPairs->getDevicePointer());
copyPairsArgs.push_back(&neighbors->getDevicePointer());
copyPairsArgs.push_back(&numNeighborPairs->getDevicePointer());
copyPairsArgs.push_back(&neighborPairs.getDevicePointer());
copyPairsArgs.push_back(&neighbors.getDevicePointer());
copyPairsArgs.push_back(&numNeighborPairs.getDevicePointer());
copyPairsArgs.push_back(&maxNeighborPairs);
copyPairsArgs.push_back(&numNeighborsForAtom->getDevicePointer());
copyPairsArgs.push_back(&neighborStartIndex->getDevicePointer());
copyPairsArgs.push_back(&numNeighborsForAtom.getDevicePointer());
copyPairsArgs.push_back(&neighborStartIndex.getDevicePointer());
}
}
if (globalParamValues.size() > 0) {
......@@ -6131,7 +5978,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
// We need to make sure there was enough memory for the neighbor list. Download the
// information asynchronously so kernels can be running at the same time.
numNeighborPairs->download(numPairs, false);
numNeighborPairs.download(numPairs, false);
CHECK_RESULT(cuEventRecord(event, 0), "Error recording event for CustomManyParticleForce");
cu.executeKernel(startIndicesKernel, &startIndicesArgs[0], 256, 256, 256*sizeof(int));
cu.executeKernel(copyPairsKernel, &copyPairsArgs[0], maxNeighborPairs);
......@@ -6145,17 +5992,13 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
if (*numPairs > maxNeighborPairs) {
// Resize the arrays and run the calculation again.
delete neighborPairs;
neighborPairs = NULL;
delete neighbors;
neighbors = NULL;
maxNeighborPairs = (int) (1.1*(*numPairs));
neighborPairs = CudaArray::create<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors = CudaArray::create<int>(cu, maxNeighborPairs, "customManyParticleNeighbors");
forceArgs[5] = &neighbors->getDevicePointer();
neighborsArgs[5] = &neighborPairs->getDevicePointer();
copyPairsArgs[0] = &neighborPairs->getDevicePointer();
copyPairsArgs[1] = &neighbors->getDevicePointer();
neighborPairs.resize(maxNeighborPairs);
neighbors.resize(maxNeighborPairs);
forceArgs[5] = &neighbors.getDevicePointer();
neighborsArgs[5] = &neighborPairs.getDevicePointer();
copyPairsArgs[0] = &neighborPairs.getDevicePointer();
copyPairsArgs[1] = &neighbors.getDevicePointer();
continue;
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment