Commit b33ee3b0 authored by peastman's avatar peastman
Browse files

More conversion of OpenCLArrays

parent d59b0373
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. * * Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -81,7 +81,6 @@ namespace OpenMM { ...@@ -81,7 +81,6 @@ namespace OpenMM {
class OPENMM_EXPORT_OPENCL OpenCLBondedUtilities { class OPENMM_EXPORT_OPENCL OpenCLBondedUtilities {
public: public:
OpenCLBondedUtilities(OpenCLContext& context); OpenCLBondedUtilities(OpenCLContext& context);
~OpenCLBondedUtilities();
/** /**
* Add a bonded interaction. * Add a bonded interaction.
* *
...@@ -143,8 +142,8 @@ private: ...@@ -143,8 +142,8 @@ private:
std::vector<std::vector<int> > forceSets; std::vector<std::vector<int> > forceSets;
std::vector<cl::Memory*> arguments; std::vector<cl::Memory*> arguments;
std::vector<std::string> argTypes; std::vector<std::string> argTypes;
std::vector<OpenCLArray*> atomIndices; std::vector<OpenCLArray> atomIndices;
std::vector<OpenCLArray*> bufferIndices; std::vector<OpenCLArray> bufferIndices;
std::vector<std::string> prefixCode; std::vector<std::string> prefixCode;
std::vector<std::string> energyParameterDerivatives; std::vector<std::string> energyParameterDerivatives;
int numForceBuffers, maxBonds, allGroups; int numForceBuffers, maxBonds, allGroups;
......
...@@ -33,11 +33,10 @@ namespace OpenMM { ...@@ -33,11 +33,10 @@ namespace OpenMM {
class OPENMM_EXPORT_OPENCL OpenCLCompact { class OPENMM_EXPORT_OPENCL OpenCLCompact {
public: public:
OpenCLCompact(OpenCLContext& context); OpenCLCompact(OpenCLContext& context);
~OpenCLCompact();
void compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid); void compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid);
private: private:
OpenCLContext& context; OpenCLContext& context;
OpenCLArray* dgBlockCounts; OpenCLArray dgBlockCounts;
cl::Kernel countKernel; cl::Kernel countKernel;
cl::Kernel moveValidKernel; cl::Kernel moveValidKernel;
}; };
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. * * Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -42,25 +42,24 @@ namespace OpenMM { ...@@ -42,25 +42,24 @@ namespace OpenMM {
class OPENMM_EXPORT_OPENCL OpenCLIntegrationUtilities { class OPENMM_EXPORT_OPENCL OpenCLIntegrationUtilities {
public: public:
OpenCLIntegrationUtilities(OpenCLContext& context, const System& system); OpenCLIntegrationUtilities(OpenCLContext& context, const System& system);
~OpenCLIntegrationUtilities();
/** /**
* Get the array which contains position deltas. * Get the array which contains position deltas.
*/ */
OpenCLArray& getPosDelta() { OpenCLArray& getPosDelta() {
return *posDelta; return posDelta;
} }
/** /**
* Get the array which contains random values. Each element is a float4, whose components * Get the array which contains random values. Each element is a float4, whose components
* are independent, normally distributed random numbers with mean 0 and variance 1. * are independent, normally distributed random numbers with mean 0 and variance 1.
*/ */
OpenCLArray& getRandom() { OpenCLArray& getRandom() {
return *random; return random;
} }
/** /**
* Get the array which contains the current step size. * Get the array which contains the current step size.
*/ */
OpenCLArray& getStepSize() { OpenCLArray& getStepSize() {
return *stepSize; return stepSize;
} }
/** /**
* Set the size to use for the next step. * Set the size to use for the next step.
...@@ -131,36 +130,36 @@ private: ...@@ -131,36 +130,36 @@ private:
cl::Kernel ccmaPosUpdateKernel, ccmaVelUpdateKernel; cl::Kernel ccmaPosUpdateKernel, ccmaVelUpdateKernel;
cl::Kernel vsitePositionKernel, vsiteForceKernel, vsiteAddForcesKernel; cl::Kernel vsitePositionKernel, vsiteForceKernel, vsiteAddForcesKernel;
cl::Kernel randomKernel, timeShiftKernel; cl::Kernel randomKernel, timeShiftKernel;
OpenCLArray* posDelta; OpenCLArray posDelta;
OpenCLArray* settleAtoms; OpenCLArray settleAtoms;
OpenCLArray* settleParams; OpenCLArray settleParams;
OpenCLArray* shakeAtoms; OpenCLArray shakeAtoms;
OpenCLArray* shakeParams; OpenCLArray shakeParams;
OpenCLArray* random; OpenCLArray random;
OpenCLArray* randomSeed; OpenCLArray randomSeed;
OpenCLArray* stepSize; OpenCLArray stepSize;
OpenCLArray* ccmaAtoms; OpenCLArray ccmaAtoms;
OpenCLArray* ccmaDistance; OpenCLArray ccmaDistance;
OpenCLArray* ccmaReducedMass; OpenCLArray ccmaReducedMass;
OpenCLArray* ccmaAtomConstraints; OpenCLArray ccmaAtomConstraints;
OpenCLArray* ccmaNumAtomConstraints; OpenCLArray ccmaNumAtomConstraints;
OpenCLArray* ccmaConstraintMatrixColumn; OpenCLArray ccmaConstraintMatrixColumn;
OpenCLArray* ccmaConstraintMatrixValue; OpenCLArray ccmaConstraintMatrixValue;
OpenCLArray* ccmaDelta1; OpenCLArray ccmaDelta1;
OpenCLArray* ccmaDelta2; OpenCLArray ccmaDelta2;
OpenCLArray* ccmaConverged; OpenCLArray ccmaConverged;
OpenCLArray* ccmaConvergedHostBuffer; OpenCLArray ccmaConvergedHostBuffer;
OpenCLArray* vsite2AvgAtoms; OpenCLArray vsite2AvgAtoms;
OpenCLArray* vsite2AvgWeights; OpenCLArray vsite2AvgWeights;
OpenCLArray* vsite3AvgAtoms; OpenCLArray vsite3AvgAtoms;
OpenCLArray* vsite3AvgWeights; OpenCLArray vsite3AvgWeights;
OpenCLArray* vsiteOutOfPlaneAtoms; OpenCLArray vsiteOutOfPlaneAtoms;
OpenCLArray* vsiteOutOfPlaneWeights; OpenCLArray vsiteOutOfPlaneWeights;
OpenCLArray* vsiteLocalCoordsIndex; OpenCLArray vsiteLocalCoordsIndex;
OpenCLArray* vsiteLocalCoordsAtoms; OpenCLArray vsiteLocalCoordsAtoms;
OpenCLArray* vsiteLocalCoordsWeights; OpenCLArray vsiteLocalCoordsWeights;
OpenCLArray* vsiteLocalCoordsPos; OpenCLArray vsiteLocalCoordsPos;
OpenCLArray* vsiteLocalCoordsStartIndex; OpenCLArray vsiteLocalCoordsStartIndex;
int randomPos; int randomPos;
int lastSeed, numVsites; int lastSeed, numVsites;
bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels, ccmaUseDirectBuffer, hasOverlappingVsites; bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels, ccmaUseDirectBuffer, hasOverlappingVsites;
......
...@@ -1132,12 +1132,8 @@ private: ...@@ -1132,12 +1132,8 @@ private:
class OpenCLCalcGayBerneForceKernel : public CalcGayBerneForceKernel { class OpenCLCalcGayBerneForceKernel : public CalcGayBerneForceKernel {
public: public:
OpenCLCalcGayBerneForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGayBerneForceKernel(name, platform), cl(cl), OpenCLCalcGayBerneForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGayBerneForceKernel(name, platform), cl(cl),
hasInitializedKernels(false), sortedParticles(NULL), axisParticleIndices(NULL), sigParams(NULL), epsParams(NULL), scale(NULL), exceptionParticles(NULL), hasInitializedKernels(false) {
exceptionParams(NULL), aMatrix(NULL),
bMatrix(NULL), gMatrix(NULL), exclusions(NULL), exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighbors(NULL),
neighborIndex(NULL), neighborBlockCount(NULL), sortedPos(NULL), torque(NULL) {
} }
~OpenCLCalcGayBerneForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1169,25 +1165,25 @@ private: ...@@ -1169,25 +1165,25 @@ private:
bool hasInitializedKernels; bool hasInitializedKernels;
int numRealParticles, maxNeighborBlocks; int numRealParticles, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod; GayBerneForce::NonbondedMethod nonbondedMethod;
OpenCLArray* sortedParticles; OpenCLArray sortedParticles;
OpenCLArray* axisParticleIndices; OpenCLArray axisParticleIndices;
OpenCLArray* sigParams; OpenCLArray sigParams;
OpenCLArray* epsParams; OpenCLArray epsParams;
OpenCLArray* scale; OpenCLArray scale;
OpenCLArray* exceptionParticles; OpenCLArray exceptionParticles;
OpenCLArray* exceptionParams; OpenCLArray exceptionParams;
OpenCLArray* aMatrix; OpenCLArray aMatrix;
OpenCLArray* bMatrix; OpenCLArray bMatrix;
OpenCLArray* gMatrix; OpenCLArray gMatrix;
OpenCLArray* exclusions; OpenCLArray exclusions;
OpenCLArray* exclusionStartIndex; OpenCLArray exclusionStartIndex;
OpenCLArray* blockCenter; OpenCLArray blockCenter;
OpenCLArray* blockBoundingBox; OpenCLArray blockBoundingBox;
OpenCLArray* neighbors; OpenCLArray neighbors;
OpenCLArray* neighborIndex; OpenCLArray neighborIndex;
OpenCLArray* neighborBlockCount; OpenCLArray neighborBlockCount;
OpenCLArray* sortedPos; OpenCLArray sortedPos;
OpenCLArray* torque; OpenCLArray torque;
std::vector<bool> isRealParticle; std::vector<bool> isRealParticle;
std::vector<std::pair<int, int> > exceptionAtoms; std::vector<std::pair<int, int> > exceptionAtoms;
std::vector<std::pair<int, int> > excludedPairs; std::vector<std::pair<int, int> > excludedPairs;
...@@ -1200,9 +1196,8 @@ private: ...@@ -1200,9 +1196,8 @@ private:
class OpenCLCalcCustomCVForceKernel : public CalcCustomCVForceKernel { class OpenCLCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
public: public:
OpenCLCalcCustomCVForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcCustomCVForceKernel(name, platform), OpenCLCalcCustomCVForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcCustomCVForceKernel(name, platform),
cl(cl), hasInitializedKernels(false), invAtomOrder(NULL), innerInvAtomOrder(NULL) { cl(cl), hasInitializedKernels(false) {
} }
~OpenCLCalcCustomCVForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1236,9 +1231,9 @@ private: ...@@ -1236,9 +1231,9 @@ private:
std::vector<std::string> variableNames, paramDerivNames, globalParameterNames; std::vector<std::string> variableNames, paramDerivNames, globalParameterNames;
std::vector<Lepton::ExpressionProgram> variableDerivExpressions; std::vector<Lepton::ExpressionProgram> variableDerivExpressions;
std::vector<Lepton::ExpressionProgram> paramDerivExpressions; std::vector<Lepton::ExpressionProgram> paramDerivExpressions;
std::vector<OpenCLArray*> cvForces; std::vector<OpenCLArray> cvForces;
OpenCLArray* invAtomOrder; OpenCLArray invAtomOrder;
OpenCLArray* innerInvAtomOrder; OpenCLArray innerInvAtomOrder;
cl::Kernel copyStateKernel, copyForcesKernel, addForcesKernel; cl::Kernel copyStateKernel, copyForcesKernel, addForcesKernel;
}; };
...@@ -1247,10 +1242,8 @@ private: ...@@ -1247,10 +1242,8 @@ private:
*/ */
class OpenCLCalcRMSDForceKernel : public CalcRMSDForceKernel { class OpenCLCalcRMSDForceKernel : public CalcRMSDForceKernel {
public: public:
OpenCLCalcRMSDForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcRMSDForceKernel(name, platform), OpenCLCalcRMSDForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcRMSDForceKernel(name, platform), cl(cl) {
cl(cl), referencePos(NULL), particles(NULL), buffer(NULL) {
} }
~OpenCLCalcRMSDForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1289,9 +1282,9 @@ private: ...@@ -1289,9 +1282,9 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
double sumNormRef; double sumNormRef;
OpenCLArray* referencePos; OpenCLArray referencePos;
OpenCLArray* particles; OpenCLArray particles;
OpenCLArray* buffer; OpenCLArray buffer;
cl::Kernel kernel1, kernel2; cl::Kernel kernel1, kernel2;
}; };
...@@ -1337,9 +1330,8 @@ private: ...@@ -1337,9 +1330,8 @@ private:
class OpenCLIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel { class OpenCLIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
public: public:
OpenCLIntegrateLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateLangevinStepKernel(name, platform), cl(cl), OpenCLIntegrateLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateLangevinStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), params(NULL) { hasInitializedKernels(false) {
} }
~OpenCLIntegrateLangevinStepKernel();
/** /**
* Initialize the kernel, setting up the particle masses. * Initialize the kernel, setting up the particle masses.
* *
...@@ -1365,7 +1357,7 @@ private: ...@@ -1365,7 +1357,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
double prevTemp, prevFriction, prevStepSize; double prevTemp, prevFriction, prevStepSize;
bool hasInitializedKernels; bool hasInitializedKernels;
OpenCLArray* params; OpenCLArray params;
cl::Kernel kernel1, kernel2; cl::Kernel kernel1, kernel2;
}; };
...@@ -1451,9 +1443,8 @@ private: ...@@ -1451,9 +1443,8 @@ private:
class OpenCLIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel { class OpenCLIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
public: public:
OpenCLIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVariableLangevinStepKernel(name, platform), cl(cl), OpenCLIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVariableLangevinStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), params(NULL) { hasInitializedKernels(false) {
} }
~OpenCLIntegrateVariableLangevinStepKernel();
/** /**
* Initialize the kernel, setting up the particle masses. * Initialize the kernel, setting up the particle masses.
* *
...@@ -1481,7 +1472,7 @@ private: ...@@ -1481,7 +1472,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
int blockSize; int blockSize;
OpenCLArray* params; OpenCLArray params;
cl::Kernel kernel1, kernel2, selectSizeKernel; cl::Kernel kernel1, kernel2, selectSizeKernel;
double prevTemp, prevFriction, prevErrorTol; double prevTemp, prevFriction, prevErrorTol;
}; };
...@@ -1493,8 +1484,7 @@ class OpenCLIntegrateCustomStepKernel : public IntegrateCustomStepKernel { ...@@ -1493,8 +1484,7 @@ class OpenCLIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public: public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER}; enum GlobalTargetType {DT, VARIABLE, PARAMETER};
OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl), OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL), hasInitializedKernels(false), localValuesAreCurrent(false), perDofValues(NULL), needsEnergyParamDerivs(false) {
randomSeed(NULL), perDofEnergyParamDerivs(NULL), perDofValues(NULL), needsEnergyParamDerivs(false) {
} }
~OpenCLIntegrateCustomStepKernel(); ~OpenCLIntegrateCustomStepKernel();
/** /**
...@@ -1575,15 +1565,15 @@ private: ...@@ -1575,15 +1565,15 @@ private:
int numGlobalVariables, sumWorkGroupSize; int numGlobalVariables, sumWorkGroupSize;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs; bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent; mutable bool localValuesAreCurrent;
OpenCLArray* globalValues; OpenCLArray globalValues;
OpenCLArray* sumBuffer; OpenCLArray sumBuffer;
OpenCLArray* summedValue; OpenCLArray summedValue;
OpenCLArray* uniformRandoms; OpenCLArray uniformRandoms;
OpenCLArray* randomSeed; OpenCLArray randomSeed;
OpenCLArray* perDofEnergyParamDerivs; OpenCLArray perDofEnergyParamDerivs;
std::vector<OpenCLArray*> tabulatedFunctions; std::vector<OpenCLArray> tabulatedFunctions;
std::map<int, double> savedEnergy; std::map<int, double> savedEnergy;
std::map<int, OpenCLArray*> savedForces; std::map<int, OpenCLArray> savedForces;
std::set<int> validSavedForces; std::set<int> validSavedForces;
OpenCLParameterSet* perDofValues; OpenCLParameterSet* perDofValues;
mutable std::vector<std::vector<cl_float> > localPerDofValuesFloat; mutable std::vector<std::vector<cl_float> > localPerDofValuesFloat;
...@@ -1635,9 +1625,8 @@ public: ...@@ -1635,9 +1625,8 @@ public:
class OpenCLApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel { class OpenCLApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel {
public: public:
OpenCLApplyAndersenThermostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyAndersenThermostatKernel(name, platform), cl(cl), OpenCLApplyAndersenThermostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyAndersenThermostatKernel(name, platform), cl(cl),
hasInitializedKernels(false), atomGroups(NULL) { hasInitializedKernels(false) {
} }
~OpenCLApplyAndersenThermostatKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1655,7 +1644,7 @@ private: ...@@ -1655,7 +1644,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
int randomSeed; int randomSeed;
OpenCLArray* atomGroups; OpenCLArray atomGroups;
cl::Kernel kernel; cl::Kernel kernel;
}; };
...@@ -1665,9 +1654,8 @@ private: ...@@ -1665,9 +1654,8 @@ private:
class OpenCLApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel { class OpenCLApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel {
public: public:
OpenCLApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyMonteCarloBarostatKernel(name, platform), cl(cl), OpenCLApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyMonteCarloBarostatKernel(name, platform), cl(cl),
hasInitializedKernels(false), savedPositions(NULL), savedForces(NULL), moleculeAtoms(NULL), moleculeStartIndex(NULL) { hasInitializedKernels(false) {
} }
~OpenCLApplyMonteCarloBarostatKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1699,10 +1687,10 @@ private: ...@@ -1699,10 +1687,10 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
int numMolecules; int numMolecules;
OpenCLArray* savedPositions; OpenCLArray savedPositions;
OpenCLArray* savedForces; OpenCLArray savedForces;
OpenCLArray* moleculeAtoms; OpenCLArray moleculeAtoms;
OpenCLArray* moleculeStartIndex; OpenCLArray moleculeStartIndex;
cl::Kernel kernel; cl::Kernel kernel;
std::vector<int> lastAtomOrder; std::vector<int> lastAtomOrder;
}; };
...@@ -1712,9 +1700,8 @@ private: ...@@ -1712,9 +1700,8 @@ private:
*/ */
class OpenCLRemoveCMMotionKernel : public RemoveCMMotionKernel { class OpenCLRemoveCMMotionKernel : public RemoveCMMotionKernel {
public: public:
OpenCLRemoveCMMotionKernel(std::string name, const Platform& platform, OpenCLContext& cl) : RemoveCMMotionKernel(name, platform), cl(cl), cmMomentum(NULL) { OpenCLRemoveCMMotionKernel(std::string name, const Platform& platform, OpenCLContext& cl) : RemoveCMMotionKernel(name, platform), cl(cl) {
} }
~OpenCLRemoveCMMotionKernel();
/** /**
* Initialize the kernel, setting up the particle masses. * Initialize the kernel, setting up the particle masses.
* *
...@@ -1731,7 +1718,7 @@ public: ...@@ -1731,7 +1718,7 @@ public:
private: private:
OpenCLContext& cl; OpenCLContext& cl;
int frequency; int frequency;
OpenCLArray* cmMomentum; OpenCLArray cmMomentum;
cl::Kernel kernel1, kernel2; cl::Kernel kernel1, kernel2;
}; };
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. * * Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -175,55 +175,55 @@ public: ...@@ -175,55 +175,55 @@ public:
* Get the array containing the center of each atom block. * Get the array containing the center of each atom block.
*/ */
OpenCLArray& getBlockCenters() { OpenCLArray& getBlockCenters() {
return *blockCenter; return blockCenter;
} }
/** /**
* Get the array containing the dimensions of each atom block. * Get the array containing the dimensions of each atom block.
*/ */
OpenCLArray& getBlockBoundingBoxes() { OpenCLArray& getBlockBoundingBoxes() {
return *blockBoundingBox; return blockBoundingBox;
} }
/** /**
* Get the array whose first element contains the number of tiles with interactions. * Get the array whose first element contains the number of tiles with interactions.
*/ */
OpenCLArray& getInteractionCount() { OpenCLArray& getInteractionCount() {
return *interactionCount; return interactionCount;
} }
/** /**
* Get the array containing tiles with interactions. * Get the array containing tiles with interactions.
*/ */
OpenCLArray& getInteractingTiles() { OpenCLArray& getInteractingTiles() {
return *interactingTiles; return interactingTiles;
} }
/** /**
* Get the array containing the atoms in each tile with interactions. * Get the array containing the atoms in each tile with interactions.
*/ */
OpenCLArray& getInteractingAtoms() { OpenCLArray& getInteractingAtoms() {
return *interactingAtoms; return interactingAtoms;
} }
/** /**
* Get the array containing exclusion flags. * Get the array containing exclusion flags.
*/ */
OpenCLArray& getExclusions() { OpenCLArray& getExclusions() {
return *exclusions; return exclusions;
} }
/** /**
* Get the array containing tiles with exclusions. * Get the array containing tiles with exclusions.
*/ */
OpenCLArray& getExclusionTiles() { OpenCLArray& getExclusionTiles() {
return *exclusionTiles; return exclusionTiles;
} }
/** /**
* Get the array containing the index into the exclusion array for each tile. * Get the array containing the index into the exclusion array for each tile.
*/ */
OpenCLArray& getExclusionIndices() { OpenCLArray& getExclusionIndices() {
return *exclusionIndices; return exclusionIndices;
} }
/** /**
* Get the array listing where the exclusion data starts for each row. * Get the array listing where the exclusion data starts for each row.
*/ */
OpenCLArray& getExclusionRowIndices() { OpenCLArray& getExclusionRowIndices() {
return *exclusionRowIndices; return exclusionRowIndices;
} }
/** /**
* Get the index of the first tile this context is responsible for processing. * Get the index of the first tile this context is responsible for processing.
...@@ -275,20 +275,20 @@ private: ...@@ -275,20 +275,20 @@ private:
class BlockSortTrait; class BlockSortTrait;
OpenCLContext& context; OpenCLContext& context;
std::map<int, KernelSet> groupKernels; std::map<int, KernelSet> groupKernels;
OpenCLArray* exclusionTiles; OpenCLArray exclusionTiles;
OpenCLArray* exclusions; OpenCLArray exclusions;
OpenCLArray* exclusionIndices; OpenCLArray exclusionIndices;
OpenCLArray* exclusionRowIndices; OpenCLArray exclusionRowIndices;
OpenCLArray* interactingTiles; OpenCLArray interactingTiles;
OpenCLArray* interactingAtoms; OpenCLArray interactingAtoms;
OpenCLArray* interactionCount; OpenCLArray interactionCount;
OpenCLArray* blockCenter; OpenCLArray blockCenter;
OpenCLArray* blockBoundingBox; OpenCLArray blockBoundingBox;
OpenCLArray* sortedBlocks; OpenCLArray sortedBlocks;
OpenCLArray* sortedBlockCenter; OpenCLArray sortedBlockCenter;
OpenCLArray* sortedBlockBoundingBox; OpenCLArray sortedBlockBoundingBox;
OpenCLArray* oldPositions; OpenCLArray oldPositions;
OpenCLArray* rebuildNeighborList; OpenCLArray rebuildNeighborList;
OpenCLSort* blockSorter; OpenCLSort* blockSorter;
cl::Event downloadCountEvent; cl::Event downloadCountEvent;
cl::Buffer* pinnedCountBuffer; cl::Buffer* pinnedCountBuffer;
......
...@@ -84,7 +84,7 @@ private: ...@@ -84,7 +84,7 @@ private:
std::vector<long long> completionTimes; std::vector<long long> completionTimes;
std::vector<double> contextNonbondedFractions; std::vector<double> contextNonbondedFractions;
std::vector<int> tileCounts; std::vector<int> tileCounts;
OpenCLArray* contextForces; OpenCLArray contextForces;
cl::Buffer* pinnedPositionBuffer; cl::Buffer* pinnedPositionBuffer;
cl::Buffer* pinnedForceBuffer; cl::Buffer* pinnedForceBuffer;
void* pinnedPositionMemory; void* pinnedPositionMemory;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2010-2013 Stanford University and the Authors. * * Portions copyright (c) 2010-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -87,11 +87,11 @@ public: ...@@ -87,11 +87,11 @@ public:
private: private:
OpenCLContext& context; OpenCLContext& context;
SortTrait* trait; SortTrait* trait;
OpenCLArray* dataRange; OpenCLArray dataRange;
OpenCLArray* bucketOfElement; OpenCLArray bucketOfElement;
OpenCLArray* offsetInBucket; OpenCLArray offsetInBucket;
OpenCLArray* bucketOffset; OpenCLArray bucketOffset;
OpenCLArray* buckets; OpenCLArray buckets;
cl::Kernel shortListKernel, computeRangeKernel, assignElementsKernel, computeBucketPositionsKernel, copyToBucketsKernel, sortBucketsKernel; cl::Kernel shortListKernel, computeRangeKernel, assignElementsKernel, computeBucketPositionsKernel, copyToBucketsKernel, sortBucketsKernel;
unsigned int dataLength, rangeKernelSize, positionsKernelSize, sortKernelSize; unsigned int dataLength, rangeKernelSize, positionsKernelSize, sortKernelSize;
bool isShortList; bool isShortList;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. * * Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -36,13 +36,6 @@ using namespace std; ...@@ -36,13 +36,6 @@ using namespace std;
OpenCLBondedUtilities::OpenCLBondedUtilities(OpenCLContext& context) : context(context), numForceBuffers(0), maxBonds(0), allGroups(0), hasInitializedKernels(false) { OpenCLBondedUtilities::OpenCLBondedUtilities(OpenCLContext& context) : context(context), numForceBuffers(0), maxBonds(0), allGroups(0), hasInitializedKernels(false) {
} }
OpenCLBondedUtilities::~OpenCLBondedUtilities() {
for (int i = 0; i < (int) atomIndices.size(); i++)
delete atomIndices[i];
for (int i = 0; i < (int) bufferIndices.size(); i++)
delete bufferIndices[i];
}
void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, const string& source, int group) { void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, const string& source, int group) {
if (atoms.size() > 0) { if (atoms.size() > 0) {
forceAtoms.push_back(atoms); forceAtoms.push_back(atoms);
...@@ -92,6 +85,7 @@ void OpenCLBondedUtilities::initialize(const System& system) { ...@@ -92,6 +85,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
vector<vector<cl_uint> > bufferVec(numForces); vector<vector<cl_uint> > bufferVec(numForces);
vector<vector<int> > bufferCounter(numForces, vector<int>(system.getNumParticles(), 0)); vector<vector<int> > bufferCounter(numForces, vector<int>(system.getNumParticles(), 0));
vector<int> numBuffers(numForces, 0); vector<int> numBuffers(numForces, 0);
atomIndices.resize(numForces);
for (int i = 0; i < numForces; i++) { for (int i = 0; i < numForces; i++) {
int numBonds = forceAtoms[i].size(); int numBonds = forceAtoms[i].size();
int numAtoms = forceAtoms[i][0].size(); int numAtoms = forceAtoms[i][0].size();
...@@ -101,9 +95,8 @@ void OpenCLBondedUtilities::initialize(const System& system) { ...@@ -101,9 +95,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for (int atom = 0; atom < numAtoms; atom++) for (int atom = 0; atom < numAtoms; atom++)
indexVec[bond*width+atom] = forceAtoms[i][bond][atom]; indexVec[bond*width+atom] = forceAtoms[i][bond][atom];
} }
OpenCLArray* indices = OpenCLArray::create<cl_uint>(context, indexVec.size(), "bondedIndices"); atomIndices[i].initialize<cl_uint>(context, indexVec.size(), "bondedIndices");
indices->upload(indexVec); atomIndices[i].upload(indexVec);
atomIndices.push_back(indices);
bufferVec[i].resize(width*numBonds, 0); bufferVec[i].resize(width*numBonds, 0);
for (int bond = 0; bond < numBonds; bond++) { for (int bond = 0; bond < numBonds; bond++) {
for (int atom = 0; atom < numAtoms; atom++) for (int atom = 0; atom < numAtoms; atom++)
...@@ -177,9 +170,8 @@ void OpenCLBondedUtilities::initialize(const System& system) { ...@@ -177,9 +170,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for (int bond = 0; bond < numBonds; bond++) for (int bond = 0; bond < numBonds; bond++)
for (int atom = 0; atom < numAtoms; atom++) for (int atom = 0; atom < numAtoms; atom++)
bufferVec[force][bond*width+atom] += bufferCounter[forceSets[i][k]][forceAtoms[force][bond][atom]]; bufferVec[force][bond*width+atom] += bufferCounter[forceSets[i][k]][forceAtoms[force][bond][atom]];
OpenCLArray* buffers = OpenCLArray::create<cl_uint>(context, bufferVec[force].size(), "bondedBufferIndices"); bufferIndices[force].initialize<cl_uint>(context, bufferVec[force].size(), "bondedBufferIndices");
buffers->upload(bufferVec[force]); bufferIndices[force].upload(bufferVec[force]);
bufferIndices[force] = buffers;
} }
// Create the kernels. // Create the kernels.
...@@ -291,8 +283,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) { ...@@ -291,8 +283,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
kernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
index += 6; index += 6;
for (int j = 0; j < (int) forceSets[i].size(); j++) { for (int j = 0; j < (int) forceSets[i].size(); j++) {
kernel.setArg<cl::Buffer>(index++, atomIndices[forceSets[i][j]]->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, atomIndices[forceSets[i][j]].getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, bufferIndices[forceSets[i][j]]->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, bufferIndices[forceSets[i][j]].getDeviceBuffer());
} }
for (int j = 0; j < (int) arguments.size(); j++) for (int j = 0; j < (int) arguments.size(); j++)
kernel.setArg<cl::Memory>(index++, *arguments[j]); kernel.setArg<cl::Memory>(index++, *arguments[j]);
......
...@@ -29,18 +29,13 @@ ...@@ -29,18 +29,13 @@
using namespace OpenMM; using namespace OpenMM;
OpenCLCompact::OpenCLCompact(OpenCLContext& context) : context(context), dgBlockCounts(NULL) { OpenCLCompact::OpenCLCompact(OpenCLContext& context) : context(context) {
dgBlockCounts = OpenCLArray::create<cl_uint>(context, context.getNumThreadBlocks(), "dgBlockCounts"); dgBlockCounts.initialize<cl_uint>(context, context.getNumThreadBlocks(), "dgBlockCounts");
cl::Program program = context.createProgram(OpenCLKernelSources::compact); cl::Program program = context.createProgram(OpenCLKernelSources::compact);
countKernel = cl::Kernel(program, "countElts"); countKernel = cl::Kernel(program, "countElts");
moveValidKernel = cl::Kernel(program, "moveValidElementsStaged"); moveValidKernel = cl::Kernel(program, "moveValidElementsStaged");
} }
OpenCLCompact::~OpenCLCompact() {
if (dgBlockCounts != NULL)
delete dgBlockCounts;
}
void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid) { void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid) {
// Figure out # elements per block // Figure out # elements per block
unsigned int len = dIn.getSize(); unsigned int len = dIn.getSize();
...@@ -51,7 +46,7 @@ void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArr ...@@ -51,7 +46,7 @@ void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArr
// TODO: implement loop over blocks of 10M // TODO: implement loop over blocks of 10M
// Phase 1: Calculate number of valid elements per thread block // Phase 1: Calculate number of valid elements per thread block
countKernel.setArg<cl::Buffer>(0, dgBlockCounts->getDeviceBuffer()); countKernel.setArg<cl::Buffer>(0, dgBlockCounts.getDeviceBuffer());
countKernel.setArg<cl::Buffer>(1, dValid.getDeviceBuffer()); countKernel.setArg<cl::Buffer>(1, dValid.getDeviceBuffer());
countKernel.setArg<cl_uint>(2, len); countKernel.setArg<cl_uint>(2, len);
countKernel.setArg(3, 128*sizeof(cl_uint), NULL); countKernel.setArg(3, 128*sizeof(cl_uint), NULL);
...@@ -61,7 +56,7 @@ void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArr ...@@ -61,7 +56,7 @@ void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArr
moveValidKernel.setArg<cl::Buffer>(0, dIn.getDeviceBuffer()); moveValidKernel.setArg<cl::Buffer>(0, dIn.getDeviceBuffer());
moveValidKernel.setArg<cl::Buffer>(1, dOut.getDeviceBuffer()); moveValidKernel.setArg<cl::Buffer>(1, dOut.getDeviceBuffer());
moveValidKernel.setArg<cl::Buffer>(2, dValid.getDeviceBuffer()); moveValidKernel.setArg<cl::Buffer>(2, dValid.getDeviceBuffer());
moveValidKernel.setArg<cl::Buffer>(3, dgBlockCounts->getDeviceBuffer()); moveValidKernel.setArg<cl::Buffer>(3, dgBlockCounts.getDeviceBuffer());
moveValidKernel.setArg<cl_uint>(4, len); moveValidKernel.setArg<cl_uint>(4, len);
moveValidKernel.setArg<cl::Buffer>(5, numValid.getDeviceBuffer()); moveValidKernel.setArg<cl::Buffer>(5, numValid.getDeviceBuffer());
moveValidKernel.setArg(6, 128*sizeof(cl_uint), NULL); moveValidKernel.setArg(6, 128*sizeof(cl_uint), NULL);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. * * Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "OpenCLIntegrationUtilities.h" #include "OpenCLIntegrationUtilities.h"
#include "OpenCLArray.h"
#include "OpenCLKernelSources.h" #include "OpenCLKernelSources.h"
#include "openmm/internal/OSRngSeed.h" #include "openmm/internal/OSRngSeed.h"
#include "openmm/HarmonicAngleForce.h" #include "openmm/HarmonicAngleForce.h"
...@@ -97,31 +96,24 @@ static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int inde ...@@ -97,31 +96,24 @@ static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int inde
} }
OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, const System& system) : context(context), OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, const System& system) : context(context),
posDelta(NULL), settleAtoms(NULL), settleParams(NULL), shakeAtoms(NULL), shakeParams(NULL), randomPos(0), hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false), hasOverlappingVsites(false) {
random(NULL), randomSeed(NULL), randomPos(0), stepSize(NULL), ccmaAtoms(NULL), ccmaDistance(NULL),
ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL),
ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL), ccmaConvergedHostBuffer(NULL),
vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL),
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), vsiteLocalCoordsIndex(NULL), vsiteLocalCoordsAtoms(NULL),
vsiteLocalCoordsWeights(NULL), vsiteLocalCoordsPos(NULL), vsiteLocalCoordsStartIndex(NULL),
hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false), hasOverlappingVsites(false) {
// Create workspace arrays. // Create workspace arrays.
lastStepSize = mm_double2(0.0, 0.0); lastStepSize = mm_double2(0.0, 0.0);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) { if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) {
posDelta = OpenCLArray::create<mm_double4>(context, context.getPaddedNumAtoms(), "posDelta"); posDelta.initialize<mm_double4>(context, context.getPaddedNumAtoms(), "posDelta");
vector<mm_double4> deltas(posDelta->getSize(), mm_double4(0.0, 0.0, 0.0, 0.0)); vector<mm_double4> deltas(posDelta.getSize(), mm_double4(0.0, 0.0, 0.0, 0.0));
posDelta->upload(deltas); posDelta.upload(deltas);
stepSize = OpenCLArray::create<mm_double2>(context, 1, "stepSize"); stepSize.initialize<mm_double2>(context, 1, "stepSize");
stepSize->upload(&lastStepSize); stepSize.upload(&lastStepSize);
} }
else { else {
posDelta = OpenCLArray::create<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta"); posDelta.initialize<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta");
vector<mm_float4> deltas(posDelta->getSize(), mm_float4(0.0f, 0.0f, 0.0f, 0.0f)); vector<mm_float4> deltas(posDelta.getSize(), mm_float4(0.0f, 0.0f, 0.0f, 0.0f));
posDelta->upload(deltas); posDelta.upload(deltas);
stepSize = OpenCLArray::create<mm_float2>(context, 1, "stepSize"); stepSize.initialize<mm_float2>(context, 1, "stepSize");
mm_float2 lastStepSizeFloat = mm_float2(0.0f, 0.0f); mm_float2 lastStepSizeFloat = mm_float2(0.0f, 0.0f);
stepSize->upload(&lastStepSizeFloat); stepSize.upload(&lastStepSizeFloat);
} }
// Create the time shift kernel for calculating kinetic energy. // Create the time shift kernel for calculating kinetic energy.
...@@ -227,10 +219,10 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -227,10 +219,10 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
isShakeAtom[atom3] = true; isShakeAtom[atom3] = true;
} }
if (atoms.size() > 0) { if (atoms.size() > 0) {
settleAtoms = OpenCLArray::create<mm_int4>(context, atoms.size(), "settleAtoms"); settleAtoms.initialize<mm_int4>(context, atoms.size(), "settleAtoms");
settleParams = OpenCLArray::create<mm_float2>(context, params.size(), "settleParams"); settleParams.initialize<mm_float2>(context, params.size(), "settleParams");
settleAtoms->upload(atoms); settleAtoms.upload(atoms);
settleParams->upload(params); settleParams.upload(params);
} }
} }
...@@ -310,10 +302,10 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -310,10 +302,10 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
isShakeAtom[cluster.peripheralID[2]] = true; isShakeAtom[cluster.peripheralID[2]] = true;
++index; ++index;
} }
shakeAtoms = OpenCLArray::create<mm_int4>(context, atoms.size(), "shakeAtoms"); shakeAtoms.initialize<mm_int4>(context, atoms.size(), "shakeAtoms");
shakeParams = OpenCLArray::create<mm_float4>(context, params.size(), "shakeParams"); shakeParams.initialize<mm_float4>(context, params.size(), "shakeParams");
shakeAtoms->upload(atoms); shakeAtoms.upload(atoms);
shakeParams->upload(params); shakeParams.upload(params);
} }
// Find connected constraints for CCMA. // Find connected constraints for CCMA.
...@@ -390,28 +382,28 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -390,28 +382,28 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Record the CCMA data structures. // Record the CCMA data structures.
ccmaAtoms = OpenCLArray::create<mm_int2>(context, numCCMA, "CcmaAtoms"); ccmaAtoms.initialize<mm_int2>(context, numCCMA, "CcmaAtoms");
ccmaAtomConstraints = OpenCLArray::create<cl_int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints"); ccmaAtomConstraints.initialize<cl_int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
ccmaNumAtomConstraints = OpenCLArray::create<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex"); ccmaNumAtomConstraints.initialize<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaConstraintMatrixColumn = OpenCLArray::create<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn"); ccmaConstraintMatrixColumn.initialize<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConverged = OpenCLArray::create<cl_int>(context, 2, "CcmaConverged"); ccmaConverged.initialize<cl_int>(context, 2, "CcmaConverged");
ccmaConvergedHostBuffer = OpenCLArray::create<cl_int>(context, 1, "CcmaConvergedHostBuffer", CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR); ccmaConvergedHostBuffer.initialize<cl_int>(context, 1, "CcmaConvergedHostBuffer", CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR);
// Different communication mechanisms give optimal performance on AMD and on NVIDIA. // Different communication mechanisms give optimal performance on AMD and on NVIDIA.
string vendor = context.getDevice().getInfo<CL_DEVICE_VENDOR>(); string vendor = context.getDevice().getInfo<CL_DEVICE_VENDOR>();
ccmaUseDirectBuffer = (vendor.size() >= 28 && vendor.substr(0, 28) == "Advanced Micro Devices, Inc."); ccmaUseDirectBuffer = (vendor.size() >= 28 && vendor.substr(0, 28) == "Advanced Micro Devices, Inc.");
vector<mm_int2> atomsVec(ccmaAtoms->getSize()); vector<mm_int2> atomsVec(ccmaAtoms.getSize());
vector<cl_int> atomConstraintsVec(ccmaAtomConstraints->getSize()); vector<cl_int> atomConstraintsVec(ccmaAtomConstraints.getSize());
vector<cl_int> numAtomConstraintsVec(ccmaNumAtomConstraints->getSize()); vector<cl_int> numAtomConstraintsVec(ccmaNumAtomConstraints.getSize());
vector<cl_int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn->getSize()); vector<cl_int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn.getSize());
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) { if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) {
ccmaDistance = OpenCLArray::create<mm_double4>(context, numCCMA, "CcmaDistance"); ccmaDistance.initialize<mm_double4>(context, numCCMA, "CcmaDistance");
ccmaDelta1 = OpenCLArray::create<cl_double>(context, numCCMA, "CcmaDelta1"); ccmaDelta1.initialize<cl_double>(context, numCCMA, "CcmaDelta1");
ccmaDelta2 = OpenCLArray::create<cl_double>(context, numCCMA, "CcmaDelta2"); ccmaDelta2.initialize<cl_double>(context, numCCMA, "CcmaDelta2");
ccmaReducedMass = OpenCLArray::create<cl_double>(context, numCCMA, "CcmaReducedMass"); ccmaReducedMass.initialize<cl_double>(context, numCCMA, "CcmaReducedMass");
ccmaConstraintMatrixValue = OpenCLArray::create<cl_double>(context, numCCMA*maxRowElements, "ConstraintMatrixValue"); ccmaConstraintMatrixValue.initialize<cl_double>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
vector<mm_double4> distanceVec(ccmaDistance->getSize()); vector<mm_double4> distanceVec(ccmaDistance.getSize());
vector<cl_double> reducedMassVec(ccmaReducedMass->getSize()); vector<cl_double> reducedMassVec(ccmaReducedMass.getSize());
vector<cl_double> constraintMatrixValueVec(ccmaConstraintMatrixValue->getSize()); vector<cl_double> constraintMatrixValueVec(ccmaConstraintMatrixValue.getSize());
for (int i = 0; i < numCCMA; i++) { for (int i = 0; i < numCCMA; i++) {
int index = constraintOrder[i]; int index = constraintOrder[i];
int c = ccmaConstraints[index]; int c = ccmaConstraints[index];
...@@ -432,19 +424,19 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -432,19 +424,19 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1); atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1);
} }
} }
ccmaDistance->upload(distanceVec); ccmaDistance.upload(distanceVec);
ccmaReducedMass->upload(reducedMassVec); ccmaReducedMass.upload(reducedMassVec);
ccmaConstraintMatrixValue->upload(constraintMatrixValueVec); ccmaConstraintMatrixValue.upload(constraintMatrixValueVec);
} }
else { else {
ccmaDistance = OpenCLArray::create<mm_float4>(context, numCCMA, "CcmaDistance"); ccmaDistance.initialize<mm_float4>(context, numCCMA, "CcmaDistance");
ccmaDelta1 = OpenCLArray::create<cl_float>(context, numCCMA, "CcmaDelta1"); ccmaDelta1.initialize<cl_float>(context, numCCMA, "CcmaDelta1");
ccmaDelta2 = OpenCLArray::create<cl_float>(context, numCCMA, "CcmaDelta2"); ccmaDelta2.initialize<cl_float>(context, numCCMA, "CcmaDelta2");
ccmaReducedMass = OpenCLArray::create<cl_float>(context, numCCMA, "CcmaReducedMass"); ccmaReducedMass.initialize<cl_float>(context, numCCMA, "CcmaReducedMass");
ccmaConstraintMatrixValue = OpenCLArray::create<cl_float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue"); ccmaConstraintMatrixValue.initialize<cl_float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
vector<mm_float4> distanceVec(ccmaDistance->getSize()); vector<mm_float4> distanceVec(ccmaDistance.getSize());
vector<cl_float> reducedMassVec(ccmaReducedMass->getSize()); vector<cl_float> reducedMassVec(ccmaReducedMass.getSize());
vector<cl_float> constraintMatrixValueVec(ccmaConstraintMatrixValue->getSize()); vector<cl_float> constraintMatrixValueVec(ccmaConstraintMatrixValue.getSize());
for (int i = 0; i < numCCMA; i++) { for (int i = 0; i < numCCMA; i++) {
int index = constraintOrder[i]; int index = constraintOrder[i];
int c = ccmaConstraints[index]; int c = ccmaConstraints[index];
...@@ -465,14 +457,14 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -465,14 +457,14 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1); atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1);
} }
} }
ccmaDistance->upload(distanceVec); ccmaDistance.upload(distanceVec);
ccmaReducedMass->upload(reducedMassVec); ccmaReducedMass.upload(reducedMassVec);
ccmaConstraintMatrixValue->upload(constraintMatrixValueVec); ccmaConstraintMatrixValue.upload(constraintMatrixValueVec);
} }
ccmaAtoms->upload(atomsVec); ccmaAtoms.upload(atomsVec);
ccmaAtomConstraints->upload(atomConstraintsVec); ccmaAtomConstraints.upload(atomConstraintsVec);
ccmaNumAtomConstraints->upload(numAtomConstraintsVec); ccmaNumAtomConstraints.upload(numAtomConstraintsVec);
ccmaConstraintMatrixColumn->upload(constraintMatrixColumnVec); ccmaConstraintMatrixColumn.upload(constraintMatrixColumnVec);
// Create the CCMA kernels. // Create the CCMA kernels.
...@@ -554,73 +546,73 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -554,73 +546,73 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
int numOutOfPlane = vsiteOutOfPlaneAtomVec.size(); int numOutOfPlane = vsiteOutOfPlaneAtomVec.size();
int numLocalCoords = vsiteLocalCoordsPosVec.size(); int numLocalCoords = vsiteLocalCoordsPosVec.size();
numVsites = num2Avg+num3Avg+numOutOfPlane+numLocalCoords; numVsites = num2Avg+num3Avg+numOutOfPlane+numLocalCoords;
vsite2AvgAtoms = OpenCLArray::create<mm_int4>(context, max(1, num2Avg), "vsite2AvgAtoms"); vsite2AvgAtoms.initialize<mm_int4>(context, max(1, num2Avg), "vsite2AvgAtoms");
vsite3AvgAtoms = OpenCLArray::create<mm_int4>(context, max(1, num3Avg), "vsite3AvgAtoms"); vsite3AvgAtoms.initialize<mm_int4>(context, max(1, num3Avg), "vsite3AvgAtoms");
vsiteOutOfPlaneAtoms = OpenCLArray::create<mm_int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms"); vsiteOutOfPlaneAtoms.initialize<mm_int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms");
vsiteLocalCoordsIndex = OpenCLArray::create<cl_int>(context, max(1, (int) vsiteLocalCoordsIndexVec.size()), "vsiteLocalCoordsIndex"); vsiteLocalCoordsIndex.initialize<cl_int>(context, max(1, (int) vsiteLocalCoordsIndexVec.size()), "vsiteLocalCoordsIndex");
vsiteLocalCoordsAtoms = OpenCLArray::create<cl_int>(context, max(1, (int) vsiteLocalCoordsAtomVec.size()), "vsiteLocalCoordsAtoms"); vsiteLocalCoordsAtoms.initialize<cl_int>(context, max(1, (int) vsiteLocalCoordsAtomVec.size()), "vsiteLocalCoordsAtoms");
vsiteLocalCoordsStartIndex = OpenCLArray::create<cl_int>(context, max(1, (int) vsiteLocalCoordsStartVec.size()), "vsiteLocalCoordsStartIndex"); vsiteLocalCoordsStartIndex.initialize<cl_int>(context, max(1, (int) vsiteLocalCoordsStartVec.size()), "vsiteLocalCoordsStartIndex");
if (num2Avg > 0) if (num2Avg > 0)
vsite2AvgAtoms->upload(vsite2AvgAtomVec); vsite2AvgAtoms.upload(vsite2AvgAtomVec);
if (num3Avg > 0) if (num3Avg > 0)
vsite3AvgAtoms->upload(vsite3AvgAtomVec); vsite3AvgAtoms.upload(vsite3AvgAtomVec);
if (numOutOfPlane > 0) if (numOutOfPlane > 0)
vsiteOutOfPlaneAtoms->upload(vsiteOutOfPlaneAtomVec); vsiteOutOfPlaneAtoms.upload(vsiteOutOfPlaneAtomVec);
if (numLocalCoords > 0) { if (numLocalCoords > 0) {
vsiteLocalCoordsIndex->upload(vsiteLocalCoordsIndexVec); vsiteLocalCoordsIndex.upload(vsiteLocalCoordsIndexVec);
vsiteLocalCoordsAtoms->upload(vsiteLocalCoordsAtomVec); vsiteLocalCoordsAtoms.upload(vsiteLocalCoordsAtomVec);
vsiteLocalCoordsStartIndex->upload(vsiteLocalCoordsStartVec); vsiteLocalCoordsStartIndex.upload(vsiteLocalCoordsStartVec);
} }
if (context.getUseDoublePrecision()) { if (context.getUseDoublePrecision()) {
vsite2AvgWeights = OpenCLArray::create<mm_double2>(context, max(1, num2Avg), "vsite2AvgWeights"); vsite2AvgWeights.initialize<mm_double2>(context, max(1, num2Avg), "vsite2AvgWeights");
vsite3AvgWeights = OpenCLArray::create<mm_double4>(context, max(1, num3Avg), "vsite3AvgWeights"); vsite3AvgWeights.initialize<mm_double4>(context, max(1, num3Avg), "vsite3AvgWeights");
vsiteOutOfPlaneWeights = OpenCLArray::create<mm_double4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights"); vsiteOutOfPlaneWeights.initialize<mm_double4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
vsiteLocalCoordsWeights = OpenCLArray::create<cl_double>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights"); vsiteLocalCoordsWeights.initialize<cl_double>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights");
vsiteLocalCoordsPos = OpenCLArray::create<mm_double4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos"); vsiteLocalCoordsPos.initialize<mm_double4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos");
if (num2Avg > 0) if (num2Avg > 0)
vsite2AvgWeights->upload(vsite2AvgWeightVec); vsite2AvgWeights.upload(vsite2AvgWeightVec);
if (num3Avg > 0) if (num3Avg > 0)
vsite3AvgWeights->upload(vsite3AvgWeightVec); vsite3AvgWeights.upload(vsite3AvgWeightVec);
if (numOutOfPlane > 0) if (numOutOfPlane > 0)
vsiteOutOfPlaneWeights->upload(vsiteOutOfPlaneWeightVec); vsiteOutOfPlaneWeights.upload(vsiteOutOfPlaneWeightVec);
if (numLocalCoords > 0) { if (numLocalCoords > 0) {
vsiteLocalCoordsWeights->upload(vsiteLocalCoordsWeightVec); vsiteLocalCoordsWeights.upload(vsiteLocalCoordsWeightVec);
vsiteLocalCoordsPos->upload(vsiteLocalCoordsPosVec); vsiteLocalCoordsPos.upload(vsiteLocalCoordsPosVec);
} }
} }
else { else {
vsite2AvgWeights = OpenCLArray::create<mm_float2>(context, max(1, num2Avg), "vsite2AvgWeights"); vsite2AvgWeights.initialize<mm_float2>(context, max(1, num2Avg), "vsite2AvgWeights");
vsite3AvgWeights = OpenCLArray::create<mm_float4>(context, max(1, num3Avg), "vsite3AvgWeights"); vsite3AvgWeights.initialize<mm_float4>(context, max(1, num3Avg), "vsite3AvgWeights");
vsiteOutOfPlaneWeights = OpenCLArray::create<mm_float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights"); vsiteOutOfPlaneWeights.initialize<mm_float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
vsiteLocalCoordsWeights = OpenCLArray::create<cl_float>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights"); vsiteLocalCoordsWeights.initialize<cl_float>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights");
vsiteLocalCoordsPos = OpenCLArray::create<mm_float4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos"); vsiteLocalCoordsPos.initialize<mm_float4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos");
if (num2Avg > 0) { if (num2Avg > 0) {
vector<mm_float2> floatWeights(num2Avg); vector<mm_float2> floatWeights(num2Avg);
for (int i = 0; i < num2Avg; i++) for (int i = 0; i < num2Avg; i++)
floatWeights[i] = mm_float2((float) vsite2AvgWeightVec[i].x, (float) vsite2AvgWeightVec[i].y); floatWeights[i] = mm_float2((float) vsite2AvgWeightVec[i].x, (float) vsite2AvgWeightVec[i].y);
vsite2AvgWeights->upload(floatWeights); vsite2AvgWeights.upload(floatWeights);
} }
if (num3Avg > 0) { if (num3Avg > 0) {
vector<mm_float4> floatWeights(num3Avg); vector<mm_float4> floatWeights(num3Avg);
for (int i = 0; i < num3Avg; i++) for (int i = 0; i < num3Avg; i++)
floatWeights[i] = mm_float4((float) vsite3AvgWeightVec[i].x, (float) vsite3AvgWeightVec[i].y, (float) vsite3AvgWeightVec[i].z, 0.0f); floatWeights[i] = mm_float4((float) vsite3AvgWeightVec[i].x, (float) vsite3AvgWeightVec[i].y, (float) vsite3AvgWeightVec[i].z, 0.0f);
vsite3AvgWeights->upload(floatWeights); vsite3AvgWeights.upload(floatWeights);
} }
if (numOutOfPlane > 0) { if (numOutOfPlane > 0) {
vector<mm_float4> floatWeights(numOutOfPlane); vector<mm_float4> floatWeights(numOutOfPlane);
for (int i = 0; i < numOutOfPlane; i++) for (int i = 0; i < numOutOfPlane; i++)
floatWeights[i] = mm_float4((float) vsiteOutOfPlaneWeightVec[i].x, (float) vsiteOutOfPlaneWeightVec[i].y, (float) vsiteOutOfPlaneWeightVec[i].z, 0.0f); floatWeights[i] = mm_float4((float) vsiteOutOfPlaneWeightVec[i].x, (float) vsiteOutOfPlaneWeightVec[i].y, (float) vsiteOutOfPlaneWeightVec[i].z, 0.0f);
vsiteOutOfPlaneWeights->upload(floatWeights); vsiteOutOfPlaneWeights.upload(floatWeights);
} }
if (numLocalCoords > 0) { if (numLocalCoords > 0) {
vector<cl_float> floatWeights(vsiteLocalCoordsWeightVec.size()); vector<cl_float> floatWeights(vsiteLocalCoordsWeightVec.size());
for (int i = 0; i < (int) vsiteLocalCoordsWeightVec.size(); i++) for (int i = 0; i < (int) vsiteLocalCoordsWeightVec.size(); i++)
floatWeights[i] = (cl_float) vsiteLocalCoordsWeightVec[i]; floatWeights[i] = (cl_float) vsiteLocalCoordsWeightVec[i];
vsiteLocalCoordsWeights->upload(floatWeights); vsiteLocalCoordsWeights.upload(floatWeights);
vector<mm_float4> floatPos(vsiteLocalCoordsPosVec.size()); vector<mm_float4> floatPos(vsiteLocalCoordsPosVec.size());
for (int i = 0; i < (int) vsiteLocalCoordsPosVec.size(); i++) for (int i = 0; i < (int) vsiteLocalCoordsPosVec.size(); i++)
floatPos[i] = mm_float4((float) vsiteLocalCoordsPosVec[i].x, (float) vsiteLocalCoordsPosVec[i].y, (float) vsiteLocalCoordsPosVec[i].z, 0.0f); floatPos[i] = mm_float4((float) vsiteLocalCoordsPosVec[i].x, (float) vsiteLocalCoordsPosVec[i].y, (float) vsiteLocalCoordsPosVec[i].z, 0.0f);
vsiteLocalCoordsPos->upload(floatPos); vsiteLocalCoordsPos.upload(floatPos);
} }
} }
...@@ -655,17 +647,17 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -655,17 +647,17 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsitePositionKernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
if (context.getUseMixedPrecision()) if (context.getUseMixedPrecision())
vsitePositionKernel.setArg<cl::Buffer>(index++, context.getPosqCorrection().getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, context.getPosqCorrection().getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsite2AvgAtoms->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsite2AvgAtoms.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsite2AvgWeights->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsite2AvgWeights.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsite3AvgAtoms->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsite3AvgAtoms.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsite3AvgWeights->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsite3AvgWeights.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneAtoms->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneAtoms.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneWeights->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneWeights.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsIndex->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsIndex.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsAtoms->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsAtoms.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsWeights->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsWeights.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsPos->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsPos.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsStartIndex->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsStartIndex.getDeviceBuffer());
vsiteForceKernel = cl::Kernel(vsiteProgram, "distributeForces"); vsiteForceKernel = cl::Kernel(vsiteProgram, "distributeForces");
index = 0; index = 0;
vsiteForceKernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
...@@ -674,102 +666,39 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -674,102 +666,39 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
index++; // Skip argument 2: the force array hasn't been created yet. index++; // Skip argument 2: the force array hasn't been created yet.
if (context.getUseMixedPrecision()) if (context.getUseMixedPrecision())
vsiteForceKernel.setArg<cl::Buffer>(index++, context.getPosqCorrection().getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, context.getPosqCorrection().getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsite2AvgAtoms->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsite2AvgAtoms.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsite2AvgWeights->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsite2AvgWeights.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsite3AvgAtoms->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsite3AvgAtoms.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsite3AvgWeights->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsite3AvgWeights.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneAtoms->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneAtoms.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneWeights->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneWeights.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsIndex->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsIndex.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsAtoms->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsAtoms.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsWeights->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsWeights.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsPos->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsPos.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsStartIndex->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsStartIndex.getDeviceBuffer());
if (hasOverlappingVsites && context.getSupports64BitGlobalAtomics()) if (hasOverlappingVsites && context.getSupports64BitGlobalAtomics())
vsiteAddForcesKernel = cl::Kernel(vsiteProgram, "addDistributedForces"); vsiteAddForcesKernel = cl::Kernel(vsiteProgram, "addDistributedForces");
} }
OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
if (posDelta != NULL)
delete posDelta;
if (settleAtoms != NULL)
delete settleAtoms;
if (settleParams != NULL)
delete settleParams;
if (shakeAtoms != NULL)
delete shakeAtoms;
if (shakeParams != NULL)
delete shakeParams;
if (random != NULL)
delete random;
if (randomSeed != NULL)
delete randomSeed;
if (stepSize != NULL)
delete stepSize;
if (ccmaAtoms != NULL)
delete ccmaAtoms;
if (ccmaDistance != NULL)
delete ccmaDistance;
if (ccmaReducedMass != NULL)
delete ccmaReducedMass;
if (ccmaAtomConstraints != NULL)
delete ccmaAtomConstraints;
if (ccmaNumAtomConstraints != NULL)
delete ccmaNumAtomConstraints;
if (ccmaConstraintMatrixColumn != NULL)
delete ccmaConstraintMatrixColumn;
if (ccmaConstraintMatrixValue != NULL)
delete ccmaConstraintMatrixValue;
if (ccmaDelta1 != NULL)
delete ccmaDelta1;
if (ccmaDelta2 != NULL)
delete ccmaDelta2;
if (ccmaConverged != NULL)
delete ccmaConverged;
if (ccmaConvergedHostBuffer != NULL)
delete ccmaConvergedHostBuffer;
if (vsite2AvgAtoms != NULL)
delete vsite2AvgAtoms;
if (vsite2AvgWeights != NULL)
delete vsite2AvgWeights;
if (vsite3AvgAtoms != NULL)
delete vsite3AvgAtoms;
if (vsite3AvgWeights != NULL)
delete vsite3AvgWeights;
if (vsiteOutOfPlaneAtoms != NULL)
delete vsiteOutOfPlaneAtoms;
if (vsiteOutOfPlaneWeights != NULL)
delete vsiteOutOfPlaneWeights;
if (vsiteLocalCoordsIndex != NULL)
delete vsiteLocalCoordsIndex;
if (vsiteLocalCoordsAtoms != NULL)
delete vsiteLocalCoordsAtoms;
if (vsiteLocalCoordsWeights != NULL)
delete vsiteLocalCoordsWeights;
if (vsiteLocalCoordsPos != NULL)
delete vsiteLocalCoordsPos;
if (vsiteLocalCoordsStartIndex != NULL)
delete vsiteLocalCoordsStartIndex;
}
void OpenCLIntegrationUtilities::setNextStepSize(double size) { void OpenCLIntegrationUtilities::setNextStepSize(double size) {
if (size != lastStepSize.x || size != lastStepSize.y) { if (size != lastStepSize.x || size != lastStepSize.y) {
lastStepSize = mm_double2(size, size); lastStepSize = mm_double2(size, size);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
stepSize->upload(&lastStepSize); stepSize.upload(&lastStepSize);
else { else {
mm_float2 lastStepSizeFloat = mm_float2((float) size, (float) size); mm_float2 lastStepSizeFloat = mm_float2((float) size, (float) size);
stepSize->upload(&lastStepSizeFloat); stepSize.upload(&lastStepSizeFloat);
} }
} }
} }
double OpenCLIntegrationUtilities::getLastStepSize() { double OpenCLIntegrationUtilities::getLastStepSize() {
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
stepSize->download(&lastStepSize); stepSize.download(&lastStepSize);
else { else {
mm_float2 lastStepSizeFloat; mm_float2 lastStepSizeFloat;
stepSize->download(&lastStepSizeFloat); stepSize.download(&lastStepSizeFloat);
lastStepSize = mm_double2(lastStepSizeFloat.x, lastStepSizeFloat.y); lastStepSize = mm_double2(lastStepSizeFloat.x, lastStepSizeFloat.y);
} }
return lastStepSize.y; return lastStepSize.y;
...@@ -802,102 +731,102 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub ...@@ -802,102 +731,102 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
ccmaUpdateKernel = ccmaPosUpdateKernel; ccmaUpdateKernel = ccmaPosUpdateKernel;
hasInitializedPosConstraintKernels = true; hasInitializedPosConstraintKernels = true;
} }
if (settleAtoms != NULL) { if (settleAtoms.isInitialized()) {
if (!hasInitialized) { if (!hasInitialized) {
settleKernel.setArg<cl_int>(0, settleAtoms->getSize()); settleKernel.setArg<cl_int>(0, settleAtoms.getSize());
settleKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer()); settleKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
if (context.getUseMixedPrecision()) if (context.getUseMixedPrecision())
settleKernel.setArg<cl::Buffer>(3, context.getPosqCorrection().getDeviceBuffer()); settleKernel.setArg<cl::Buffer>(3, context.getPosqCorrection().getDeviceBuffer());
else else
settleKernel.setArg<void*>(3, NULL); settleKernel.setArg<void*>(3, NULL);
settleKernel.setArg<cl::Buffer>(4, posDelta->getDeviceBuffer()); settleKernel.setArg<cl::Buffer>(4, posDelta.getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(5, context.getVelm().getDeviceBuffer()); settleKernel.setArg<cl::Buffer>(5, context.getVelm().getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(6, settleAtoms->getDeviceBuffer()); settleKernel.setArg<cl::Buffer>(6, settleAtoms.getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(7, settleParams->getDeviceBuffer()); settleKernel.setArg<cl::Buffer>(7, settleParams.getDeviceBuffer());
} }
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
settleKernel.setArg<cl_double>(1, (cl_double) tol); settleKernel.setArg<cl_double>(1, (cl_double) tol);
else else
settleKernel.setArg<cl_float>(1, (cl_float) tol); settleKernel.setArg<cl_float>(1, (cl_float) tol);
context.executeKernel(settleKernel, settleAtoms->getSize()); context.executeKernel(settleKernel, settleAtoms.getSize());
} }
if (shakeAtoms != NULL) { if (shakeAtoms.isInitialized()) {
if (!hasInitialized) { if (!hasInitialized) {
shakeKernel.setArg<cl_int>(0, shakeAtoms->getSize()); shakeKernel.setArg<cl_int>(0, shakeAtoms.getSize());
shakeKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer()); shakeKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
if (context.getUseMixedPrecision()) if (context.getUseMixedPrecision())
shakeKernel.setArg<cl::Buffer>(3, context.getPosqCorrection().getDeviceBuffer()); shakeKernel.setArg<cl::Buffer>(3, context.getPosqCorrection().getDeviceBuffer());
else else
shakeKernel.setArg<void*>(3, NULL); shakeKernel.setArg<void*>(3, NULL);
shakeKernel.setArg<cl::Buffer>(4, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta->getDeviceBuffer()); shakeKernel.setArg<cl::Buffer>(4, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta.getDeviceBuffer());
shakeKernel.setArg<cl::Buffer>(5, shakeAtoms->getDeviceBuffer()); shakeKernel.setArg<cl::Buffer>(5, shakeAtoms.getDeviceBuffer());
shakeKernel.setArg<cl::Buffer>(6, shakeParams->getDeviceBuffer()); shakeKernel.setArg<cl::Buffer>(6, shakeParams.getDeviceBuffer());
} }
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
shakeKernel.setArg<cl_double>(1, (cl_double) tol); shakeKernel.setArg<cl_double>(1, (cl_double) tol);
else else
shakeKernel.setArg<cl_float>(1, (cl_float) tol); shakeKernel.setArg<cl_float>(1, (cl_float) tol);
context.executeKernel(shakeKernel, shakeAtoms->getSize()); context.executeKernel(shakeKernel, shakeAtoms.getSize());
} }
if (ccmaAtoms != NULL) { if (ccmaAtoms.isInitialized()) {
if (!hasInitialized) { if (!hasInitialized) {
ccmaDirectionsKernel.setArg<cl::Buffer>(0, ccmaAtoms->getDeviceBuffer()); ccmaDirectionsKernel.setArg<cl::Buffer>(0, ccmaAtoms.getDeviceBuffer());
ccmaDirectionsKernel.setArg<cl::Buffer>(1, ccmaDistance->getDeviceBuffer()); ccmaDirectionsKernel.setArg<cl::Buffer>(1, ccmaDistance.getDeviceBuffer());
ccmaDirectionsKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer()); ccmaDirectionsKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
if (context.getUseMixedPrecision()) if (context.getUseMixedPrecision())
ccmaDirectionsKernel.setArg<cl::Buffer>(3, context.getPosqCorrection().getDeviceBuffer()); ccmaDirectionsKernel.setArg<cl::Buffer>(3, context.getPosqCorrection().getDeviceBuffer());
else else
ccmaDirectionsKernel.setArg<void*>(3, NULL); ccmaDirectionsKernel.setArg<void*>(3, NULL);
ccmaDirectionsKernel.setArg<cl::Buffer>(4, ccmaConverged->getDeviceBuffer()); ccmaDirectionsKernel.setArg<cl::Buffer>(4, ccmaConverged.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(0, ccmaAtoms->getDeviceBuffer()); ccmaForceKernel.setArg<cl::Buffer>(0, ccmaAtoms.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(1, ccmaDistance->getDeviceBuffer()); ccmaForceKernel.setArg<cl::Buffer>(1, ccmaDistance.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(2, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta->getDeviceBuffer()); ccmaForceKernel.setArg<cl::Buffer>(2, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(3, ccmaReducedMass->getDeviceBuffer()); ccmaForceKernel.setArg<cl::Buffer>(3, ccmaReducedMass.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(4, ccmaDelta1->getDeviceBuffer()); ccmaForceKernel.setArg<cl::Buffer>(4, ccmaDelta1.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(5, ccmaConverged->getDeviceBuffer()); ccmaForceKernel.setArg<cl::Buffer>(5, ccmaConverged.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(6, ccmaConvergedHostBuffer->getDeviceBuffer()); ccmaForceKernel.setArg<cl::Buffer>(6, ccmaConvergedHostBuffer.getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(0, ccmaDelta1->getDeviceBuffer()); ccmaMultiplyKernel.setArg<cl::Buffer>(0, ccmaDelta1.getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(1, ccmaDelta2->getDeviceBuffer()); ccmaMultiplyKernel.setArg<cl::Buffer>(1, ccmaDelta2.getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(2, ccmaConstraintMatrixColumn->getDeviceBuffer()); ccmaMultiplyKernel.setArg<cl::Buffer>(2, ccmaConstraintMatrixColumn.getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(3, ccmaConstraintMatrixValue->getDeviceBuffer()); ccmaMultiplyKernel.setArg<cl::Buffer>(3, ccmaConstraintMatrixValue.getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(4, ccmaConverged->getDeviceBuffer()); ccmaMultiplyKernel.setArg<cl::Buffer>(4, ccmaConverged.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(0, ccmaNumAtomConstraints->getDeviceBuffer()); ccmaUpdateKernel.setArg<cl::Buffer>(0, ccmaNumAtomConstraints.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(1, ccmaAtomConstraints->getDeviceBuffer()); ccmaUpdateKernel.setArg<cl::Buffer>(1, ccmaAtomConstraints.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(2, ccmaDistance->getDeviceBuffer()); ccmaUpdateKernel.setArg<cl::Buffer>(2, ccmaDistance.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(3, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta->getDeviceBuffer()); ccmaUpdateKernel.setArg<cl::Buffer>(3, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(4, context.getVelm().getDeviceBuffer()); ccmaUpdateKernel.setArg<cl::Buffer>(4, context.getVelm().getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(5, ccmaDelta1->getDeviceBuffer()); ccmaUpdateKernel.setArg<cl::Buffer>(5, ccmaDelta1.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(6, ccmaDelta2->getDeviceBuffer()); ccmaUpdateKernel.setArg<cl::Buffer>(6, ccmaDelta2.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(7, ccmaConverged->getDeviceBuffer()); ccmaUpdateKernel.setArg<cl::Buffer>(7, ccmaConverged.getDeviceBuffer());
} }
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
ccmaForceKernel.setArg<cl_double>(7, (cl_double) tol); ccmaForceKernel.setArg<cl_double>(7, (cl_double) tol);
else else
ccmaForceKernel.setArg<cl_float>(7, (cl_float) tol); ccmaForceKernel.setArg<cl_float>(7, (cl_float) tol);
context.executeKernel(ccmaDirectionsKernel, ccmaAtoms->getSize()); context.executeKernel(ccmaDirectionsKernel, ccmaAtoms.getSize());
const int checkInterval = 4; const int checkInterval = 4;
int* converged = (int*) context.getPinnedBuffer(); int* converged = (int*) context.getPinnedBuffer();
int* ccmaConvergedHostMemory = (int*) context.getQueue().enqueueMapBuffer(ccmaConvergedHostBuffer->getDeviceBuffer(), CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_int)); int* ccmaConvergedHostMemory = (int*) context.getQueue().enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_int));
ccmaConvergedHostMemory[0] = 0; ccmaConvergedHostMemory[0] = 0;
context.getQueue().enqueueUnmapMemObject(ccmaConvergedHostBuffer->getDeviceBuffer(), ccmaConvergedHostMemory); context.getQueue().enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
for (int i = 0; i < 150; i++) { for (int i = 0; i < 150; i++) {
ccmaForceKernel.setArg<cl_int>(8, i); ccmaForceKernel.setArg<cl_int>(8, i);
context.executeKernel(ccmaForceKernel, ccmaAtoms->getSize()); context.executeKernel(ccmaForceKernel, ccmaAtoms.getSize());
cl::Event event; cl::Event event;
if ((i+1)%checkInterval == 0 && !ccmaUseDirectBuffer) if ((i+1)%checkInterval == 0 && !ccmaUseDirectBuffer)
context.getQueue().enqueueReadBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), converged, NULL, &event); context.getQueue().enqueueReadBuffer(ccmaConverged.getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), converged, NULL, &event);
ccmaMultiplyKernel.setArg<cl_int>(5, i); ccmaMultiplyKernel.setArg<cl_int>(5, i);
context.executeKernel(ccmaMultiplyKernel, ccmaAtoms->getSize()); context.executeKernel(ccmaMultiplyKernel, ccmaAtoms.getSize());
ccmaUpdateKernel.setArg<cl_int>(8, i); ccmaUpdateKernel.setArg<cl_int>(8, i);
context.executeKernel(ccmaUpdateKernel, context.getNumAtoms()); context.executeKernel(ccmaUpdateKernel, context.getNumAtoms());
if ((i+1)%checkInterval == 0) { if ((i+1)%checkInterval == 0) {
if (ccmaUseDirectBuffer) { if (ccmaUseDirectBuffer) {
ccmaConvergedHostMemory = (int*) context.getQueue().enqueueMapBuffer(ccmaConvergedHostBuffer->getDeviceBuffer(), CL_FALSE, CL_MAP_READ, 0, sizeof(cl_int), NULL, &event); ccmaConvergedHostMemory = (int*) context.getQueue().enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_FALSE, CL_MAP_READ, 0, sizeof(cl_int), NULL, &event);
context.getQueue().flush(); context.getQueue().flush();
while (event.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() != CL_COMPLETE) while (event.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() != CL_COMPLETE)
; ;
converged[i%2] = ccmaConvergedHostMemory[0]; converged[i%2] = ccmaConvergedHostMemory[0];
context.getQueue().enqueueUnmapMemObject(ccmaConvergedHostBuffer->getDeviceBuffer(), ccmaConvergedHostMemory); context.getQueue().enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
} }
else else
event.wait(); event.wait();
...@@ -938,7 +867,7 @@ void OpenCLIntegrationUtilities::distributeForcesFromVirtualSites() { ...@@ -938,7 +867,7 @@ void OpenCLIntegrationUtilities::distributeForcesFromVirtualSites() {
} }
void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSeed) { void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSeed) {
if (random != NULL) { if (random.isInitialized()) {
if (randomNumberSeed != lastSeed) if (randomNumberSeed != lastSeed)
throw OpenMMException("OpenCLIntegrationUtilities::initRandomNumberGenerator(): Requested two different values for the random number seed"); throw OpenMMException("OpenCLIntegrationUtilities::initRandomNumberGenerator(): Requested two different values for the random number seed");
return; return;
...@@ -947,23 +876,23 @@ void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNu ...@@ -947,23 +876,23 @@ void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNu
// Create the random number arrays. // Create the random number arrays.
lastSeed = randomNumberSeed; lastSeed = randomNumberSeed;
random = OpenCLArray::create<mm_float4>(context, 4*context.getPaddedNumAtoms(), "random"); random.initialize<mm_float4>(context, 4*context.getPaddedNumAtoms(), "random");
randomSeed = OpenCLArray::create<mm_int4>(context, context.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed"); randomSeed.initialize<mm_int4>(context, context.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed");
randomPos = random->getSize(); randomPos = random.getSize();
// Use a quick and dirty RNG to pick seeds for the real random number generator. // Use a quick and dirty RNG to pick seeds for the real random number generator.
vector<mm_int4> seed(randomSeed->getSize()); vector<mm_int4> seed(randomSeed.getSize());
unsigned int r = randomNumberSeed; unsigned int r = randomNumberSeed;
// A seed of 0 means use a unique one // A seed of 0 means use a unique one
if (r == 0) r = (unsigned int) osrngseed(); if (r == 0) r = (unsigned int) osrngseed();
for (int i = 0; i < randomSeed->getSize(); i++) { for (int i = 0; i < randomSeed.getSize(); i++) {
seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF; seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF; seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF; seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF; seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
} }
randomSeed->upload(seed); randomSeed.upload(seed);
// Create the kernel. // Create the kernel.
...@@ -972,45 +901,44 @@ void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNu ...@@ -972,45 +901,44 @@ void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNu
} }
int OpenCLIntegrationUtilities::prepareRandomNumbers(int numValues) { int OpenCLIntegrationUtilities::prepareRandomNumbers(int numValues) {
if (randomPos+numValues <= random->getSize()) { if (randomPos+numValues <= random.getSize()) {
int oldPos = randomPos; int oldPos = randomPos;
randomPos += numValues; randomPos += numValues;
return oldPos; return oldPos;
} }
if (numValues > random->getSize()) { if (numValues > random.getSize()) {
delete random; random.resize(numValues);
random = OpenCLArray::create<mm_float4>(context, numValues, "random");
} }
randomKernel.setArg<cl_int>(0, random->getSize()); randomKernel.setArg<cl_int>(0, random.getSize());
randomKernel.setArg<cl::Buffer>(1, random->getDeviceBuffer()); randomKernel.setArg<cl::Buffer>(1, random.getDeviceBuffer());
randomKernel.setArg<cl::Buffer>(2, randomSeed->getDeviceBuffer()); randomKernel.setArg<cl::Buffer>(2, randomSeed.getDeviceBuffer());
context.executeKernel(randomKernel, random->getSize()); context.executeKernel(randomKernel, random.getSize());
randomPos = numValues; randomPos = numValues;
return 0; return 0;
} }
void OpenCLIntegrationUtilities::createCheckpoint(ostream& stream) { void OpenCLIntegrationUtilities::createCheckpoint(ostream& stream) {
if(random == NULL) if (!random.isInitialized())
return; return;
stream.write((char*) &randomPos, sizeof(int)); stream.write((char*) &randomPos, sizeof(int));
vector<mm_float4> randomVec; vector<mm_float4> randomVec;
random->download(randomVec); random.download(randomVec);
stream.write((char*) &randomVec[0], sizeof(mm_float4)*random->getSize()); stream.write((char*) &randomVec[0], sizeof(mm_float4)*random.getSize());
vector<mm_int4> randomSeedVec; vector<mm_int4> randomSeedVec;
randomSeed->download(randomSeedVec); randomSeed.download(randomSeedVec);
stream.write((char*) &randomSeedVec[0], sizeof(mm_int4)*randomSeed->getSize()); stream.write((char*) &randomSeedVec[0], sizeof(mm_int4)*randomSeed.getSize());
} }
void OpenCLIntegrationUtilities::loadCheckpoint(istream& stream) { void OpenCLIntegrationUtilities::loadCheckpoint(istream& stream) {
if(random == NULL) if (!random.isInitialized())
return; return;
stream.read((char*) &randomPos, sizeof(int)); stream.read((char*) &randomPos, sizeof(int));
vector<mm_float4> randomVec(random->getSize()); vector<mm_float4> randomVec(random.getSize());
stream.read((char*) &randomVec[0], sizeof(mm_float4)*random->getSize()); stream.read((char*) &randomVec[0], sizeof(mm_float4)*random.getSize());
random->upload(randomVec); random.upload(randomVec);
vector<mm_int4> randomSeedVec(randomSeed->getSize()); vector<mm_int4> randomSeedVec(randomSeed.getSize());
stream.read((char*) &randomSeedVec[0], sizeof(mm_int4)*randomSeed->getSize()); stream.read((char*) &randomSeedVec[0], sizeof(mm_int4)*randomSeed.getSize());
randomSeed->upload(randomSeedVec); randomSeed.upload(randomSeedVec);
} }
double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) { double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) {
...@@ -1018,7 +946,7 @@ double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) { ...@@ -1018,7 +946,7 @@ double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) {
if (timeShift != 0) { if (timeShift != 0) {
// Copy the velocities into the posDelta array while we temporarily modify them. // Copy the velocities into the posDelta array while we temporarily modify them.
context.getVelm().copyTo(*posDelta); context.getVelm().copyTo(posDelta);
// Apply the time shift. // Apply the time shift.
...@@ -1057,6 +985,6 @@ double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) { ...@@ -1057,6 +985,6 @@ double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) {
// Restore the velocities. // Restore the velocities.
if (timeShift != 0) if (timeShift != 0)
posDelta->copyTo(context.getVelm()); posDelta.copyTo(context.getVelm());
return 0.5*energy; return 0.5*energy;
} }
...@@ -6357,47 +6357,6 @@ private: ...@@ -6357,47 +6357,6 @@ private:
OpenCLCalcGayBerneForceKernel& owner; OpenCLCalcGayBerneForceKernel& owner;
}; };
OpenCLCalcGayBerneForceKernel::~OpenCLCalcGayBerneForceKernel() {
if (sortedParticles != NULL)
delete sortedParticles;
if (axisParticleIndices != NULL)
delete axisParticleIndices;
if (sigParams != NULL)
delete sigParams;
if (epsParams != NULL)
delete epsParams;
if (scale != NULL)
delete scale;
if (exceptionParticles != NULL)
delete exceptionParticles;
if (exceptionParams != NULL)
delete exceptionParams;
if (aMatrix != NULL)
delete aMatrix;
if (bMatrix != NULL)
delete bMatrix;
if (gMatrix != NULL)
delete gMatrix;
if (exclusions != NULL)
delete exclusions;
if (exclusionStartIndex != NULL)
delete exclusionStartIndex;
if (blockCenter != NULL)
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (neighbors != NULL)
delete neighbors;
if (neighborIndex != NULL)
delete neighborIndex;
if (neighborBlockCount != NULL)
delete neighborBlockCount;
if (sortedPos != NULL)
delete sortedPos;
if (torque != NULL)
delete torque;
}
void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBerneForce& force) { void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBerneForce& force) {
if (!cl.getSupports64BitGlobalAtomics()) if (!cl.getSupports64BitGlobalAtomics())
throw OpenMMException("GayBerneForce requires a device that supports 64 bit atomic operations"); throw OpenMMException("GayBerneForce requires a device that supports 64 bit atomic operations");
...@@ -6405,14 +6364,14 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe ...@@ -6405,14 +6364,14 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
// Initialize interactions. // Initialize interactions.
int numParticles = force.getNumParticles(); int numParticles = force.getNumParticles();
sigParams = OpenCLArray::create<mm_float4>(cl, cl.getPaddedNumAtoms(), "sigParams"); sigParams.initialize<mm_float4>(cl, cl.getPaddedNumAtoms(), "sigParams");
epsParams = OpenCLArray::create<mm_float2>(cl, cl.getPaddedNumAtoms(), "epsParams"); epsParams.initialize<mm_float2>(cl, cl.getPaddedNumAtoms(), "epsParams");
scale = OpenCLArray::create<mm_float4>(cl, cl.getPaddedNumAtoms(), "scale"); scale.initialize<mm_float4>(cl, cl.getPaddedNumAtoms(), "scale");
axisParticleIndices = OpenCLArray::create<mm_int2>(cl, cl.getPaddedNumAtoms(), "axisParticleIndices"); axisParticleIndices.initialize<mm_int2>(cl, cl.getPaddedNumAtoms(), "axisParticleIndices");
sortedParticles = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "sortedParticles"); sortedParticles.initialize<cl_int>(cl, cl.getPaddedNumAtoms(), "sortedParticles");
aMatrix = OpenCLArray::create<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "aMatrix"); aMatrix.initialize<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "aMatrix");
bMatrix = OpenCLArray::create<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "bMatrix"); bMatrix.initialize<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "bMatrix");
gMatrix = OpenCLArray::create<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "gMatrix"); gMatrix.initialize<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "gMatrix");
vector<mm_float4> sigParamsVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0)); vector<mm_float4> sigParamsVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_float2> epsParamsVector(cl.getPaddedNumAtoms(), mm_float2(0, 0)); vector<mm_float2> epsParamsVector(cl.getPaddedNumAtoms(), mm_float2(0, 0));
vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0)); vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
...@@ -6428,10 +6387,10 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe ...@@ -6428,10 +6387,10 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0); scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0);
isRealParticle[i] = (epsilon != 0.0); isRealParticle[i] = (epsilon != 0.0);
} }
sigParams->upload(sigParamsVector); sigParams.upload(sigParamsVector);
epsParams->upload(epsParamsVector); epsParams.upload(epsParamsVector);
scale->upload(scaleVector); scale.upload(scaleVector);
axisParticleIndices->upload(axisParticleVector); axisParticleIndices.upload(axisParticleVector);
// Record exceptions and exclusions. // Record exceptions and exclusions.
...@@ -6454,29 +6413,29 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe ...@@ -6454,29 +6413,29 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
if (isRealParticle[i]) if (isRealParticle[i])
numRealParticles++; numRealParticles++;
int numExceptions = exceptionParamsVec.size(); int numExceptions = exceptionParamsVec.size();
exclusions = OpenCLArray::create<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions"); exclusions.initialize<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions");
exclusionStartIndex = OpenCLArray::create<cl_int>(cl, numRealParticles+1, "exclusionStartIndex"); exclusionStartIndex.initialize<cl_int>(cl, numRealParticles+1, "exclusionStartIndex");
exceptionParticles = OpenCLArray::create<mm_int4>(cl, max(1, numExceptions), "exceptionParticles"); exceptionParticles.initialize<mm_int4>(cl, max(1, numExceptions), "exceptionParticles");
exceptionParams = OpenCLArray::create<mm_float2>(cl, max(1, numExceptions), "exceptionParams"); exceptionParams.initialize<mm_float2>(cl, max(1, numExceptions), "exceptionParams");
if (numExceptions > 0) if (numExceptions > 0)
exceptionParams->upload(exceptionParamsVec); exceptionParams.upload(exceptionParamsVec);
// Create data structures used for the neighbor list. // Create data structures used for the neighbor list.
int numAtomBlocks = (numRealParticles+31)/32; int numAtomBlocks = (numRealParticles+31)/32;
int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float)); int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
blockCenter = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockCenter"); blockCenter.initialize(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox"); blockBoundingBox.initialize(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos"); sortedPos.initialize(cl, numRealParticles, 4*elementSize, "sortedPos");
maxNeighborBlocks = numRealParticles*2; maxNeighborBlocks = numRealParticles*2;
neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors"); neighbors.initialize<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighborIndex = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks, "neighborIndex"); neighborIndex.initialize<cl_int>(cl, maxNeighborBlocks, "neighborIndex");
neighborBlockCount = OpenCLArray::create<cl_int>(cl, 1, "neighborBlockCount"); neighborBlockCount.initialize<cl_int>(cl, 1, "neighborBlockCount");
// Create array for accumulating torques. // Create array for accumulating torques.
torque = OpenCLArray::create<cl_long>(cl, 3*cl.getPaddedNumAtoms(), "torque"); torque.initialize<cl_long>(cl, 3*cl.getPaddedNumAtoms(), "torque");
cl.addAutoclearBuffer(*torque); cl.addAutoclearBuffer(torque);
// Create the kernels. // Create the kernels.
...@@ -6519,60 +6478,60 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include ...@@ -6519,60 +6478,60 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
sortAtoms(); sortAtoms();
framesKernel.setArg<cl_int>(0, numRealParticles); framesKernel.setArg<cl_int>(0, numRealParticles);
framesKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer()); framesKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(2, axisParticleIndices->getDeviceBuffer()); framesKernel.setArg<cl::Buffer>(2, axisParticleIndices.getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(3, sigParams->getDeviceBuffer()); framesKernel.setArg<cl::Buffer>(3, sigParams.getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(4, scale->getDeviceBuffer()); framesKernel.setArg<cl::Buffer>(4, scale.getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(5, aMatrix->getDeviceBuffer()); framesKernel.setArg<cl::Buffer>(5, aMatrix.getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(6, bMatrix->getDeviceBuffer()); framesKernel.setArg<cl::Buffer>(6, bMatrix.getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(7, gMatrix->getDeviceBuffer()); framesKernel.setArg<cl::Buffer>(7, gMatrix.getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(8, sortedParticles->getDeviceBuffer()); framesKernel.setArg<cl::Buffer>(8, sortedParticles.getDeviceBuffer());
blockBoundsKernel.setArg<cl_int>(0, numRealParticles); blockBoundsKernel.setArg<cl_int>(0, numRealParticles);
blockBoundsKernel.setArg<cl::Buffer>(6, sortedParticles->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(6, sortedParticles.getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(7, cl.getPosq().getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(7, cl.getPosq().getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(8, sortedPos->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(8, sortedPos.getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(9, blockCenter->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(9, blockCenter.getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(10, blockBoundingBox->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(10, blockBoundingBox.getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(11, neighborBlockCount->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(11, neighborBlockCount.getDeviceBuffer());
neighborsKernel.setArg<cl_int>(0, numRealParticles); neighborsKernel.setArg<cl_int>(0, numRealParticles);
neighborsKernel.setArg<cl_int>(1, maxNeighborBlocks); neighborsKernel.setArg<cl_int>(1, maxNeighborBlocks);
neighborsKernel.setArg<cl::Buffer>(7, sortedPos->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(7, sortedPos.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, blockCenter->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(8, blockCenter.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(9, blockBoundingBox->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(9, blockBoundingBox.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(10, neighbors->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(10, neighbors.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(11, neighborIndex.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(12, neighborBlockCount->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(12, neighborBlockCount.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(13, exclusions->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(13, exclusions.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(14, exclusionStartIndex->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(14, exclusionStartIndex.getDeviceBuffer());
int index = 0; int index = 0;
forceKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, torque.getDeviceBuffer());
forceKernel.setArg<cl_int>(index++, numRealParticles); forceKernel.setArg<cl_int>(index++, numRealParticles);
forceKernel.setArg<cl_int>(index++, exceptionAtoms.size()); forceKernel.setArg<cl_int>(index++, exceptionAtoms.size());
forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedPos->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, sortedPos.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sigParams->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, sigParams.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, epsParams->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, epsParams.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedParticles->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, sortedParticles.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, aMatrix->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, aMatrix.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, bMatrix->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, bMatrix.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, gMatrix->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, gMatrix.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, exclusions.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exceptionParticles->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, exceptionParticles.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exceptionParams->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, exceptionParams.getDeviceBuffer());
if (nonbondedMethod != GayBerneForce::NoCutoff) { if (nonbondedMethod != GayBerneForce::NoCutoff) {
forceKernel.setArg<cl_int>(index++, maxNeighborBlocks); forceKernel.setArg<cl_int>(index++, maxNeighborBlocks);
forceKernel.setArg<cl::Buffer>(index++, neighbors->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, neighbors.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborIndex->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, neighborIndex.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborBlockCount->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, neighborBlockCount.getDeviceBuffer());
} }
index = 0; index = 0;
torqueKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer()); torqueKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer()); torqueKernel.setArg<cl::Buffer>(index++, torque.getDeviceBuffer());
torqueKernel.setArg<cl_int>(index++, numRealParticles); torqueKernel.setArg<cl_int>(index++, numRealParticles);
torqueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); torqueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, axisParticleIndices->getDeviceBuffer()); torqueKernel.setArg<cl::Buffer>(index++, axisParticleIndices.getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, sortedParticles->getDeviceBuffer()); torqueKernel.setArg<cl::Buffer>(index++, sortedParticles.getDeviceBuffer());
} }
cl.executeKernel(framesKernel, numRealParticles); cl.executeKernel(framesKernel, numRealParticles);
setPeriodicBoxArgs(cl, blockBoundsKernel, 1); setPeriodicBoxArgs(cl, blockBoundsKernel, 1);
...@@ -6586,7 +6545,7 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include ...@@ -6586,7 +6545,7 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
cl.executeKernel(neighborsKernel, numRealParticles); cl.executeKernel(neighborsKernel, numRealParticles);
cl_int* count = (cl_int*) cl.getPinnedBuffer(); cl_int* count = (cl_int*) cl.getPinnedBuffer();
cl::Event event; cl::Event event;
cl.getQueue().enqueueReadBuffer(neighborBlockCount->getDeviceBuffer(), CL_FALSE, 0, neighborBlockCount->getSize()*neighborBlockCount->getElementSize(), count, NULL, &event); cl.getQueue().enqueueReadBuffer(neighborBlockCount.getDeviceBuffer(), CL_FALSE, 0, neighborBlockCount.getSize()*neighborBlockCount.getElementSize(), count, NULL, &event);
setPeriodicBoxArgs(cl, forceKernel, 20); setPeriodicBoxArgs(cl, forceKernel, 20);
cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize()); cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize());
event.wait(); event.wait();
...@@ -6595,17 +6554,13 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include ...@@ -6595,17 +6554,13 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
// There wasn't enough room for the neighbor list, so we need to recreate it. // There wasn't enough room for the neighbor list, so we need to recreate it.
delete neighbors;
neighbors = NULL;
delete neighborIndex;
neighborIndex = NULL;
maxNeighborBlocks = (int) ceil((*count)*1.1); maxNeighborBlocks = (int) ceil((*count)*1.1);
neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors"); neighbors.resize(maxNeighborBlocks*32);
neighborIndex = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks, "neighborIndex"); neighborIndex.resize(maxNeighborBlocks);
neighborsKernel.setArg<cl::Buffer>(10, neighbors->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(10, neighbors.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(11, neighborIndex.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(17, neighbors->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(17, neighbors.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(18, neighborIndex->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(18, neighborIndex.getDeviceBuffer());
} }
} }
cl.executeKernel(torqueKernel, numRealParticles); cl.executeKernel(torqueKernel, numRealParticles);
...@@ -6644,9 +6599,9 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context ...@@ -6644,9 +6599,9 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
if (epsilon != 0.0 && !isRealParticle[i]) if (epsilon != 0.0 && !isRealParticle[i])
throw OpenMMException("updateParametersInContext: The set of ignored particles (ones with epsilon=0) has changed"); throw OpenMMException("updateParametersInContext: The set of ignored particles (ones with epsilon=0) has changed");
} }
sigParams->upload(sigParamsVector); sigParams.upload(sigParamsVector);
epsParams->upload(epsParamsVector); epsParams.upload(epsParamsVector);
scale->upload(scaleVector); scale.upload(scaleVector);
// Record the exceptions. // Record the exceptions.
...@@ -6658,7 +6613,7 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context ...@@ -6658,7 +6613,7 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
force.getExceptionParameters(exceptions[i], atom1, atom2, sigma, epsilon); force.getExceptionParameters(exceptions[i], atom1, atom2, sigma, epsilon);
exceptionParamsVec[i] = mm_float2((float) sigma, (float) epsilon); exceptionParamsVec[i] = mm_float2((float) sigma, (float) epsilon);
} }
exceptionParams->upload(exceptionParamsVec); exceptionParams.upload(exceptionParamsVec);
} }
cl.invalidateMolecules(info); cl.invalidateMolecules(info);
sortAtoms(); sortAtoms();
...@@ -6679,7 +6634,7 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() { ...@@ -6679,7 +6634,7 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
particles[nextIndex++] = atom; particles[nextIndex++] = atom;
} }
} }
sortedParticles->upload(particles); sortedParticles.upload(particles);
// Update the list of exception particles. // Update the list of exception particles.
...@@ -6688,7 +6643,7 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() { ...@@ -6688,7 +6643,7 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
vector<mm_int4> exceptionParticlesVec(numExceptions); vector<mm_int4> exceptionParticlesVec(numExceptions);
for (int i = 0; i < numExceptions; i++) for (int i = 0; i < numExceptions; i++)
exceptionParticlesVec[i] = mm_int4(exceptionAtoms[i].first, exceptionAtoms[i].second, inverseOrder[exceptionAtoms[i].first], inverseOrder[exceptionAtoms[i].second]); exceptionParticlesVec[i] = mm_int4(exceptionAtoms[i].first, exceptionAtoms[i].second, inverseOrder[exceptionAtoms[i].first], inverseOrder[exceptionAtoms[i].second]);
exceptionParticles->upload(exceptionParticlesVec); exceptionParticles.upload(exceptionParticlesVec);
} }
// Rebuild the list of exclusions. // Rebuild the list of exclusions.
...@@ -6700,16 +6655,16 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() { ...@@ -6700,16 +6655,16 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
excludedAtoms[first].push_back(second); excludedAtoms[first].push_back(second);
} }
int index = 0; int index = 0;
vector<int> exclusionVec(exclusions->getSize()); vector<int> exclusionVec(exclusions.getSize());
vector<int> startIndexVec(exclusionStartIndex->getSize()); vector<int> startIndexVec(exclusionStartIndex.getSize());
for (int i = 0; i < numRealParticles; i++) { for (int i = 0; i < numRealParticles; i++) {
startIndexVec[i] = index; startIndexVec[i] = index;
for (int j = 0; j < excludedAtoms[i].size(); j++) for (int j = 0; j < excludedAtoms[i].size(); j++)
exclusionVec[index++] = excludedAtoms[i][j]; exclusionVec[index++] = excludedAtoms[i][j];
} }
startIndexVec[numRealParticles] = index; startIndexVec[numRealParticles] = index;
exclusions->upload(exclusionVec); exclusions.upload(exclusionVec);
exclusionStartIndex->upload(startIndexVec); exclusionStartIndex.upload(startIndexVec);
} }
class OpenCLCalcCustomCVForceKernel::ReorderListener : public OpenCLContext::ReorderListener { class OpenCLCalcCustomCVForceKernel::ReorderListener : public OpenCLContext::ReorderListener {
...@@ -6728,15 +6683,6 @@ private: ...@@ -6728,15 +6683,6 @@ private:
OpenCLArray& invAtomOrder; OpenCLArray& invAtomOrder;
}; };
OpenCLCalcCustomCVForceKernel::~OpenCLCalcCustomCVForceKernel() {
for (auto force : cvForces)
delete force;
if (invAtomOrder != NULL)
delete invAtomOrder;
if (innerInvAtomOrder != NULL)
delete innerInvAtomOrder;
}
void OpenCLCalcCustomCVForceKernel::initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext) { void OpenCLCalcCustomCVForceKernel::initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext) {
int numCVs = force.getNumCollectiveVariables(); int numCVs = force.getNumCollectiveVariables();
cl.addForce(new OpenCLForceInfo(1)); cl.addForce(new OpenCLForceInfo(1));
...@@ -6779,10 +6725,11 @@ void OpenCLCalcCustomCVForceKernel::initialize(const System& system, const Custo ...@@ -6779,10 +6725,11 @@ void OpenCLCalcCustomCVForceKernel::initialize(const System& system, const Custo
// Create arrays for storing information. // Create arrays for storing information.
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
cvForces.resize(numCVs);
for (int i = 0; i < numCVs; i++) for (int i = 0; i < numCVs; i++)
cvForces.push_back(new OpenCLArray(cl, cl.getNumAtoms(), 4*elementSize, "cvForce")); cvForces[i].initialize(cl, cl.getNumAtoms(), 4*elementSize, "cvForce");
invAtomOrder = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "invAtomOrder"); invAtomOrder.initialize<cl_int>(cl, cl.getPaddedNumAtoms(), "invAtomOrder");
innerInvAtomOrder = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "innerInvAtomOrder"); innerInvAtomOrder.initialize<cl_int>(cl, cl.getPaddedNumAtoms(), "innerInvAtomOrder");
// Create the kernels. // Create the kernels.
...@@ -6809,7 +6756,7 @@ double OpenCLCalcCustomCVForceKernel::execute(ContextImpl& context, ContextImpl& ...@@ -6809,7 +6756,7 @@ double OpenCLCalcCustomCVForceKernel::execute(ContextImpl& context, ContextImpl&
vector<map<string, double> > cvDerivs(numCVs); vector<map<string, double> > cvDerivs(numCVs);
for (int i = 0; i < numCVs; i++) { for (int i = 0; i < numCVs; i++) {
cvValues.push_back(innerContext.calcForcesAndEnergy(true, true, 1<<i)); cvValues.push_back(innerContext.calcForcesAndEnergy(true, true, 1<<i));
copyForcesKernel.setArg<cl::Buffer>(0, cvForces[i]->getDeviceBuffer()); copyForcesKernel.setArg<cl::Buffer>(0, cvForces[i].getDeviceBuffer());
cl.executeKernel(copyForcesKernel, numAtoms); cl.executeKernel(copyForcesKernel, numAtoms);
innerContext.getEnergyParameterDerivatives(cvDerivs[i]); innerContext.getEnergyParameterDerivatives(cvDerivs[i]);
} }
...@@ -6852,8 +6799,8 @@ void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& ...@@ -6852,8 +6799,8 @@ void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl&
// Initialize the listeners. // Initialize the listeners.
ReorderListener* listener1 = new ReorderListener(cl, *invAtomOrder); ReorderListener* listener1 = new ReorderListener(cl, invAtomOrder);
ReorderListener* listener2 = new ReorderListener(cl2, *innerInvAtomOrder); ReorderListener* listener2 = new ReorderListener(cl2, innerInvAtomOrder);
cl.addReorderListener(listener1); cl.addReorderListener(listener1);
cl2.addReorderListener(listener2); cl2.addReorderListener(listener2);
listener1->execute(); listener1->execute();
...@@ -6866,7 +6813,7 @@ void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& ...@@ -6866,7 +6813,7 @@ void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl&
copyStateKernel.setArg<cl::Buffer>(3, cl.getAtomIndexArray().getDeviceBuffer()); copyStateKernel.setArg<cl::Buffer>(3, cl.getAtomIndexArray().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(4, cl2.getPosq().getDeviceBuffer()); copyStateKernel.setArg<cl::Buffer>(4, cl2.getPosq().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(6, cl2.getVelm().getDeviceBuffer()); copyStateKernel.setArg<cl::Buffer>(6, cl2.getVelm().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(7, innerInvAtomOrder->getDeviceBuffer()); copyStateKernel.setArg<cl::Buffer>(7, innerInvAtomOrder.getDeviceBuffer());
copyStateKernel.setArg<cl_int>(8, numAtoms); copyStateKernel.setArg<cl_int>(8, numAtoms);
if (cl.getUseMixedPrecision()) { if (cl.getUseMixedPrecision()) {
copyStateKernel.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer()); copyStateKernel.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer());
...@@ -6877,7 +6824,7 @@ void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& ...@@ -6877,7 +6824,7 @@ void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl&
copyStateKernel.setArg<void*>(5, NULL); copyStateKernel.setArg<void*>(5, NULL);
} }
copyForcesKernel.setArg<cl::Buffer>(1, invAtomOrder->getDeviceBuffer()); copyForcesKernel.setArg<cl::Buffer>(1, invAtomOrder.getDeviceBuffer());
copyForcesKernel.setArg<cl::Buffer>(2, cl2.getForce().getDeviceBuffer()); copyForcesKernel.setArg<cl::Buffer>(2, cl2.getForce().getDeviceBuffer());
copyForcesKernel.setArg<cl::Buffer>(3, cl2.getAtomIndexArray().getDeviceBuffer()); copyForcesKernel.setArg<cl::Buffer>(3, cl2.getAtomIndexArray().getDeviceBuffer());
copyForcesKernel.setArg<cl_int>(4, numAtoms); copyForcesKernel.setArg<cl_int>(4, numAtoms);
...@@ -6885,7 +6832,7 @@ void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& ...@@ -6885,7 +6832,7 @@ void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl&
addForcesKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer()); addForcesKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
addForcesKernel.setArg<cl_int>(1, numAtoms); addForcesKernel.setArg<cl_int>(1, numAtoms);
for (int i = 0; i < cvForces.size(); i++) for (int i = 0; i < cvForces.size(); i++)
addForcesKernel.setArg<cl::Buffer>(2*i+2, cvForces[i]->getDeviceBuffer()); addForcesKernel.setArg<cl::Buffer>(2*i+2, cvForces[i].getDeviceBuffer());
} }
cl.executeKernel(copyStateKernel, numAtoms); cl.executeKernel(copyStateKernel, numAtoms);
Vec3 a, b, c; Vec3 a, b, c;
...@@ -6917,15 +6864,6 @@ private: ...@@ -6917,15 +6864,6 @@ private:
set<int> particles; set<int> particles;
}; };
OpenCLCalcRMSDForceKernel::~OpenCLCalcRMSDForceKernel() {
if (referencePos != NULL)
delete referencePos;
if (particles != NULL)
delete particles;
if (buffer != NULL)
delete buffer;
}
void OpenCLCalcRMSDForceKernel::initialize(const System& system, const RMSDForce& force) { void OpenCLCalcRMSDForceKernel::initialize(const System& system, const RMSDForce& force) {
// Create data structures. // Create data structures.
...@@ -6934,9 +6872,9 @@ void OpenCLCalcRMSDForceKernel::initialize(const System& system, const RMSDForce ...@@ -6934,9 +6872,9 @@ void OpenCLCalcRMSDForceKernel::initialize(const System& system, const RMSDForce
int numParticles = force.getParticles().size(); int numParticles = force.getParticles().size();
if (numParticles == 0) if (numParticles == 0)
numParticles = system.getNumParticles(); numParticles = system.getNumParticles();
referencePos = new OpenCLArray(cl, system.getNumParticles(), 4*elementSize, "referencePos"); referencePos.initialize(cl, system.getNumParticles(), 4*elementSize, "referencePos");
particles = OpenCLArray::create<cl_int>(cl, numParticles, "particles"); particles.initialize<cl_int>(cl, numParticles, "particles");
buffer = new OpenCLArray(cl, 13, elementSize, "buffer"); buffer.initialize(cl, 13, elementSize, "buffer");
recordParameters(force); recordParameters(force);
info = new ForceInfo(force); info = new ForceInfo(force);
cl.addForce(info); cl.addForce(info);
...@@ -6965,18 +6903,18 @@ void OpenCLCalcRMSDForceKernel::recordParameters(const RMSDForce& force) { ...@@ -6965,18 +6903,18 @@ void OpenCLCalcRMSDForceKernel::recordParameters(const RMSDForce& force) {
// Upload them to the device. // Upload them to the device.
particles->upload(particleVec); particles.upload(particleVec);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
vector<mm_double4> pos; vector<mm_double4> pos;
for (Vec3 p : centeredPositions) for (Vec3 p : centeredPositions)
pos.push_back(mm_double4(p[0], p[1], p[2], 0)); pos.push_back(mm_double4(p[0], p[1], p[2], 0));
referencePos->upload(pos); referencePos.upload(pos);
} }
else { else {
vector<mm_float4> pos; vector<mm_float4> pos;
for (Vec3 p : centeredPositions) for (Vec3 p : centeredPositions)
pos.push_back(mm_float4(p[0], p[1], p[2], 0)); pos.push_back(mm_float4(p[0], p[1], p[2], 0));
referencePos->upload(pos); referencePos.upload(pos);
} }
// Record the sum of the norms of the reference positions. // Record the sum of the norms of the reference positions.
...@@ -6998,13 +6936,13 @@ template <class REAL> ...@@ -6998,13 +6936,13 @@ template <class REAL>
double OpenCLCalcRMSDForceKernel::executeImpl(ContextImpl& context) { double OpenCLCalcRMSDForceKernel::executeImpl(ContextImpl& context) {
// Execute the first kernel. // Execute the first kernel.
int numParticles = particles->getSize(); int numParticles = particles.getSize();
int blockSize = min(256, (int) kernel1.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(cl.getDevice())); int blockSize = min(256, (int) kernel1.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(cl.getDevice()));
kernel1.setArg<cl_int>(0, numParticles); kernel1.setArg<cl_int>(0, numParticles);
kernel1.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, referencePos->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(2, referencePos.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, particles->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(3, particles.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, buffer->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(4, buffer.getDeviceBuffer());
kernel1.setArg(5, blockSize*sizeof(REAL), NULL); kernel1.setArg(5, blockSize*sizeof(REAL), NULL);
cl.executeKernel(kernel1, blockSize, blockSize); cl.executeKernel(kernel1, blockSize, blockSize);
...@@ -7012,7 +6950,7 @@ double OpenCLCalcRMSDForceKernel::executeImpl(ContextImpl& context) { ...@@ -7012,7 +6950,7 @@ double OpenCLCalcRMSDForceKernel::executeImpl(ContextImpl& context) {
// and eigenvector. // and eigenvector.
vector<REAL> b; vector<REAL> b;
buffer->download(b); buffer.download(b);
Array2D<double> F(4, 4); Array2D<double> F(4, 4);
F[0][0] = b[0*3+0] + b[1*3+1] + b[2*3+2]; F[0][0] = b[0*3+0] + b[1*3+1] + b[2*3+2];
F[1][0] = b[1*3+2] - b[2*3+1]; F[1][0] = b[1*3+2] - b[2*3+1];
...@@ -7066,30 +7004,25 @@ double OpenCLCalcRMSDForceKernel::executeImpl(ContextImpl& context) { ...@@ -7066,30 +7004,25 @@ double OpenCLCalcRMSDForceKernel::executeImpl(ContextImpl& context) {
// Upload it to the device and invoke the kernel to apply forces. // Upload it to the device and invoke the kernel to apply forces.
buffer->upload(b); buffer.upload(b);
kernel2.setArg<cl_int>(0, numParticles); kernel2.setArg<cl_int>(0, numParticles);
kernel2.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer()); kernel2.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, referencePos->getDeviceBuffer()); kernel2.setArg<cl::Buffer>(2, referencePos.getDeviceBuffer());
kernel2.setArg<cl::Buffer>(3, particles->getDeviceBuffer()); kernel2.setArg<cl::Buffer>(3, particles.getDeviceBuffer());
kernel2.setArg<cl::Buffer>(4, buffer->getDeviceBuffer()); kernel2.setArg<cl::Buffer>(4, buffer.getDeviceBuffer());
kernel2.setArg<cl::Buffer>(5, cl.getForceBuffers().getDeviceBuffer()); kernel2.setArg<cl::Buffer>(5, cl.getForceBuffers().getDeviceBuffer());
cl.executeKernel(kernel2, numParticles); cl.executeKernel(kernel2, numParticles);
return rmsd; return rmsd;
} }
void OpenCLCalcRMSDForceKernel::copyParametersToContext(ContextImpl& context, const RMSDForce& force) { void OpenCLCalcRMSDForceKernel::copyParametersToContext(ContextImpl& context, const RMSDForce& force) {
if (referencePos->getSize() != force.getReferencePositions().size()) if (referencePos.getSize() != force.getReferencePositions().size())
throw OpenMMException("updateParametersInContext: The number of reference positions has changed"); throw OpenMMException("updateParametersInContext: The number of reference positions has changed");
int numParticles = force.getParticles().size(); int numParticles = force.getParticles().size();
if (numParticles == 0) if (numParticles == 0)
numParticles = context.getSystem().getNumParticles(); numParticles = context.getSystem().getNumParticles();
if (numParticles != particles->getSize()) { if (numParticles != particles.getSize())
// Recreate the particles array. particles.resize(numParticles);
delete particles;
particles = NULL;
particles = OpenCLArray::create<cl_int>(cl, numParticles, "particles");
}
recordParameters(force); recordParameters(force);
// Mark that the current reordering may be invalid. // Mark that the current reordering may be invalid.
...@@ -7160,11 +7093,6 @@ double OpenCLIntegrateVerletStepKernel::computeKineticEnergy(ContextImpl& contex ...@@ -7160,11 +7093,6 @@ double OpenCLIntegrateVerletStepKernel::computeKineticEnergy(ContextImpl& contex
return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize()); return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
} }
OpenCLIntegrateLangevinStepKernel::~OpenCLIntegrateLangevinStepKernel() {
if (params != NULL)
delete params;
}
void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) { void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system); cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
...@@ -7174,7 +7102,7 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L ...@@ -7174,7 +7102,7 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L
cl::Program program = cl.createProgram(OpenCLKernelSources::langevin, defines, ""); cl::Program program = cl.createProgram(OpenCLKernelSources::langevin, defines, "");
kernel1 = cl::Kernel(program, "integrateLangevinPart1"); kernel1 = cl::Kernel(program, "integrateLangevinPart1");
kernel2 = cl::Kernel(program, "integrateLangevinPart2"); kernel2 = cl::Kernel(program, "integrateLangevinPart2");
params = new OpenCLArray(cl, 3, cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(cl_double) : sizeof(cl_float), "langevinParams"); params.initialize(cl, 3, cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(cl_double) : sizeof(cl_float), "langevinParams");
prevStepSize = -1.0; prevStepSize = -1.0;
} }
...@@ -7186,7 +7114,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang ...@@ -7186,7 +7114,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, params->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(3, params.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, integration.getRandom().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(5, integration.getRandom().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
...@@ -7207,18 +7135,18 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang ...@@ -7207,18 +7135,18 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
double fscale = (friction == 0 ? stepSize : (1-vscale)/friction); double fscale = (friction == 0 ? stepSize : (1-vscale)/friction);
double noisescale = sqrt(kT*(1-vscale*vscale)); double noisescale = sqrt(kT*(1-vscale*vscale));
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) { if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
vector<cl_double> p(params->getSize()); vector<cl_double> p(params.getSize());
p[0] = vscale; p[0] = vscale;
p[1] = fscale; p[1] = fscale;
p[2] = noisescale; p[2] = noisescale;
params->upload(p); params.upload(p);
} }
else { else {
vector<cl_float> p(params->getSize()); vector<cl_float> p(params.getSize());
p[0] = (cl_float) vscale; p[0] = (cl_float) vscale;
p[1] = (cl_float) fscale; p[1] = (cl_float) fscale;
p[2] = (cl_float) noisescale; p[2] = (cl_float) noisescale;
params->upload(p); params.upload(p);
} }
prevTemp = temperature; prevTemp = temperature;
prevFriction = friction; prevFriction = friction;
...@@ -7429,11 +7357,6 @@ double OpenCLIntegrateVariableVerletStepKernel::computeKineticEnergy(ContextImpl ...@@ -7429,11 +7357,6 @@ double OpenCLIntegrateVariableVerletStepKernel::computeKineticEnergy(ContextImpl
return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize()); return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
} }
OpenCLIntegrateVariableLangevinStepKernel::~OpenCLIntegrateVariableLangevinStepKernel() {
if (params != NULL)
delete params;
}
void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) { void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system); cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
...@@ -7444,9 +7367,9 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system, ...@@ -7444,9 +7367,9 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system,
kernel1 = cl::Kernel(program, "integrateLangevinPart1"); kernel1 = cl::Kernel(program, "integrateLangevinPart1");
kernel2 = cl::Kernel(program, "integrateLangevinPart2"); kernel2 = cl::Kernel(program, "integrateLangevinPart2");
selectSizeKernel = cl::Kernel(program, "selectLangevinStepSize"); selectSizeKernel = cl::Kernel(program, "selectLangevinStepSize");
params = new OpenCLArray(cl, 3, cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(cl_double) : sizeof(cl_float), "langevinParams"); params.initialize(cl, 3, cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(cl_double) : sizeof(cl_float), "langevinParams");
blockSize = min(256, system.getNumParticles()); blockSize = min(256, system.getNumParticles());
blockSize = max(blockSize, params->getSize()); blockSize = max(blockSize, params.getSize());
blockSize = min(blockSize, (int) selectSizeKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(cl.getDevice())); blockSize = min(blockSize, (int) selectSizeKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(cl.getDevice()));
} }
...@@ -7459,7 +7382,7 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, ...@@ -7459,7 +7382,7 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context,
kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, params->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(3, params.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, integration.getRandom().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(5, integration.getRandom().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
...@@ -7470,9 +7393,9 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, ...@@ -7470,9 +7393,9 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context,
selectSizeKernel.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer()); selectSizeKernel.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
selectSizeKernel.setArg<cl::Buffer>(5, cl.getVelm().getDeviceBuffer()); selectSizeKernel.setArg<cl::Buffer>(5, cl.getVelm().getDeviceBuffer());
selectSizeKernel.setArg<cl::Buffer>(6, cl.getForce().getDeviceBuffer()); selectSizeKernel.setArg<cl::Buffer>(6, cl.getForce().getDeviceBuffer());
selectSizeKernel.setArg<cl::Buffer>(7, params->getDeviceBuffer()); selectSizeKernel.setArg<cl::Buffer>(7, params.getDeviceBuffer());
int elementSize = (useDouble ? sizeof(cl_double) : sizeof(cl_float)); int elementSize = (useDouble ? sizeof(cl_double) : sizeof(cl_float));
selectSizeKernel.setArg(8, params->getSize()*elementSize, NULL); selectSizeKernel.setArg(8, params.getSize()*elementSize, NULL);
selectSizeKernel.setArg(9, blockSize*elementSize, NULL); selectSizeKernel.setArg(9, blockSize*elementSize, NULL);
} }
...@@ -7619,24 +7542,8 @@ private: ...@@ -7619,24 +7542,8 @@ private:
}; };
OpenCLIntegrateCustomStepKernel::~OpenCLIntegrateCustomStepKernel() { OpenCLIntegrateCustomStepKernel::~OpenCLIntegrateCustomStepKernel() {
if (globalValues != NULL)
delete globalValues;
if (sumBuffer != NULL)
delete sumBuffer;
if (summedValue != NULL)
delete summedValue;
if (uniformRandoms != NULL)
delete uniformRandoms;
if (randomSeed != NULL)
delete randomSeed;
if (perDofEnergyParamDerivs != NULL)
delete perDofEnergyParamDerivs;
if (perDofValues != NULL) if (perDofValues != NULL)
delete perDofValues; delete perDofValues;
for (auto function : tabulatedFunctions)
delete function;
for (auto& f : savedForces)
delete f.second;
} }
void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) { void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) {
...@@ -7644,8 +7551,8 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus ...@@ -7644,8 +7551,8 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
numGlobalVariables = integrator.getNumGlobalVariables(); numGlobalVariables = integrator.getNumGlobalVariables();
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
sumBuffer = new OpenCLArray(cl, ((3*system.getNumParticles()+3)/4)*4, elementSize, "sumBuffer"); sumBuffer.initialize(cl, ((3*system.getNumParticles()+3)/4)*4, elementSize, "sumBuffer");
summedValue = new OpenCLArray(cl, 1, elementSize, "summedValue"); summedValue.initialize(cl, 1, elementSize, "summedValue");
perDofValues = new OpenCLParameterSet(cl, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cl.getUseDoublePrecision() || cl.getUseMixedPrecision()); perDofValues = new OpenCLParameterSet(cl, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cl.getUseDoublePrecision() || cl.getUseMixedPrecision());
cl.addReorderListener(new ReorderListener(cl, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent)); cl.addReorderListener(new ReorderListener(cl, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent));
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed()); SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
...@@ -7728,6 +7635,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7728,6 +7635,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
vector<pair<string, string> > functionNames; vector<pair<string, string> > functionNames;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
vector<string> tableTypes; vector<string> tableTypes;
tabulatedFunctions.resize(integrator.getNumTabulatedFunctions());
for (int i = 0; i < integrator.getNumTabulatedFunctions(); i++) { for (int i = 0; i < integrator.getNumTabulatedFunctions(); i++) {
functionList.push_back(&integrator.getTabulatedFunction(i)); functionList.push_back(&integrator.getTabulatedFunction(i));
string name = integrator.getTabulatedFunctionName(i); string name = integrator.getTabulatedFunctionName(i);
...@@ -7736,8 +7644,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7736,8 +7644,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
functions[name] = createReferenceTabulatedFunction(integrator.getTabulatedFunction(i)); functions[name] = createReferenceTabulatedFunction(integrator.getTabulatedFunction(i));
int width; int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(integrator.getTabulatedFunction(i), width); vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(integrator.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction")); tabulatedFunctions[i].initialize<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[i].upload(f);
if (width == 1) if (width == 1)
tableTypes.push_back("float"); tableTypes.push_back("float");
else else
...@@ -7801,8 +7709,10 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7801,8 +7709,10 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
forceGroupFlags[step] = 1<<forceGroup[step]; forceGroupFlags[step] = 1<<forceGroup[step];
if (forceGroupFlags[step] == -2 && step > 0) if (forceGroupFlags[step] == -2 && step > 0)
forceGroupFlags[step] = forceGroupFlags[step-1]; forceGroupFlags[step] = forceGroupFlags[step-1];
if (forceGroupFlags[step] != -2 && savedForces.find(forceGroupFlags[step]) == savedForces.end()) if (forceGroupFlags[step] != -2 && savedForces.find(forceGroupFlags[step]) == savedForces.end()) {
savedForces[forceGroupFlags[step]] = new OpenCLArray(cl, cl.getForce().getSize(), cl.getForce().getElementSize(), "savedForces"); savedForces[forceGroupFlags[step]] = OpenCLArray();
savedForces[forceGroupFlags[step]].initialize(cl, cl.getForce().getSize(), cl.getForce().getElementSize(), "savedForces");
}
} }
// Allocate space for storing global values, both on the host and the device. // Allocate space for storing global values, both on the host and the device.
...@@ -7810,7 +7720,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7810,7 +7720,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
globalValuesFloat.resize(expressionSet.getNumVariables()); globalValuesFloat.resize(expressionSet.getNumVariables());
globalValuesDouble.resize(expressionSet.getNumVariables()); globalValuesDouble.resize(expressionSet.getNumVariables());
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
globalValues = new OpenCLArray(cl, expressionSet.getNumVariables(), elementSize, "globalValues"); globalValues.initialize(cl, expressionSet.getNumVariables(), elementSize, "globalValues");
for (int i = 0; i < integrator.getNumGlobalVariables(); i++) { for (int i = 0; i < integrator.getNumGlobalVariables(); i++) {
globalValuesDouble[globalVariableIndex[i]] = initialGlobalVariables[i]; globalValuesDouble[globalVariableIndex[i]] = initialGlobalVariables[i];
expressionSet.setVariable(globalVariableIndex[i], initialGlobalVariables[i]); expressionSet.setVariable(globalVariableIndex[i], initialGlobalVariables[i]);
...@@ -7823,7 +7733,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7823,7 +7733,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
int numContextParams = context.getParameters().size(); int numContextParams = context.getParameters().size();
localPerDofEnergyParamDerivsFloat.resize(numContextParams); localPerDofEnergyParamDerivsFloat.resize(numContextParams);
localPerDofEnergyParamDerivsDouble.resize(numContextParams); localPerDofEnergyParamDerivsDouble.resize(numContextParams);
perDofEnergyParamDerivs = new OpenCLArray(cl, max(1, numContextParams), elementSize, "perDofEnergyParamDerivs"); perDofEnergyParamDerivs.initialize(cl, max(1, numContextParams), elementSize, "perDofEnergyParamDerivs");
// Record information about the targets of steps that will be stored in global variables. // Record information about the targets of steps that will be stored in global variables.
...@@ -7974,14 +7884,14 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7974,14 +7884,14 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, cl.getVelm().getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, cl.getVelm().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, cl.getForce().getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, cl.getForce().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, globalValues.getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, sumBuffer.getDeviceBuffer());
index += 4; index += 4;
kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs.getDeviceBuffer());
for (auto& buffer : perDofValues->getBuffers()) for (auto& buffer : perDofValues->getBuffers())
kernel.setArg<cl::Memory>(index++, buffer.getMemory()); kernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto array : tabulatedFunctions) for (auto& array : tabulatedFunctions)
kernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, array.getDeviceBuffer());
if (stepType[step] == CustomIntegrator::ComputeSum) { if (stepType[step] == CustomIntegrator::ComputeSum) {
// Create a second kernel for this step that sums the values. // Create a second kernel for this step that sums the values.
...@@ -7989,8 +7899,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7989,8 +7899,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel = cl::Kernel(program, useDouble ? "computeDoubleSum" : "computeFloatSum"); kernel = cl::Kernel(program, useDouble ? "computeDoubleSum" : "computeFloatSum");
kernels[step].push_back(kernel); kernels[step].push_back(kernel);
index = 0; index = 0;
kernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, sumBuffer.getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, summedValue->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, summedValue.getDeviceBuffer());
kernel.setArg<cl_int>(index++, 3*numAtoms); kernel.setArg<cl_int>(index++, 3*numAtoms);
} }
} }
...@@ -8012,9 +7922,9 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -8012,9 +7922,9 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
int maxUniformRandoms = 1; int maxUniformRandoms = 1;
for (int required : requiredUniform) for (int required : requiredUniform)
maxUniformRandoms = max(maxUniformRandoms, required); maxUniformRandoms = max(maxUniformRandoms, required);
uniformRandoms = OpenCLArray::create<mm_float4>(cl, maxUniformRandoms, "uniformRandoms"); uniformRandoms.initialize<mm_float4>(cl, maxUniformRandoms, "uniformRandoms");
randomSeed = OpenCLArray::create<mm_int4>(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed"); randomSeed.initialize<mm_int4>(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed");
vector<mm_int4> seed(randomSeed->getSize()); vector<mm_int4> seed(randomSeed.getSize());
int rseed = integrator.getRandomNumberSeed(); int rseed = integrator.getRandomNumberSeed();
// A random seed of 0 means use a unique one // A random seed of 0 means use a unique one
if (rseed == 0) if (rseed == 0)
...@@ -8026,12 +7936,12 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -8026,12 +7936,12 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
s.z = r = (1664525*r + 1013904223) & 0xFFFFFFFF; s.z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
s.w = r = (1664525*r + 1013904223) & 0xFFFFFFFF; s.w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
} }
randomSeed->upload(seed); randomSeed.upload(seed);
cl::Program randomProgram = cl.createProgram(OpenCLKernelSources::customIntegrator, defines); cl::Program randomProgram = cl.createProgram(OpenCLKernelSources::customIntegrator, defines);
randomKernel = cl::Kernel(randomProgram, "generateRandomNumbers"); randomKernel = cl::Kernel(randomProgram, "generateRandomNumbers");
randomKernel.setArg<cl_int>(0, maxUniformRandoms); randomKernel.setArg<cl_int>(0, maxUniformRandoms);
randomKernel.setArg<cl::Buffer>(1, uniformRandoms->getDeviceBuffer()); randomKernel.setArg<cl::Buffer>(1, uniformRandoms.getDeviceBuffer());
randomKernel.setArg<cl::Buffer>(2, randomSeed->getDeviceBuffer()); randomKernel.setArg<cl::Buffer>(2, randomSeed.getDeviceBuffer());
// Create the kernel for computing kinetic energy. // Create the kernel for computing kinetic energy.
...@@ -8067,19 +7977,19 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -8067,19 +7977,19 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel.setArg<cl::Buffer>(index++, cl.getVelm().getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, cl.getVelm().getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, cl.getForce().getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, cl.getForce().getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, globalValues.getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer.getDeviceBuffer());
index += 2; index += 2;
kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms.getDeviceBuffer());
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
kineticEnergyKernel.setArg<cl_double>(index++, 0.0); kineticEnergyKernel.setArg<cl_double>(index++, 0.0);
else else
kineticEnergyKernel.setArg<cl_float>(index++, 0.0f); kineticEnergyKernel.setArg<cl_float>(index++, 0.0f);
kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs.getDeviceBuffer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory()); kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
for (auto array : tabulatedFunctions) for (auto& array : tabulatedFunctions)
kineticEnergyKernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, array.getDeviceBuffer());
keNeedsForce = usesVariable(keExpression, "f"); keNeedsForce = usesVariable(keExpression, "f");
// Create a second kernel to sum the values. // Create a second kernel to sum the values.
...@@ -8087,8 +7997,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -8087,8 +7997,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
program = cl.createProgram(OpenCLKernelSources::customIntegrator, defines); program = cl.createProgram(OpenCLKernelSources::customIntegrator, defines);
sumKineticEnergyKernel = cl::Kernel(program, useDouble ? "computeDoubleSum" : "computeFloatSum"); sumKineticEnergyKernel = cl::Kernel(program, useDouble ? "computeDoubleSum" : "computeFloatSum");
index = 0; index = 0;
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); sumKineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer.getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, summedValue->getDeviceBuffer()); sumKineticEnergyKernel.setArg<cl::Buffer>(index++, summedValue.getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl_int>(index++, 3*numAtoms); sumKineticEnergyKernel.setArg<cl_int>(index++, 3*numAtoms);
// Delete the custom functions. // Delete the custom functions.
...@@ -8181,7 +8091,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -8181,7 +8091,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
// The forces are still valid. We just need a different force group right now. Save the old // The forces are still valid. We just need a different force group right now. Save the old
// forces in case we need them again. // forces in case we need them again.
cl.getForce().copyTo(*savedForces[lastForceGroups]); cl.getForce().copyTo(savedForces[lastForceGroups]);
validSavedForces.insert(lastForceGroups); validSavedForces.insert(lastForceGroups);
} }
} }
...@@ -8196,7 +8106,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -8196,7 +8106,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
if (!computeEnergy && validSavedForces.find(forceGroups) != validSavedForces.end()) { if (!computeEnergy && validSavedForces.find(forceGroups) != validSavedForces.end()) {
// We can just restore the forces we saved earlier. // We can just restore the forces we saved earlier.
savedForces[forceGroups]->copyTo(cl.getForce()); savedForces[forceGroups].copyTo(cl.getForce());
context.getLastForceGroups() = forceGroups; context.getLastForceGroups() = forceGroups;
} }
else { else {
...@@ -8209,12 +8119,12 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -8209,12 +8119,12 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) { if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
for (int i = 0; i < perDofEnergyParamDerivNames.size(); i++) for (int i = 0; i < perDofEnergyParamDerivNames.size(); i++)
localPerDofEnergyParamDerivsDouble[i] = energyParamDerivs[perDofEnergyParamDerivNames[i]]; localPerDofEnergyParamDerivsDouble[i] = energyParamDerivs[perDofEnergyParamDerivNames[i]];
perDofEnergyParamDerivs->upload(localPerDofEnergyParamDerivsDouble); perDofEnergyParamDerivs.upload(localPerDofEnergyParamDerivsDouble);
} }
else { else {
for (int i = 0; i < perDofEnergyParamDerivNames.size(); i++) for (int i = 0; i < perDofEnergyParamDerivNames.size(); i++)
localPerDofEnergyParamDerivsFloat[i] = (float) energyParamDerivs[perDofEnergyParamDerivNames[i]]; localPerDofEnergyParamDerivsFloat[i] = (float) energyParamDerivs[perDofEnergyParamDerivNames[i]];
perDofEnergyParamDerivs->upload(localPerDofEnergyParamDerivsFloat); perDofEnergyParamDerivs.upload(localPerDofEnergyParamDerivsFloat);
} }
} }
} }
...@@ -8227,18 +8137,18 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -8227,18 +8137,18 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
// Upload the global values to the device. // Upload the global values to the device.
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
globalValues->upload(globalValuesDouble); globalValues.upload(globalValuesDouble);
else { else {
for (int j = 0; j < (int) globalValuesDouble.size(); j++) for (int j = 0; j < (int) globalValuesDouble.size(); j++)
globalValuesFloat[j] = (float) globalValuesDouble[j]; globalValuesFloat[j] = (float) globalValuesDouble[j];
globalValues->upload(globalValuesFloat); globalValues.upload(globalValuesFloat);
} }
} }
bool stepInvalidatesForces = invalidatesForces[step]; bool stepInvalidatesForces = invalidatesForces[step];
if (stepType[step] == CustomIntegrator::ComputePerDof && !merged[step]) { if (stepType[step] == CustomIntegrator::ComputePerDof && !merged[step]) {
kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step])); kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step]));
kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer()); kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer());
kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms->getDeviceBuffer()); kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms.getDeviceBuffer());
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
kernels[step][0].setArg<cl_double>(11, energy); kernels[step][0].setArg<cl_double>(11, energy);
else else
...@@ -8256,24 +8166,24 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -8256,24 +8166,24 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
else if (stepType[step] == CustomIntegrator::ComputeSum) { else if (stepType[step] == CustomIntegrator::ComputeSum) {
kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step])); kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step]));
kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer()); kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer());
kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms->getDeviceBuffer()); kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms.getDeviceBuffer());
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
kernels[step][0].setArg<cl_double>(11, energy); kernels[step][0].setArg<cl_double>(11, energy);
else else
kernels[step][0].setArg<cl_float>(11, (cl_float) energy); kernels[step][0].setArg<cl_float>(11, (cl_float) energy);
if (requiredUniform[step] > 0) if (requiredUniform[step] > 0)
cl.executeKernel(randomKernel, numAtoms); cl.executeKernel(randomKernel, numAtoms);
cl.clearBuffer(*sumBuffer); cl.clearBuffer(sumBuffer);
cl.executeKernel(kernels[step][0], numAtoms, 128); cl.executeKernel(kernels[step][0], numAtoms, 128);
cl.executeKernel(kernels[step][1], sumWorkGroupSize, sumWorkGroupSize); cl.executeKernel(kernels[step][1], sumWorkGroupSize, sumWorkGroupSize);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) { if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double value; double value;
summedValue->download(&value); summedValue.download(&value);
recordGlobalValue(value, stepTarget[step], integrator); recordGlobalValue(value, stepTarget[step], integrator);
} }
else { else {
float value; float value;
summedValue->download(&value); summedValue.download(&value);
recordGlobalValue(value, stepTarget[step], integrator); recordGlobalValue(value, stepTarget[step], integrator);
} }
} }
...@@ -8363,19 +8273,19 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex ...@@ -8363,19 +8273,19 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
energy = context.calcForcesAndEnergy(true, willNeedEnergy, -1); energy = context.calcForcesAndEnergy(true, willNeedEnergy, -1);
forcesAreValid = true; forcesAreValid = true;
} }
cl.clearBuffer(*sumBuffer); cl.clearBuffer(sumBuffer);
kineticEnergyKernel.setArg<cl::Buffer>(8, cl.getIntegrationUtilities().getRandom().getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(8, cl.getIntegrationUtilities().getRandom().getDeviceBuffer());
kineticEnergyKernel.setArg<cl_uint>(9, 0); kineticEnergyKernel.setArg<cl_uint>(9, 0);
cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms()); cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms());
cl.executeKernel(sumKineticEnergyKernel, sumWorkGroupSize, sumWorkGroupSize); cl.executeKernel(sumKineticEnergyKernel, sumWorkGroupSize, sumWorkGroupSize);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) { if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double ke; double ke;
summedValue->download(&ke); summedValue.download(&ke);
return ke; return ke;
} }
else { else {
float ke; float ke;
summedValue->download(&ke); summedValue.download(&ke);
return ke; return ke;
} }
} }
...@@ -8410,7 +8320,7 @@ void OpenCLIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& conte ...@@ -8410,7 +8320,7 @@ void OpenCLIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& conte
} }
void OpenCLIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const { void OpenCLIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const {
if (globalValues == NULL) { if (!globalValues.isInitialized()) {
// The data structures haven't been created yet, so just return the list of values that was given earlier. // The data structures haven't been created yet, so just return the list of values that was given earlier.
values = initialGlobalVariables; values = initialGlobalVariables;
...@@ -8424,7 +8334,7 @@ void OpenCLIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, v ...@@ -8424,7 +8334,7 @@ void OpenCLIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, v
void OpenCLIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) { void OpenCLIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) {
if (numGlobalVariables == 0) if (numGlobalVariables == 0)
return; return;
if (globalValues == NULL) { if (!globalValues.isInitialized()) {
// The data structures haven't been created yet, so just store the list of values. // The data structures haven't been created yet, so just store the list of values.
initialGlobalVariables = values; initialGlobalVariables = values;
...@@ -8483,11 +8393,6 @@ void OpenCLIntegrateCustomStepKernel::setPerDofVariable(ContextImpl& context, in ...@@ -8483,11 +8393,6 @@ void OpenCLIntegrateCustomStepKernel::setPerDofVariable(ContextImpl& context, in
deviceValuesAreCurrent = false; deviceValuesAreCurrent = false;
} }
OpenCLApplyAndersenThermostatKernel::~OpenCLApplyAndersenThermostatKernel() {
if (atomGroups != NULL)
delete atomGroups;
}
void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) { void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) {
randomSeed = thermostat.getRandomNumberSeed(); randomSeed = thermostat.getRandomNumberSeed();
map<string, string> defines; map<string, string> defines;
...@@ -8499,13 +8404,13 @@ void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const ...@@ -8499,13 +8404,13 @@ void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const
// Create the arrays with the group definitions. // Create the arrays with the group definitions.
vector<vector<int> > groups = AndersenThermostatImpl::calcParticleGroups(system); vector<vector<int> > groups = AndersenThermostatImpl::calcParticleGroups(system);
atomGroups = OpenCLArray::create<int>(cl, cl.getNumAtoms(), "atomGroups"); atomGroups.initialize<int>(cl, cl.getNumAtoms(), "atomGroups");
vector<int> atoms(atomGroups->getSize()); vector<int> atoms(atomGroups.getSize());
for (int i = 0; i < (int) groups.size(); i++) { for (int i = 0; i < (int) groups.size(); i++) {
for (int j = 0; j < (int) groups[i].size(); j++) for (int j = 0; j < (int) groups[i].size(); j++)
atoms[groups[i][j]] = i; atoms[groups[i][j]] = i;
} }
atomGroups->upload(atoms); atomGroups.upload(atoms);
} }
void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) { void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) {
...@@ -8514,7 +8419,7 @@ void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) { ...@@ -8514,7 +8419,7 @@ void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) {
kernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer()); kernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
kernel.setArg<cl::Buffer>(3, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer()); kernel.setArg<cl::Buffer>(3, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
kernel.setArg<cl::Buffer>(4, cl.getIntegrationUtilities().getRandom().getDeviceBuffer()); kernel.setArg<cl::Buffer>(4, cl.getIntegrationUtilities().getRandom().getDeviceBuffer());
kernel.setArg<cl::Buffer>(6, atomGroups->getDeviceBuffer()); kernel.setArg<cl::Buffer>(6, atomGroups.getDeviceBuffer());
} }
kernel.setArg<cl_float>(0, (cl_float) context.getParameter(AndersenThermostat::CollisionFrequency())); kernel.setArg<cl_float>(0, (cl_float) context.getParameter(AndersenThermostat::CollisionFrequency()));
kernel.setArg<cl_float>(1, (cl_float) (BOLTZ*context.getParameter(AndersenThermostat::Temperature()))); kernel.setArg<cl_float>(1, (cl_float) (BOLTZ*context.getParameter(AndersenThermostat::Temperature())));
...@@ -8522,20 +8427,9 @@ void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) { ...@@ -8522,20 +8427,9 @@ void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) {
cl.executeKernel(kernel, cl.getNumAtoms()); cl.executeKernel(kernel, cl.getNumAtoms());
} }
OpenCLApplyMonteCarloBarostatKernel::~OpenCLApplyMonteCarloBarostatKernel() {
if (savedPositions != NULL)
delete savedPositions;
if (savedForces != NULL)
delete savedForces;
if (moleculeAtoms != NULL)
delete moleculeAtoms;
if (moleculeStartIndex != NULL)
delete moleculeStartIndex;
}
void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) { void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
savedPositions = new OpenCLArray(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions"); savedPositions.initialize(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions");
savedForces = new OpenCLArray(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedForces"); savedForces.initialize(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedForces");
cl::Program program = cl.createProgram(OpenCLKernelSources::monteCarloBarostat); cl::Program program = cl.createProgram(OpenCLKernelSources::monteCarloBarostat);
kernel = cl::Kernel(program, "scalePositions"); kernel = cl::Kernel(program, "scalePositions");
} }
...@@ -8548,10 +8442,10 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, ...@@ -8548,10 +8442,10 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
vector<vector<int> > molecules = context.getMolecules(); vector<vector<int> > molecules = context.getMolecules();
numMolecules = molecules.size(); numMolecules = molecules.size();
moleculeAtoms = OpenCLArray::create<int>(cl, cl.getNumAtoms(), "moleculeAtoms"); moleculeAtoms.initialize<int>(cl, cl.getNumAtoms(), "moleculeAtoms");
moleculeStartIndex = OpenCLArray::create<int>(cl, numMolecules+1, "moleculeStartIndex"); moleculeStartIndex.initialize<int>(cl, numMolecules+1, "moleculeStartIndex");
vector<int> atoms(moleculeAtoms->getSize()); vector<int> atoms(moleculeAtoms.getSize());
vector<int> startIndex(moleculeStartIndex->getSize()); vector<int> startIndex(moleculeStartIndex.getSize());
int index = 0; int index = 0;
for (int i = 0; i < numMolecules; i++) { for (int i = 0; i < numMolecules; i++) {
startIndex[i] = index; startIndex[i] = index;
...@@ -8559,19 +8453,19 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, ...@@ -8559,19 +8453,19 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
atoms[index++] = molecule; atoms[index++] = molecule;
} }
startIndex[numMolecules] = index; startIndex[numMolecules] = index;
moleculeAtoms->upload(atoms); moleculeAtoms.upload(atoms);
moleculeStartIndex->upload(startIndex); moleculeStartIndex.upload(startIndex);
// Initialize the kernel arguments. // Initialize the kernel arguments.
kernel.setArg<cl_int>(3, numMolecules); kernel.setArg<cl_int>(3, numMolecules);
kernel.setArg<cl::Buffer>(9, cl.getPosq().getDeviceBuffer()); kernel.setArg<cl::Buffer>(9, cl.getPosq().getDeviceBuffer());
kernel.setArg<cl::Buffer>(10, moleculeAtoms->getDeviceBuffer()); kernel.setArg<cl::Buffer>(10, moleculeAtoms.getDeviceBuffer());
kernel.setArg<cl::Buffer>(11, moleculeStartIndex->getDeviceBuffer()); kernel.setArg<cl::Buffer>(11, moleculeStartIndex.getDeviceBuffer());
} }
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4)); int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(cl.getPosq().getDeviceBuffer(), savedPositions->getDeviceBuffer(), 0, 0, bytesToCopy); cl.getQueue().enqueueCopyBuffer(cl.getPosq().getDeviceBuffer(), savedPositions.getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), savedForces->getDeviceBuffer(), 0, 0, bytesToCopy); cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), savedForces.getDeviceBuffer(), 0, 0, bytesToCopy);
kernel.setArg<cl_float>(0, (cl_float) scaleX); kernel.setArg<cl_float>(0, (cl_float) scaleX);
kernel.setArg<cl_float>(1, (cl_float) scaleY); kernel.setArg<cl_float>(1, (cl_float) scaleY);
kernel.setArg<cl_float>(2, (cl_float) scaleZ); kernel.setArg<cl_float>(2, (cl_float) scaleZ);
...@@ -8584,19 +8478,14 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, ...@@ -8584,19 +8478,14 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
void OpenCLApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) { void OpenCLApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4)); int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(savedPositions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, bytesToCopy); cl.getQueue().enqueueCopyBuffer(savedPositions.getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(savedForces->getDeviceBuffer(), cl.getForce().getDeviceBuffer(), 0, 0, bytesToCopy); cl.getQueue().enqueueCopyBuffer(savedForces.getDeviceBuffer(), cl.getForce().getDeviceBuffer(), 0, 0, bytesToCopy);
}
OpenCLRemoveCMMotionKernel::~OpenCLRemoveCMMotionKernel() {
if (cmMomentum != NULL)
delete cmMomentum;
} }
void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) { void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) {
frequency = force.getFrequency(); frequency = force.getFrequency();
int numAtoms = cl.getNumAtoms(); int numAtoms = cl.getNumAtoms();
cmMomentum = OpenCLArray::create<mm_float4>(cl, (numAtoms+OpenCLContext::ThreadBlockSize-1)/OpenCLContext::ThreadBlockSize, "cmMomentum"); cmMomentum.initialize<mm_float4>(cl, (numAtoms+OpenCLContext::ThreadBlockSize-1)/OpenCLContext::ThreadBlockSize, "cmMomentum");
double totalMass = 0.0; double totalMass = 0.0;
for (int i = 0; i < numAtoms; i++) for (int i = 0; i < numAtoms; i++)
totalMass += system.getParticleMass(i); totalMass += system.getParticleMass(i);
...@@ -8606,12 +8495,12 @@ void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotion ...@@ -8606,12 +8495,12 @@ void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotion
kernel1 = cl::Kernel(program, "calcCenterOfMassMomentum"); kernel1 = cl::Kernel(program, "calcCenterOfMassMomentum");
kernel1.setArg<cl_int>(0, numAtoms); kernel1.setArg<cl_int>(0, numAtoms);
kernel1.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, cmMomentum->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(2, cmMomentum.getDeviceBuffer());
kernel1.setArg(3, OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL); kernel1.setArg(3, OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
kernel2 = cl::Kernel(program, "removeCenterOfMassMomentum"); kernel2 = cl::Kernel(program, "removeCenterOfMassMomentum");
kernel2.setArg<cl_int>(0, numAtoms); kernel2.setArg<cl_int>(0, numAtoms);
kernel2.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer()); kernel2.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, cmMomentum->getDeviceBuffer()); kernel2.setArg<cl::Buffer>(2, cmMomentum.getDeviceBuffer());
kernel2.setArg(3, OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL); kernel2.setArg(3, OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
} }
......
...@@ -55,9 +55,7 @@ private: ...@@ -55,9 +55,7 @@ private:
}; };
OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true), OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true),
numForceBuffers(0), exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL), numForceBuffers(0), blockSorter(NULL), pinnedCountBuffer(NULL), pinnedCountMemory(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) {
interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL),
oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), pinnedCountBuffer(NULL), pinnedCountMemory(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) {
// Decide how many thread blocks and force buffers to use. // Decide how many thread blocks and force buffers to use.
deviceIsCpu = (context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU); deviceIsCpu = (context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
...@@ -95,34 +93,6 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con ...@@ -95,34 +93,6 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
} }
OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() { OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
if (exclusionIndices != NULL)
delete exclusionIndices;
if (exclusionRowIndices != NULL)
delete exclusionRowIndices;
if (exclusionTiles != NULL)
delete exclusionTiles;
if (exclusions != NULL)
delete exclusions;
if (interactingTiles != NULL)
delete interactingTiles;
if (interactingAtoms != NULL)
delete interactingAtoms;
if (interactionCount != NULL)
delete interactionCount;
if (blockCenter != NULL)
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (sortedBlocks != NULL)
delete sortedBlocks;
if (sortedBlockCenter != NULL)
delete sortedBlockCenter;
if (sortedBlockBoundingBox != NULL)
delete sortedBlockBoundingBox;
if (oldPositions != NULL)
delete oldPositions;
if (rebuildNeighborList != NULL)
delete rebuildNeighborList;
if (blockSorter != NULL) if (blockSorter != NULL)
delete blockSorter; delete blockSorter;
if (pinnedCountBuffer != NULL) if (pinnedCountBuffer != NULL)
...@@ -246,8 +216,8 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -246,8 +216,8 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
for (set<pair<int, int> >::const_iterator iter = tilesWithExclusions.begin(); iter != tilesWithExclusions.end(); ++iter) for (set<pair<int, int> >::const_iterator iter = tilesWithExclusions.begin(); iter != tilesWithExclusions.end(); ++iter)
exclusionTilesVec.push_back(mm_ushort2((unsigned short) iter->first, (unsigned short) iter->second)); exclusionTilesVec.push_back(mm_ushort2((unsigned short) iter->first, (unsigned short) iter->second));
sort(exclusionTilesVec.begin(), exclusionTilesVec.end(), context.getSIMDWidth() <= 32 ? compareUshort2 : compareUshort2LargeSIMD); sort(exclusionTilesVec.begin(), exclusionTilesVec.end(), context.getSIMDWidth() <= 32 ? compareUshort2 : compareUshort2LargeSIMD);
exclusionTiles = OpenCLArray::create<mm_ushort2>(context, exclusionTilesVec.size(), "exclusionTiles"); exclusionTiles.initialize<mm_ushort2>(context, exclusionTilesVec.size(), "exclusionTiles");
exclusionTiles->upload(exclusionTilesVec); exclusionTiles.upload(exclusionTilesVec);
map<pair<int, int>, int> exclusionTileMap; map<pair<int, int>, int> exclusionTileMap;
for (int i = 0; i < (int) exclusionTilesVec.size(); i++) { for (int i = 0; i < (int) exclusionTilesVec.size(); i++) {
mm_ushort2 tile = exclusionTilesVec[i]; mm_ushort2 tile = exclusionTilesVec[i];
...@@ -268,17 +238,17 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -268,17 +238,17 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
maxExclusions = 0; maxExclusions = 0;
for (int i = 0; i < (int) exclusionBlocksForBlock.size(); i++) for (int i = 0; i < (int) exclusionBlocksForBlock.size(); i++)
maxExclusions = (maxExclusions > exclusionBlocksForBlock[i].size() ? maxExclusions : exclusionBlocksForBlock[i].size()); maxExclusions = (maxExclusions > exclusionBlocksForBlock[i].size() ? maxExclusions : exclusionBlocksForBlock[i].size());
exclusionIndices = OpenCLArray::create<cl_uint>(context, exclusionIndicesVec.size(), "exclusionIndices"); exclusionIndices.initialize<cl_uint>(context, exclusionIndicesVec.size(), "exclusionIndices");
exclusionRowIndices = OpenCLArray::create<cl_uint>(context, exclusionRowIndicesVec.size(), "exclusionRowIndices"); exclusionRowIndices.initialize<cl_uint>(context, exclusionRowIndicesVec.size(), "exclusionRowIndices");
exclusionIndices->upload(exclusionIndicesVec); exclusionIndices.upload(exclusionIndicesVec);
exclusionRowIndices->upload(exclusionRowIndicesVec); exclusionRowIndices.upload(exclusionRowIndicesVec);
// Record the exclusion data. // Record the exclusion data.
exclusions = OpenCLArray::create<cl_uint>(context, tilesWithExclusions.size()*OpenCLContext::TileSize, "exclusions"); exclusions.initialize<cl_uint>(context, tilesWithExclusions.size()*OpenCLContext::TileSize, "exclusions");
cl_uint allFlags = (cl_uint) -1; cl_uint allFlags = (cl_uint) -1;
vector<cl_uint> exclusionVec(exclusions->getSize(), allFlags); vector<cl_uint> exclusionVec(exclusions.getSize(), allFlags);
for (int i = 0; i < exclusions->getSize(); ++i) for (int i = 0; i < exclusions.getSize(); ++i)
exclusionVec[i] = 0xFFFFFFFF; exclusionVec[i] = 0xFFFFFFFF;
for (int atom1 = 0; atom1 < (int) atomExclusions.size(); ++atom1) { for (int atom1 = 0; atom1 < (int) atomExclusions.size(); ++atom1) {
int x = atom1/OpenCLContext::TileSize; int x = atom1/OpenCLContext::TileSize;
...@@ -298,7 +268,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -298,7 +268,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
} }
} }
atomExclusions.clear(); // We won't use this again, so free the memory it used atomExclusions.clear(); // We won't use this again, so free the memory it used
exclusions->upload(exclusionVec); exclusions.upload(exclusionVec);
// Create data structures for the neighbor list. // Create data structures for the neighbor list.
...@@ -312,20 +282,20 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -312,20 +282,20 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if (maxTiles < 1) if (maxTiles < 1)
maxTiles = 1; maxTiles = 1;
int numAtoms = context.getNumAtoms(); int numAtoms = context.getNumAtoms();
interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles"); interactingTiles.initialize<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms.initialize<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
interactionCount = OpenCLArray::create<cl_uint>(context, 1, "interactionCount"); interactionCount.initialize<cl_uint>(context, 1, "interactionCount");
int elementSize = (context.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float)); int elementSize = (context.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
blockCenter = new OpenCLArray(context, numAtomBlocks, 4*elementSize, "blockCenter"); blockCenter.initialize(context, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox = new OpenCLArray(context, numAtomBlocks, 4*elementSize, "blockBoundingBox"); blockBoundingBox.initialize(context, numAtomBlocks, 4*elementSize, "blockBoundingBox");
sortedBlocks = new OpenCLArray(context, numAtomBlocks, 2*elementSize, "sortedBlocks"); sortedBlocks.initialize(context, numAtomBlocks, 2*elementSize, "sortedBlocks");
sortedBlockCenter = new OpenCLArray(context, numAtomBlocks+1, 4*elementSize, "sortedBlockCenter"); sortedBlockCenter.initialize(context, numAtomBlocks+1, 4*elementSize, "sortedBlockCenter");
sortedBlockBoundingBox = new OpenCLArray(context, numAtomBlocks+1, 4*elementSize, "sortedBlockBoundingBox"); sortedBlockBoundingBox.initialize(context, numAtomBlocks+1, 4*elementSize, "sortedBlockBoundingBox");
oldPositions = new OpenCLArray(context, numAtoms, 4*elementSize, "oldPositions"); oldPositions.initialize(context, numAtoms, 4*elementSize, "oldPositions");
rebuildNeighborList = OpenCLArray::create<int>(context, 1, "rebuildNeighborList"); rebuildNeighborList.initialize<int>(context, 1, "rebuildNeighborList");
blockSorter = new OpenCLSort(context, new BlockSortTrait(context.getUseDoublePrecision()), numAtomBlocks); blockSorter = new OpenCLSort(context, new BlockSortTrait(context.getUseDoublePrecision()), numAtomBlocks);
vector<cl_uint> count(1, 0); vector<cl_uint> count(1, 0);
interactionCount->upload(count); interactionCount.upload(count);
} }
} }
...@@ -376,14 +346,14 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) { ...@@ -376,14 +346,14 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
forceRebuildNeighborList = true; forceRebuildNeighborList = true;
setPeriodicBoxArgs(context, kernels.findBlockBoundsKernel, 1); setPeriodicBoxArgs(context, kernels.findBlockBoundsKernel, 1);
context.executeKernel(kernels.findBlockBoundsKernel, context.getNumAtoms()); context.executeKernel(kernels.findBlockBoundsKernel, context.getNumAtoms());
blockSorter->sort(*sortedBlocks); blockSorter->sort(sortedBlocks);
kernels.sortBoxDataKernel.setArg<cl_int>(9, forceRebuildNeighborList); kernels.sortBoxDataKernel.setArg<cl_int>(9, forceRebuildNeighborList);
context.executeKernel(kernels.sortBoxDataKernel, context.getNumAtoms()); context.executeKernel(kernels.sortBoxDataKernel, context.getNumAtoms());
setPeriodicBoxArgs(context, kernels.findInteractingBlocksKernel, 0); setPeriodicBoxArgs(context, kernels.findInteractingBlocksKernel, 0);
context.executeKernel(kernels.findInteractingBlocksKernel, context.getNumAtoms(), interactingBlocksThreadBlockSize); context.executeKernel(kernels.findInteractingBlocksKernel, context.getNumAtoms(), interactingBlocksThreadBlockSize);
forceRebuildNeighborList = false; forceRebuildNeighborList = false;
lastCutoff = kernels.cutoffDistance; lastCutoff = kernels.cutoffDistance;
context.getQueue().enqueueReadBuffer(interactionCount->getDeviceBuffer(), CL_FALSE, 0, sizeof(int), pinnedCountMemory, NULL, &downloadCountEvent); context.getQueue().enqueueReadBuffer(interactionCount.getDeviceBuffer(), CL_FALSE, 0, sizeof(int), pinnedCountMemory, NULL, &downloadCountEvent);
} }
void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) { void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) {
...@@ -407,7 +377,7 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include ...@@ -407,7 +377,7 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
bool OpenCLNonbondedUtilities::updateNeighborListSize() { bool OpenCLNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff) if (!useCutoff)
return false; return false;
if (pinnedCountMemory[0] <= (unsigned int) interactingTiles->getSize()) if (pinnedCountMemory[0] <= (unsigned int) interactingTiles.getSize())
return false; return false;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent // The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
...@@ -417,31 +387,27 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() { ...@@ -417,31 +387,27 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2; int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
if (maxTiles > totalTiles) if (maxTiles > totalTiles)
maxTiles = totalTiles; maxTiles = totalTiles;
delete interactingTiles; interactingTiles.resize(maxTiles);
delete interactingAtoms; interactingAtoms.resize(OpenCLContext::TileSize*maxTiles);
interactingTiles = NULL; // Avoid an error in the destructor if the following allocation fails
interactingAtoms = NULL;
interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
for (map<int, KernelSet>::iterator iter = groupKernels.begin(); iter != groupKernels.end(); ++iter) { for (map<int, KernelSet>::iterator iter = groupKernels.begin(); iter != groupKernels.end(); ++iter) {
KernelSet& kernels = iter->second; KernelSet& kernels = iter->second;
if (*reinterpret_cast<cl_kernel*>(&kernels.forceKernel) != NULL) { if (*reinterpret_cast<cl_kernel*>(&kernels.forceKernel) != NULL) {
kernels.forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer()); kernels.forceKernel.setArg<cl::Buffer>(7, interactingTiles.getDeviceBuffer());
kernels.forceKernel.setArg<cl_uint>(14, maxTiles); kernels.forceKernel.setArg<cl_uint>(14, maxTiles);
kernels.forceKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer()); kernels.forceKernel.setArg<cl::Buffer>(17, interactingAtoms.getDeviceBuffer());
} }
if (*reinterpret_cast<cl_kernel*>(&kernels.energyKernel) != NULL) { if (*reinterpret_cast<cl_kernel*>(&kernels.energyKernel) != NULL) {
kernels.energyKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer()); kernels.energyKernel.setArg<cl::Buffer>(7, interactingTiles.getDeviceBuffer());
kernels.energyKernel.setArg<cl_uint>(14, maxTiles); kernels.energyKernel.setArg<cl_uint>(14, maxTiles);
kernels.energyKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer()); kernels.energyKernel.setArg<cl::Buffer>(17, interactingAtoms.getDeviceBuffer());
} }
if (*reinterpret_cast<cl_kernel*>(&kernels.forceEnergyKernel) != NULL) { if (*reinterpret_cast<cl_kernel*>(&kernels.forceEnergyKernel) != NULL) {
kernels.forceEnergyKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer()); kernels.forceEnergyKernel.setArg<cl::Buffer>(7, interactingTiles.getDeviceBuffer());
kernels.forceEnergyKernel.setArg<cl_uint>(14, maxTiles); kernels.forceEnergyKernel.setArg<cl_uint>(14, maxTiles);
kernels.forceEnergyKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer()); kernels.forceEnergyKernel.setArg<cl::Buffer>(17, interactingAtoms.getDeviceBuffer());
} }
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles.getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms.getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles); kernels.findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles);
} }
forceRebuildNeighborList = true; forceRebuildNeighborList = true;
...@@ -506,7 +472,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) { ...@@ -506,7 +472,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
defines["PADDING"] = context.doubleToString(padding); defines["PADDING"] = context.doubleToString(padding);
defines["PADDED_CUTOFF"] = context.doubleToString(paddedCutoff); defines["PADDED_CUTOFF"] = context.doubleToString(paddedCutoff);
defines["PADDED_CUTOFF_SQUARED"] = context.doubleToString(paddedCutoff*paddedCutoff); defines["PADDED_CUTOFF_SQUARED"] = context.doubleToString(paddedCutoff*paddedCutoff);
defines["NUM_TILES_WITH_EXCLUSIONS"] = context.intToString(exclusionTiles->getSize()); defines["NUM_TILES_WITH_EXCLUSIONS"] = context.intToString(exclusionTiles.getSize());
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks()); defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
defines["SIMD_WIDTH"] = context.intToString(context.getSIMDWidth()); defines["SIMD_WIDTH"] = context.intToString(context.getSIMDWidth());
if (usePeriodic) if (usePeriodic)
...@@ -523,36 +489,36 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) { ...@@ -523,36 +489,36 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
kernels.findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds"); kernels.findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds");
kernels.findBlockBoundsKernel.setArg<cl_int>(0, context.getNumAtoms()); kernels.findBlockBoundsKernel.setArg<cl_int>(0, context.getNumAtoms());
kernels.findBlockBoundsKernel.setArg<cl::Buffer>(6, context.getPosq().getDeviceBuffer()); kernels.findBlockBoundsKernel.setArg<cl::Buffer>(6, context.getPosq().getDeviceBuffer());
kernels.findBlockBoundsKernel.setArg<cl::Buffer>(7, blockCenter->getDeviceBuffer()); kernels.findBlockBoundsKernel.setArg<cl::Buffer>(7, blockCenter.getDeviceBuffer());
kernels.findBlockBoundsKernel.setArg<cl::Buffer>(8, blockBoundingBox->getDeviceBuffer()); kernels.findBlockBoundsKernel.setArg<cl::Buffer>(8, blockBoundingBox.getDeviceBuffer());
kernels.findBlockBoundsKernel.setArg<cl::Buffer>(9, rebuildNeighborList->getDeviceBuffer()); kernels.findBlockBoundsKernel.setArg<cl::Buffer>(9, rebuildNeighborList.getDeviceBuffer());
kernels.findBlockBoundsKernel.setArg<cl::Buffer>(10, sortedBlocks->getDeviceBuffer()); kernels.findBlockBoundsKernel.setArg<cl::Buffer>(10, sortedBlocks.getDeviceBuffer());
kernels.sortBoxDataKernel = cl::Kernel(interactingBlocksProgram, "sortBoxData"); kernels.sortBoxDataKernel = cl::Kernel(interactingBlocksProgram, "sortBoxData");
kernels.sortBoxDataKernel.setArg<cl::Buffer>(0, sortedBlocks->getDeviceBuffer()); kernels.sortBoxDataKernel.setArg<cl::Buffer>(0, sortedBlocks.getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(1, blockCenter->getDeviceBuffer()); kernels.sortBoxDataKernel.setArg<cl::Buffer>(1, blockCenter.getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(2, blockBoundingBox->getDeviceBuffer()); kernels.sortBoxDataKernel.setArg<cl::Buffer>(2, blockBoundingBox.getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(3, sortedBlockCenter->getDeviceBuffer()); kernels.sortBoxDataKernel.setArg<cl::Buffer>(3, sortedBlockCenter.getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(4, sortedBlockBoundingBox->getDeviceBuffer()); kernels.sortBoxDataKernel.setArg<cl::Buffer>(4, sortedBlockBoundingBox.getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(5, context.getPosq().getDeviceBuffer()); kernels.sortBoxDataKernel.setArg<cl::Buffer>(5, context.getPosq().getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(6, oldPositions->getDeviceBuffer()); kernels.sortBoxDataKernel.setArg<cl::Buffer>(6, oldPositions.getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(7, interactionCount->getDeviceBuffer()); kernels.sortBoxDataKernel.setArg<cl::Buffer>(7, interactionCount.getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(8, rebuildNeighborList->getDeviceBuffer()); kernels.sortBoxDataKernel.setArg<cl::Buffer>(8, rebuildNeighborList.getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl_int>(9, true); kernels.sortBoxDataKernel.setArg<cl_int>(9, true);
kernels.findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions"); kernels.findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions");
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(5, interactionCount->getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(5, interactionCount.getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles.getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms.getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(8, context.getPosq().getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(8, context.getPosq().getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl_uint>(9, interactingTiles->getSize()); kernels.findInteractingBlocksKernel.setArg<cl_uint>(9, interactingTiles.getSize());
kernels.findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex); kernels.findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex);
kernels.findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks); kernels.findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks);
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(12, sortedBlocks->getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(12, sortedBlocks.getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(13, sortedBlockCenter->getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(13, sortedBlockCenter.getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(14, sortedBlockBoundingBox->getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(14, sortedBlockBoundingBox.getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(15, exclusionIndices->getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(15, exclusionIndices.getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(16, exclusionRowIndices->getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(16, exclusionRowIndices.getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(17, oldPositions->getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(17, oldPositions.getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(18, rebuildNeighborList->getDeviceBuffer()); kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(18, rebuildNeighborList.getDeviceBuffer());
if (kernels.findInteractingBlocksKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()) < groupSize) { if (kernels.findInteractingBlocksKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()) < groupSize) {
// The device can't handle this block size, so reduce it. // The device can't handle this block size, so reduce it.
...@@ -700,7 +666,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -700,7 +666,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks()); defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
defines["TILE_SIZE"] = context.intToString(OpenCLContext::TileSize); defines["TILE_SIZE"] = context.intToString(OpenCLContext::TileSize);
int numExclusionTiles = exclusionTiles->getSize(); int numExclusionTiles = exclusionTiles.getSize();
defines["NUM_TILES_WITH_EXCLUSIONS"] = context.intToString(numExclusionTiles); defines["NUM_TILES_WITH_EXCLUSIONS"] = context.intToString(numExclusionTiles);
int numContexts = context.getPlatformData().contexts.size(); int numContexts = context.getPlatformData().contexts.size();
int startExclusionIndex = context.getContextIndex()*numExclusionTiles/numContexts; int startExclusionIndex = context.getContextIndex()*numExclusionTiles/numContexts;
...@@ -726,18 +692,18 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -726,18 +692,18 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel.setArg<cl::Buffer>(index++, context.getForceBuffers().getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, context.getForceBuffers().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, context.getEnergyBuffer().getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, context.getEnergyBuffer().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, exclusions.getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, exclusionTiles->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, exclusionTiles.getDeviceBuffer());
kernel.setArg<cl_uint>(index++, startTileIndex); kernel.setArg<cl_uint>(index++, startTileIndex);
kernel.setArg<cl_uint>(index++, numTiles); kernel.setArg<cl_uint>(index++, numTiles);
if (useCutoff) { if (useCutoff) {
kernel.setArg<cl::Buffer>(index++, interactingTiles->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, interactingTiles.getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, interactionCount->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, interactionCount.getDeviceBuffer());
index += 5; // The periodic box size arguments are set when the kernel is executed. index += 5; // The periodic box size arguments are set when the kernel is executed.
kernel.setArg<cl_uint>(index++, interactingTiles->getSize()); kernel.setArg<cl_uint>(index++, interactingTiles.getSize());
kernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, blockCenter.getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, blockBoundingBox.getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, interactingAtoms->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, interactingAtoms.getDeviceBuffer());
} }
for (int i = 0; i < (int) params.size(); i++) { for (int i = 0; i < (int) params.size(); i++) {
kernel.setArg<cl::Memory>(index++, params[i].getMemory()); kernel.setArg<cl::Memory>(index++, params[i].getMemory());
......
...@@ -118,14 +118,12 @@ private: ...@@ -118,14 +118,12 @@ private:
OpenCLParallelCalcForcesAndEnergyKernel::OpenCLParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, OpenCLPlatform::PlatformData& data) : OpenCLParallelCalcForcesAndEnergyKernel::OpenCLParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, OpenCLPlatform::PlatformData& data) :
CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()), CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()),
tileCounts(data.contexts.size()), contextForces(NULL), pinnedPositionBuffer(NULL), pinnedPositionMemory(NULL), pinnedForceBuffer(NULL), pinnedForceMemory(NULL) { tileCounts(data.contexts.size()), pinnedPositionBuffer(NULL), pinnedPositionMemory(NULL), pinnedForceBuffer(NULL), pinnedForceMemory(NULL) {
for (int i = 0; i < (int) data.contexts.size(); i++) for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcForcesAndEnergyKernel(name, platform, *data.contexts[i]))); kernels.push_back(Kernel(new OpenCLCalcForcesAndEnergyKernel(name, platform, *data.contexts[i])));
} }
OpenCLParallelCalcForcesAndEnergyKernel::~OpenCLParallelCalcForcesAndEnergyKernel() { OpenCLParallelCalcForcesAndEnergyKernel::~OpenCLParallelCalcForcesAndEnergyKernel() {
if (contextForces != NULL)
delete contextForces;
if (pinnedPositionBuffer != NULL) if (pinnedPositionBuffer != NULL)
delete pinnedPositionBuffer; delete pinnedPositionBuffer;
if (pinnedForceBuffer != NULL) if (pinnedForceBuffer != NULL)
...@@ -142,8 +140,8 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -142,8 +140,8 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) { void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
OpenCLContext& cl0 = *data.contexts[0]; OpenCLContext& cl0 = *data.contexts[0];
int elementSize = (cl0.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4)); int elementSize = (cl0.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
if (contextForces == NULL) { if (!contextForces.isInitialized()) {
contextForces = OpenCLArray::create<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(), contextForces.initialize<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(),
data.contexts.size()*cl0.getPaddedNumAtoms(), "contextForces"); data.contexts.size()*cl0.getPaddedNumAtoms(), "contextForces");
int bufferBytes = (data.contexts.size()-1)*cl0.getPaddedNumAtoms()*elementSize; int bufferBytes = (data.contexts.size()-1)*cl0.getPaddedNumAtoms()*elementSize;
pinnedPositionBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes); pinnedPositionBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes);
...@@ -179,9 +177,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c ...@@ -179,9 +177,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
OpenCLContext& cl = *data.contexts[0]; OpenCLContext& cl = *data.contexts[0];
int numAtoms = cl.getPaddedNumAtoms(); int numAtoms = cl.getPaddedNumAtoms();
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4)); int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueWriteBuffer(contextForces->getDeviceBuffer(), CL_FALSE, numAtoms*elementSize, cl.getQueue().enqueueWriteBuffer(contextForces.getDeviceBuffer(), CL_FALSE, numAtoms*elementSize,
numAtoms*(data.contexts.size()-1)*elementSize, pinnedForceMemory); numAtoms*(data.contexts.size()-1)*elementSize, pinnedForceMemory);
cl.reduceBuffer(*contextForces, data.contexts.size()); cl.reduceBuffer(contextForces, data.contexts.size());
// Balance work between the contexts by transferring a little nonbonded work from the context that // Balance work between the contexts by transferring a little nonbonded work from the context that
// finished last to the one that finished first. // finished last to the one that finished first.
......
...@@ -31,8 +31,7 @@ ...@@ -31,8 +31,7 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int length) : context(context), trait(trait), OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int length) : context(context), trait(trait), dataLength(length) {
dataRange(NULL), bucketOfElement(NULL), offsetInBucket(NULL), bucketOffset(NULL), buckets(NULL), dataLength(length) {
// Create kernels. // Create kernels.
std::map<std::string, std::string> replacements; std::map<std::string, std::string> replacements;
...@@ -81,26 +80,16 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le ...@@ -81,26 +80,16 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le
// Create workspace arrays. // Create workspace arrays.
if (!isShortList) { if (!isShortList) {
dataRange = new OpenCLArray(context, 2, trait->getKeySize(), "sortDataRange"); dataRange.initialize(context, 2, trait->getKeySize(), "sortDataRange");
bucketOffset = OpenCLArray::create<cl_uint>(context, numBuckets, "bucketOffset"); bucketOffset.initialize<cl_uint>(context, numBuckets, "bucketOffset");
bucketOfElement = OpenCLArray::create<cl_uint>(context, length, "bucketOfElement"); bucketOfElement.initialize<cl_uint>(context, length, "bucketOfElement");
offsetInBucket = OpenCLArray::create<cl_uint>(context, length, "offsetInBucket"); offsetInBucket.initialize<cl_uint>(context, length, "offsetInBucket");
buckets = new OpenCLArray(context, length, trait->getDataSize(), "buckets"); buckets.initialize(context, length, trait->getDataSize(), "buckets");
} }
} }
OpenCLSort::~OpenCLSort() { OpenCLSort::~OpenCLSort() {
delete trait; delete trait;
if (dataRange != NULL)
delete dataRange;
if (bucketOfElement != NULL)
delete bucketOfElement;
if (offsetInBucket != NULL)
delete offsetInBucket;
if (bucketOffset != NULL)
delete bucketOffset;
if (buckets != NULL)
delete buckets;
} }
void OpenCLSort::sort(OpenCLArray& data) { void OpenCLSort::sort(OpenCLArray& data) {
...@@ -119,14 +108,14 @@ void OpenCLSort::sort(OpenCLArray& data) { ...@@ -119,14 +108,14 @@ void OpenCLSort::sort(OpenCLArray& data) {
else { else {
// Compute the range of data values. // Compute the range of data values.
unsigned int numBuckets = bucketOffset->getSize(); unsigned int numBuckets = bucketOffset.getSize();
computeRangeKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer()); computeRangeKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
computeRangeKernel.setArg<cl_uint>(1, data.getSize()); computeRangeKernel.setArg<cl_uint>(1, data.getSize());
computeRangeKernel.setArg<cl::Buffer>(2, dataRange->getDeviceBuffer()); computeRangeKernel.setArg<cl::Buffer>(2, dataRange.getDeviceBuffer());
computeRangeKernel.setArg(3, rangeKernelSize*trait->getKeySize(), NULL); computeRangeKernel.setArg(3, rangeKernelSize*trait->getKeySize(), NULL);
computeRangeKernel.setArg(4, rangeKernelSize*trait->getKeySize(), NULL); computeRangeKernel.setArg(4, rangeKernelSize*trait->getKeySize(), NULL);
computeRangeKernel.setArg<cl_int>(5, numBuckets); computeRangeKernel.setArg<cl_int>(5, numBuckets);
computeRangeKernel.setArg<cl::Buffer>(6, bucketOffset->getDeviceBuffer()); computeRangeKernel.setArg<cl::Buffer>(6, bucketOffset.getDeviceBuffer());
context.executeKernel(computeRangeKernel, rangeKernelSize, rangeKernelSize); context.executeKernel(computeRangeKernel, rangeKernelSize, rangeKernelSize);
// Assign array elements to buckets. // Assign array elements to buckets.
...@@ -134,35 +123,35 @@ void OpenCLSort::sort(OpenCLArray& data) { ...@@ -134,35 +123,35 @@ void OpenCLSort::sort(OpenCLArray& data) {
assignElementsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer()); assignElementsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
assignElementsKernel.setArg<cl_int>(1, data.getSize()); assignElementsKernel.setArg<cl_int>(1, data.getSize());
assignElementsKernel.setArg<cl_int>(2, numBuckets); assignElementsKernel.setArg<cl_int>(2, numBuckets);
assignElementsKernel.setArg<cl::Buffer>(3, dataRange->getDeviceBuffer()); assignElementsKernel.setArg<cl::Buffer>(3, dataRange.getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(4, bucketOffset->getDeviceBuffer()); assignElementsKernel.setArg<cl::Buffer>(4, bucketOffset.getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(5, bucketOfElement->getDeviceBuffer()); assignElementsKernel.setArg<cl::Buffer>(5, bucketOfElement.getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(6, offsetInBucket->getDeviceBuffer()); assignElementsKernel.setArg<cl::Buffer>(6, offsetInBucket.getDeviceBuffer());
context.executeKernel(assignElementsKernel, data.getSize()); context.executeKernel(assignElementsKernel, data.getSize());
// Compute the position of each bucket. // Compute the position of each bucket.
computeBucketPositionsKernel.setArg<cl_int>(0, numBuckets); computeBucketPositionsKernel.setArg<cl_int>(0, numBuckets);
computeBucketPositionsKernel.setArg<cl::Buffer>(1, bucketOffset->getDeviceBuffer()); computeBucketPositionsKernel.setArg<cl::Buffer>(1, bucketOffset.getDeviceBuffer());
computeBucketPositionsKernel.setArg(2, positionsKernelSize*sizeof(cl_int), NULL); computeBucketPositionsKernel.setArg(2, positionsKernelSize*sizeof(cl_int), NULL);
context.executeKernel(computeBucketPositionsKernel, positionsKernelSize, positionsKernelSize); context.executeKernel(computeBucketPositionsKernel, positionsKernelSize, positionsKernelSize);
// Copy the data into the buckets. // Copy the data into the buckets.
copyToBucketsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer()); copyToBucketsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(1, buckets->getDeviceBuffer()); copyToBucketsKernel.setArg<cl::Buffer>(1, buckets.getDeviceBuffer());
copyToBucketsKernel.setArg<cl_int>(2, data.getSize()); copyToBucketsKernel.setArg<cl_int>(2, data.getSize());
copyToBucketsKernel.setArg<cl::Buffer>(3, bucketOffset->getDeviceBuffer()); copyToBucketsKernel.setArg<cl::Buffer>(3, bucketOffset.getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(4, bucketOfElement->getDeviceBuffer()); copyToBucketsKernel.setArg<cl::Buffer>(4, bucketOfElement.getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(5, offsetInBucket->getDeviceBuffer()); copyToBucketsKernel.setArg<cl::Buffer>(5, offsetInBucket.getDeviceBuffer());
context.executeKernel(copyToBucketsKernel, data.getSize()); context.executeKernel(copyToBucketsKernel, data.getSize());
// Sort each bucket. // Sort each bucket.
sortBucketsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer()); sortBucketsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
sortBucketsKernel.setArg<cl::Buffer>(1, buckets->getDeviceBuffer()); sortBucketsKernel.setArg<cl::Buffer>(1, buckets.getDeviceBuffer());
sortBucketsKernel.setArg<cl_int>(2, numBuckets); sortBucketsKernel.setArg<cl_int>(2, numBuckets);
sortBucketsKernel.setArg<cl::Buffer>(3, bucketOffset->getDeviceBuffer()); sortBucketsKernel.setArg<cl::Buffer>(3, bucketOffset.getDeviceBuffer());
sortBucketsKernel.setArg(4, sortKernelSize*trait->getDataSize(), NULL); sortBucketsKernel.setArg(4, sortKernelSize*trait->getDataSize(), NULL);
context.executeKernel(sortBucketsKernel, ((data.getSize()+sortKernelSize-1)/sortKernelSize)*sortKernelSize, sortKernelSize); context.executeKernel(sortBucketsKernel, ((data.getSize()+sortKernelSize-1)/sortKernelSize)*sortKernelSize, sortKernelSize);
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. * * Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -107,13 +107,6 @@ private: ...@@ -107,13 +107,6 @@ private:
const DrudeForce& force; const DrudeForce& force;
}; };
OpenCLCalcDrudeForceKernel::~OpenCLCalcDrudeForceKernel() {
if (particleParams != NULL)
delete particleParams;
if (pairParams != NULL)
delete pairParams;
}
void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) { void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
int numContexts = cl.getPlatformData().contexts.size(); int numContexts = cl.getPlatformData().contexts.size();
int startParticleIndex = cl.getContextIndex()*force.getNumParticles()/numContexts; int startParticleIndex = cl.getContextIndex()*force.getNumParticles()/numContexts;
...@@ -123,7 +116,7 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor ...@@ -123,7 +116,7 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
// Create the harmonic interaction . // Create the harmonic interaction .
vector<vector<int> > atoms(numParticles, vector<int>(5)); vector<vector<int> > atoms(numParticles, vector<int>(5));
particleParams = OpenCLArray::create<mm_float4>(cl, numParticles, "drudeParticleParams"); particleParams.initialize<mm_float4>(cl, numParticles, "drudeParticleParams");
vector<mm_float4> paramVector(numParticles); vector<mm_float4> paramVector(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
double charge, polarizability, aniso12, aniso34; double charge, polarizability, aniso12, aniso34;
...@@ -145,9 +138,9 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor ...@@ -145,9 +138,9 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
} }
paramVector[i] = mm_float4((float) k1, (float) k2, (float) k3, 0.0f); paramVector[i] = mm_float4((float) k1, (float) k2, (float) k3, 0.0f);
} }
particleParams->upload(paramVector); particleParams.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(particleParams->getDeviceBuffer(), "float4"); replacements["PARAMS"] = cl.getBondedUtilities().addArgument(particleParams.getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup()); cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
} }
int startPairIndex = cl.getContextIndex()*force.getNumScreenedPairs()/numContexts; int startPairIndex = cl.getContextIndex()*force.getNumScreenedPairs()/numContexts;
...@@ -157,7 +150,7 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor ...@@ -157,7 +150,7 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
// Create the screened interaction between dipole pairs. // Create the screened interaction between dipole pairs.
vector<vector<int> > atoms(numPairs, vector<int>(4)); vector<vector<int> > atoms(numPairs, vector<int>(4));
pairParams = OpenCLArray::create<mm_float2>(cl, numPairs, "drudePairParams"); pairParams.initialize<mm_float2>(cl, numPairs, "drudePairParams");
vector<mm_float2> paramVector(numPairs); vector<mm_float2> paramVector(numPairs);
for (int i = 0; i < numPairs; i++) { for (int i = 0; i < numPairs; i++) {
int drude1, drude2; int drude1, drude2;
...@@ -171,9 +164,9 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor ...@@ -171,9 +164,9 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
double energyScale = ONE_4PI_EPS0*charge1*charge2; double energyScale = ONE_4PI_EPS0*charge1*charge2;
paramVector[i] = mm_float2((float) screeningScale, (float) energyScale); paramVector[i] = mm_float2((float) screeningScale, (float) energyScale);
} }
pairParams->upload(paramVector); pairParams.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(pairParams->getDeviceBuffer(), "float2"); replacements["PARAMS"] = cl.getBondedUtilities().addArgument(pairParams.getDeviceBuffer(), "float2");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudePairForce, replacements), force.getForceGroup()); cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
} }
cl.addForce(new OpenCLDrudeForceInfo(force)); cl.addForce(new OpenCLDrudeForceInfo(force));
...@@ -192,7 +185,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c ...@@ -192,7 +185,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
int endParticleIndex = (cl.getContextIndex()+1)*force.getNumParticles()/numContexts; int endParticleIndex = (cl.getContextIndex()+1)*force.getNumParticles()/numContexts;
int numParticles = endParticleIndex-startParticleIndex; int numParticles = endParticleIndex-startParticleIndex;
if (numParticles > 0) { if (numParticles > 0) {
if (particleParams == NULL || numParticles != particleParams->getSize()) if (!particleParams.isInitialized() || numParticles != particleParams.getSize())
throw OpenMMException("updateParametersInContext: The number of Drude particles has changed"); throw OpenMMException("updateParametersInContext: The number of Drude particles has changed");
vector<mm_float4> paramVector(numParticles); vector<mm_float4> paramVector(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
...@@ -211,7 +204,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c ...@@ -211,7 +204,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
k2 = 0; k2 = 0;
paramVector[i] = mm_float4((float) k1, (float) k2, (float) k3, 0.0f); paramVector[i] = mm_float4((float) k1, (float) k2, (float) k3, 0.0f);
} }
particleParams->upload(paramVector); particleParams.upload(paramVector);
} }
// Set the pair parameters. // Set the pair parameters.
...@@ -220,7 +213,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c ...@@ -220,7 +213,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
int endPairIndex = (cl.getContextIndex()+1)*force.getNumScreenedPairs()/numContexts; int endPairIndex = (cl.getContextIndex()+1)*force.getNumScreenedPairs()/numContexts;
int numPairs = endPairIndex-startPairIndex; int numPairs = endPairIndex-startPairIndex;
if (numPairs > 0) { if (numPairs > 0) {
if (pairParams == NULL || numPairs != pairParams->getSize()) if (!pairParams.isInitialized() || numPairs != pairParams.getSize())
throw OpenMMException("updateParametersInContext: The number of screened pairs has changed"); throw OpenMMException("updateParametersInContext: The number of screened pairs has changed");
vector<mm_float2> paramVector(numPairs); vector<mm_float2> paramVector(numPairs);
for (int i = 0; i < numPairs; i++) { for (int i = 0; i < numPairs; i++) {
...@@ -235,17 +228,10 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c ...@@ -235,17 +228,10 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
double energyScale = ONE_4PI_EPS0*charge1*charge2; double energyScale = ONE_4PI_EPS0*charge1*charge2;
paramVector[i] = mm_float2((float) screeningScale, (float) energyScale); paramVector[i] = mm_float2((float) screeningScale, (float) energyScale);
} }
pairParams->upload(paramVector); pairParams.upload(paramVector);
} }
} }
OpenCLIntegrateDrudeLangevinStepKernel::~OpenCLIntegrateDrudeLangevinStepKernel() {
if (normalParticles != NULL)
delete normalParticles;
if (pairParticles != NULL)
delete pairParticles;
}
void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) { void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
cl.getPlatformData().initializeContexts(system); cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed()); cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
...@@ -266,12 +252,12 @@ void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, co ...@@ -266,12 +252,12 @@ void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, co
pairParticleVec.push_back(mm_int2(p, p1)); pairParticleVec.push_back(mm_int2(p, p1));
} }
normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end()); normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end());
normalParticles = OpenCLArray::create<int>(cl, max((int) normalParticleVec.size(), 1), "drudeNormalParticles"); normalParticles.initialize<int>(cl, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
pairParticles = OpenCLArray::create<cl_int2>(cl, max((int) pairParticleVec.size(), 1), "drudePairParticles"); pairParticles.initialize<cl_int2>(cl, max((int) pairParticleVec.size(), 1), "drudePairParticles");
if (normalParticleVec.size() > 0) if (normalParticleVec.size() > 0)
normalParticles->upload(normalParticleVec); normalParticles.upload(normalParticleVec);
if (pairParticleVec.size() > 0) if (pairParticleVec.size() > 0)
pairParticles->upload(pairParticleVec); pairParticles.upload(pairParticleVec);
// Create kernels. // Create kernels.
...@@ -296,8 +282,8 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const ...@@ -296,8 +282,8 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, normalParticles->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(3, normalParticles.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, pairParticles->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(4, pairParticles.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, integration.getStepSize().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(5, integration.getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(12, integration.getRandom().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(12, integration.getRandom().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
...@@ -314,7 +300,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const ...@@ -314,7 +300,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
else else
hardwallKernel.setArg<void*>(1, NULL); hardwallKernel.setArg<void*>(1, NULL);
hardwallKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer()); hardwallKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
hardwallKernel.setArg<cl::Buffer>(3, pairParticles->getDeviceBuffer()); hardwallKernel.setArg<cl::Buffer>(3, pairParticles.getDeviceBuffer());
hardwallKernel.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer()); hardwallKernel.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
} }
...@@ -363,7 +349,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const ...@@ -363,7 +349,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
// Call the first integration kernel. // Call the first integration kernel.
kernel1.setArg<cl_uint>(13, integration.prepareRandomNumbers(normalParticles->getSize()+2*pairParticles->getSize())); kernel1.setArg<cl_uint>(13, integration.prepareRandomNumbers(normalParticles.getSize()+2*pairParticles.getSize()));
cl.executeKernel(kernel1, numAtoms); cl.executeKernel(kernel1, numAtoms);
// Apply constraints. // Apply constraints.
...@@ -377,7 +363,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const ...@@ -377,7 +363,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
// Apply hard wall constraints. // Apply hard wall constraints.
if (maxDrudeDistance > 0) if (maxDrudeDistance > 0)
cl.executeKernel(hardwallKernel, pairParticles->getSize()); cl.executeKernel(hardwallKernel, pairParticles.getSize());
integration.computeVirtualSites(); integration.computeVirtualSites();
// Update the time and step count. // Update the time and step count.
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. * * Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -45,9 +45,8 @@ namespace OpenMM { ...@@ -45,9 +45,8 @@ namespace OpenMM {
class OpenCLCalcDrudeForceKernel : public CalcDrudeForceKernel { class OpenCLCalcDrudeForceKernel : public CalcDrudeForceKernel {
public: public:
OpenCLCalcDrudeForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : OpenCLCalcDrudeForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
CalcDrudeForceKernel(name, platform), cl(cl), particleParams(NULL), pairParams(NULL) { CalcDrudeForceKernel(name, platform), cl(cl) {
} }
~OpenCLCalcDrudeForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -73,8 +72,8 @@ public: ...@@ -73,8 +72,8 @@ public:
void copyParametersToContext(ContextImpl& context, const DrudeForce& force); void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
private: private:
OpenCLContext& cl; OpenCLContext& cl;
OpenCLArray* particleParams; OpenCLArray particleParams;
OpenCLArray* pairParams; OpenCLArray pairParams;
}; };
/** /**
...@@ -83,9 +82,8 @@ private: ...@@ -83,9 +82,8 @@ private:
class OpenCLIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel { class OpenCLIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
public: public:
OpenCLIntegrateDrudeLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : OpenCLIntegrateDrudeLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
IntegrateDrudeLangevinStepKernel(name, platform), cl(cl), hasInitializedKernels(false), normalParticles(NULL), pairParticles(NULL) { IntegrateDrudeLangevinStepKernel(name, platform), cl(cl), hasInitializedKernels(false) {
} }
~OpenCLIntegrateDrudeLangevinStepKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -112,8 +110,8 @@ private: ...@@ -112,8 +110,8 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
double prevStepSize; double prevStepSize;
OpenCLArray* normalParticles; OpenCLArray normalParticles;
OpenCLArray* pairParticles; OpenCLArray pairParticles;
cl::Kernel kernel1, kernel2, hardwallKernel; cl::Kernel kernel1, kernel2, hardwallKernel;
}; };
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. * * Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -41,19 +41,6 @@ ...@@ -41,19 +41,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
OpenCLIntegrateRPMDStepKernel::~OpenCLIntegrateRPMDStepKernel() {
if (forces != NULL)
delete forces;
if (positions != NULL)
delete positions;
if (velocities != NULL)
delete velocities;
if (contractedForces != NULL)
delete contractedForces;
if (contractedPositions != NULL)
delete contractedPositions;
}
void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) { void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system); cl.getPlatformData().initializeContexts(system);
numCopies = integrator.getNumCopies(); numCopies = integrator.getNumCopies();
...@@ -63,32 +50,32 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI ...@@ -63,32 +50,32 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5."); throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5.");
int paddedParticles = cl.getPaddedNumAtoms(); int paddedParticles = cl.getPaddedNumAtoms();
int forceElementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4)); int forceElementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
forces = new OpenCLArray(cl, numCopies*paddedParticles, forceElementSize, "rpmdForces"); forces.initialize(cl, numCopies*paddedParticles, forceElementSize, "rpmdForces");
bool useDoublePrecision = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()); bool useDoublePrecision = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision());
int elementSize = (useDoublePrecision ? sizeof(mm_double4) : sizeof(mm_float4)); int elementSize = (useDoublePrecision ? sizeof(mm_double4) : sizeof(mm_float4));
positions = new OpenCLArray(cl, numCopies*paddedParticles, elementSize, "rpmdPositions"); positions.initialize(cl, numCopies*paddedParticles, elementSize, "rpmdPositions");
velocities = new OpenCLArray(cl, numCopies*paddedParticles, elementSize, "rpmdVelocities"); velocities.initialize(cl, numCopies*paddedParticles, elementSize, "rpmdVelocities");
cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed()); cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans. // Fill in the posq and velm arrays with safe values to avoid a risk of nans.
if (useDoublePrecision) { if (useDoublePrecision) {
vector<mm_double4> temp(positions->getSize()); vector<mm_double4> temp(positions.getSize());
for (int i = 0; i < positions->getSize(); i++) for (int i = 0; i < positions.getSize(); i++)
temp[i] = mm_double4(0, 0, 0, 0); temp[i] = mm_double4(0, 0, 0, 0);
positions->upload(temp); positions.upload(temp);
for (int i = 0; i < velocities->getSize(); i++) for (int i = 0; i < velocities.getSize(); i++)
temp[i] = mm_double4(0, 0, 0, 1); temp[i] = mm_double4(0, 0, 0, 1);
velocities->upload(temp); velocities.upload(temp);
} }
else { else {
vector<mm_float4> temp(positions->getSize()); vector<mm_float4> temp(positions.getSize());
for (int i = 0; i < positions->getSize(); i++) for (int i = 0; i < positions.getSize(); i++)
temp[i] = mm_float4(0, 0, 0, 0); temp[i] = mm_float4(0, 0, 0, 0);
positions->upload(temp); positions.upload(temp);
for (int i = 0; i < velocities->getSize(); i++) for (int i = 0; i < velocities.getSize(); i++)
temp[i] = mm_float4(0, 0, 0, 1); temp[i] = mm_float4(0, 0, 0, 1);
velocities->upload(temp); velocities.upload(temp);
} }
// Build a list of contractions. // Build a list of contractions.
...@@ -117,8 +104,8 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI ...@@ -117,8 +104,8 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
} }
} }
if (maxContractedCopies > 0) { if (maxContractedCopies > 0) {
contractedForces = new OpenCLArray(cl, maxContractedCopies*paddedParticles, forceElementSize, "rpmdContractedForces"); contractedForces.initialize(cl, maxContractedCopies*paddedParticles, forceElementSize, "rpmdContractedForces");
contractedPositions = new OpenCLArray(cl, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions"); contractedPositions.initialize(cl, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
} }
// Create kernels. // Create kernels.
...@@ -164,30 +151,30 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI ...@@ -164,30 +151,30 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
void OpenCLIntegrateRPMDStepKernel::initializeKernels(ContextImpl& context) { void OpenCLIntegrateRPMDStepKernel::initializeKernels(ContextImpl& context) {
hasInitializedKernel = true; hasInitializedKernel = true;
pileKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer()); pileKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer()); stepKernel.setArg<cl::Buffer>(0, positions.getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(1, velocities->getDeviceBuffer()); stepKernel.setArg<cl::Buffer>(1, velocities.getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(2, forces->getDeviceBuffer()); stepKernel.setArg<cl::Buffer>(2, forces.getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer()); velocitiesKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer()); velocitiesKernel.setArg<cl::Buffer>(1, forces.getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer()); translateKernel.setArg<cl::Buffer>(0, positions.getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer()); translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer()); translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(3, cl.getPosq().getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(3, cl.getPosq().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(4, cl.getAtomIndexArray().getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(4, cl.getAtomIndexArray().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(3, velocities->getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(3, velocities.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(4, cl.getPosq().getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(4, cl.getPosq().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(6, cl.getAtomIndexArray().getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(6, cl.getAtomIndexArray().getDeviceBuffer());
for (auto& g : groupsByCopies) { for (auto& g : groupsByCopies) {
int copies = g.first; int copies = g.first;
positionContractionKernels[copies].setArg<cl::Buffer>(0, positions->getDeviceBuffer()); positionContractionKernels[copies].setArg<cl::Buffer>(0, positions.getDeviceBuffer());
positionContractionKernels[copies].setArg<cl::Buffer>(1, contractedPositions->getDeviceBuffer()); positionContractionKernels[copies].setArg<cl::Buffer>(1, contractedPositions.getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(0, forces->getDeviceBuffer()); forceContractionKernels[copies].setArg<cl::Buffer>(0, forces.getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(1, contractedForces->getDeviceBuffer()); forceContractionKernels[copies].setArg<cl::Buffer>(1, contractedForces.getDeviceBuffer());
} }
} }
...@@ -261,9 +248,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte ...@@ -261,9 +248,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Compute forces from all groups that didn't have a specified contraction. // Compute forces from all groups that didn't have a specified contraction.
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(1, forces.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, positions->getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(5, positions.getDeviceBuffer());
for (int i = 0; i < numCopies; i++) { for (int i = 0; i < numCopies; i++) {
copyToContextKernel.setArg<cl_int>(5, i); copyToContextKernel.setArg<cl_int>(5, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms()); cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
...@@ -283,9 +270,9 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { ...@@ -283,9 +270,9 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Now loop over contractions and compute forces from them. // Now loop over contractions and compute forces from them.
if (groupsByCopies.size() > 0) { if (groupsByCopies.size() > 0) {
copyToContextKernel.setArg<cl::Buffer>(2, contractedPositions->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(2, contractedPositions.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, contractedForces->getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(1, contractedForces.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, contractedPositions->getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(5, contractedPositions.getDeviceBuffer());
for (auto& g : groupsByCopies) { for (auto& g : groupsByCopies) {
int copies = g.first; int copies = g.first;
int groupFlags = g.second; int groupFlags = g.second;
...@@ -313,7 +300,7 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { ...@@ -313,7 +300,7 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
if (groupsByCopies.size() > 0) { if (groupsByCopies.size() > 0) {
// Ensure the Context contains the positions from the last copy, since we'll assume that later. // Ensure the Context contains the positions from the last copy, since we'll assume that later.
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
copyToContextKernel.setArg<cl_int>(5, numCopies-1); copyToContextKernel.setArg<cl_int>(5, numCopies-1);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms()); cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
} }
...@@ -324,7 +311,7 @@ double OpenCLIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, ...@@ -324,7 +311,7 @@ double OpenCLIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context,
} }
void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) { void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
if (positions == NULL) if (!positions.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context"); throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
if (pos.size() != numParticles) if (pos.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()"); throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
...@@ -346,7 +333,7 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p ...@@ -346,7 +333,7 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
cl.getPosq().download(posq); cl.getPosq().download(posq);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w); posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]); cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
} }
else if (cl.getUseMixedPrecision()) { else if (cl.getUseMixedPrecision()) {
vector<mm_float4> posqf(cl.getPaddedNumAtoms()); vector<mm_float4> posqf(cl.getPaddedNumAtoms());
...@@ -354,19 +341,19 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p ...@@ -354,19 +341,19 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
vector<mm_double4> posq(cl.getPaddedNumAtoms()); vector<mm_double4> posq(cl.getPaddedNumAtoms());
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w); posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]); cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
} }
else { else {
vector<mm_float4> posq(cl.getPaddedNumAtoms()); vector<mm_float4> posq(cl.getPaddedNumAtoms());
cl.getPosq().download(posq); cl.getPosq().download(posq);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
posq[i] = mm_float4((cl_float) offsetPos[i][0], (cl_float) offsetPos[i][1], (cl_float) offsetPos[i][2], posq[i].w); posq[i] = mm_float4((cl_float) offsetPos[i][0], (cl_float) offsetPos[i][1], (cl_float) offsetPos[i][2], posq[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]); cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
} }
} }
void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) { void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) {
if (velocities == NULL) if (!velocities.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"); throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
if (vel.size() != numParticles) if (vel.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()"); throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
...@@ -375,21 +362,21 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& ...@@ -375,21 +362,21 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
cl.getVelm().download(velm); cl.getVelm().download(velm);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
velm[i] = mm_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w); velm[i] = mm_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &velm[0]); cl.getQueue().enqueueWriteBuffer(velocities.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &velm[0]);
} }
else { else {
vector<mm_float4> velm(cl.getPaddedNumAtoms()); vector<mm_float4> velm(cl.getPaddedNumAtoms());
cl.getVelm().download(velm); cl.getVelm().download(velm);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
velm[i] = mm_float4((cl_float) vel[i][0], (cl_float) vel[i][1], (cl_float) vel[i][2], velm[i].w); velm[i] = mm_float4((cl_float) vel[i][0], (cl_float) vel[i][1], (cl_float) vel[i][2], velm[i].w);
cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]); cl.getQueue().enqueueWriteBuffer(velocities.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
} }
} }
void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) { void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
if (!hasInitializedKernel) if (!hasInitializedKernel)
initializeKernels(context); initializeKernels(context);
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
copyToContextKernel.setArg<cl_int>(5, copy); copyToContextKernel.setArg<cl_int>(5, copy);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms()); cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
} }
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. * * Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -45,9 +45,8 @@ namespace OpenMM { ...@@ -45,9 +45,8 @@ namespace OpenMM {
class OpenCLIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel { class OpenCLIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
public: public:
OpenCLIntegrateRPMDStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : OpenCLIntegrateRPMDStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false), forces(NULL), positions(NULL), velocities(NULL), contractedForces(NULL), contractedPositions(NULL) { IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false) {
} }
~OpenCLIntegrateRPMDStepKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -92,11 +91,11 @@ private: ...@@ -92,11 +91,11 @@ private:
int numCopies, numParticles, workgroupSize; int numCopies, numParticles, workgroupSize;
std::map<int, int> groupsByCopies; std::map<int, int> groupsByCopies;
int groupsNotContracted; int groupsNotContracted;
OpenCLArray* forces; OpenCLArray forces;
OpenCLArray* positions; OpenCLArray positions;
OpenCLArray* velocities; OpenCLArray velocities;
OpenCLArray* contractedForces; OpenCLArray contractedForces;
OpenCLArray* contractedPositions; OpenCLArray contractedPositions;
cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel; cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
std::map<int, cl::Kernel> positionContractionKernels; std::map<int, cl::Kernel> positionContractionKernels;
std::map<int, cl::Kernel> forceContractionKernels; std::map<int, cl::Kernel> forceContractionKernels;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment