"vscode:/vscode.git/clone" did not exist on "3f0899c0e95c8a1879e4f9bce4ec60d3ab6dc18c"
Commit b33ee3b0 authored by peastman's avatar peastman
Browse files

More conversion of OpenCLArrays

parent d59b0373
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. * * Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -81,7 +81,6 @@ namespace OpenMM { ...@@ -81,7 +81,6 @@ namespace OpenMM {
class OPENMM_EXPORT_OPENCL OpenCLBondedUtilities { class OPENMM_EXPORT_OPENCL OpenCLBondedUtilities {
public: public:
OpenCLBondedUtilities(OpenCLContext& context); OpenCLBondedUtilities(OpenCLContext& context);
~OpenCLBondedUtilities();
/** /**
* Add a bonded interaction. * Add a bonded interaction.
* *
...@@ -143,8 +142,8 @@ private: ...@@ -143,8 +142,8 @@ private:
std::vector<std::vector<int> > forceSets; std::vector<std::vector<int> > forceSets;
std::vector<cl::Memory*> arguments; std::vector<cl::Memory*> arguments;
std::vector<std::string> argTypes; std::vector<std::string> argTypes;
std::vector<OpenCLArray*> atomIndices; std::vector<OpenCLArray> atomIndices;
std::vector<OpenCLArray*> bufferIndices; std::vector<OpenCLArray> bufferIndices;
std::vector<std::string> prefixCode; std::vector<std::string> prefixCode;
std::vector<std::string> energyParameterDerivatives; std::vector<std::string> energyParameterDerivatives;
int numForceBuffers, maxBonds, allGroups; int numForceBuffers, maxBonds, allGroups;
......
...@@ -33,11 +33,10 @@ namespace OpenMM { ...@@ -33,11 +33,10 @@ namespace OpenMM {
class OPENMM_EXPORT_OPENCL OpenCLCompact { class OPENMM_EXPORT_OPENCL OpenCLCompact {
public: public:
OpenCLCompact(OpenCLContext& context); OpenCLCompact(OpenCLContext& context);
~OpenCLCompact();
void compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid); void compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid);
private: private:
OpenCLContext& context; OpenCLContext& context;
OpenCLArray* dgBlockCounts; OpenCLArray dgBlockCounts;
cl::Kernel countKernel; cl::Kernel countKernel;
cl::Kernel moveValidKernel; cl::Kernel moveValidKernel;
}; };
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. * * Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -42,25 +42,24 @@ namespace OpenMM { ...@@ -42,25 +42,24 @@ namespace OpenMM {
class OPENMM_EXPORT_OPENCL OpenCLIntegrationUtilities { class OPENMM_EXPORT_OPENCL OpenCLIntegrationUtilities {
public: public:
OpenCLIntegrationUtilities(OpenCLContext& context, const System& system); OpenCLIntegrationUtilities(OpenCLContext& context, const System& system);
~OpenCLIntegrationUtilities();
/** /**
* Get the array which contains position deltas. * Get the array which contains position deltas.
*/ */
OpenCLArray& getPosDelta() { OpenCLArray& getPosDelta() {
return *posDelta; return posDelta;
} }
/** /**
* Get the array which contains random values. Each element is a float4, whose components * Get the array which contains random values. Each element is a float4, whose components
* are independent, normally distributed random numbers with mean 0 and variance 1. * are independent, normally distributed random numbers with mean 0 and variance 1.
*/ */
OpenCLArray& getRandom() { OpenCLArray& getRandom() {
return *random; return random;
} }
/** /**
* Get the array which contains the current step size. * Get the array which contains the current step size.
*/ */
OpenCLArray& getStepSize() { OpenCLArray& getStepSize() {
return *stepSize; return stepSize;
} }
/** /**
* Set the size to use for the next step. * Set the size to use for the next step.
...@@ -131,36 +130,36 @@ private: ...@@ -131,36 +130,36 @@ private:
cl::Kernel ccmaPosUpdateKernel, ccmaVelUpdateKernel; cl::Kernel ccmaPosUpdateKernel, ccmaVelUpdateKernel;
cl::Kernel vsitePositionKernel, vsiteForceKernel, vsiteAddForcesKernel; cl::Kernel vsitePositionKernel, vsiteForceKernel, vsiteAddForcesKernel;
cl::Kernel randomKernel, timeShiftKernel; cl::Kernel randomKernel, timeShiftKernel;
OpenCLArray* posDelta; OpenCLArray posDelta;
OpenCLArray* settleAtoms; OpenCLArray settleAtoms;
OpenCLArray* settleParams; OpenCLArray settleParams;
OpenCLArray* shakeAtoms; OpenCLArray shakeAtoms;
OpenCLArray* shakeParams; OpenCLArray shakeParams;
OpenCLArray* random; OpenCLArray random;
OpenCLArray* randomSeed; OpenCLArray randomSeed;
OpenCLArray* stepSize; OpenCLArray stepSize;
OpenCLArray* ccmaAtoms; OpenCLArray ccmaAtoms;
OpenCLArray* ccmaDistance; OpenCLArray ccmaDistance;
OpenCLArray* ccmaReducedMass; OpenCLArray ccmaReducedMass;
OpenCLArray* ccmaAtomConstraints; OpenCLArray ccmaAtomConstraints;
OpenCLArray* ccmaNumAtomConstraints; OpenCLArray ccmaNumAtomConstraints;
OpenCLArray* ccmaConstraintMatrixColumn; OpenCLArray ccmaConstraintMatrixColumn;
OpenCLArray* ccmaConstraintMatrixValue; OpenCLArray ccmaConstraintMatrixValue;
OpenCLArray* ccmaDelta1; OpenCLArray ccmaDelta1;
OpenCLArray* ccmaDelta2; OpenCLArray ccmaDelta2;
OpenCLArray* ccmaConverged; OpenCLArray ccmaConverged;
OpenCLArray* ccmaConvergedHostBuffer; OpenCLArray ccmaConvergedHostBuffer;
OpenCLArray* vsite2AvgAtoms; OpenCLArray vsite2AvgAtoms;
OpenCLArray* vsite2AvgWeights; OpenCLArray vsite2AvgWeights;
OpenCLArray* vsite3AvgAtoms; OpenCLArray vsite3AvgAtoms;
OpenCLArray* vsite3AvgWeights; OpenCLArray vsite3AvgWeights;
OpenCLArray* vsiteOutOfPlaneAtoms; OpenCLArray vsiteOutOfPlaneAtoms;
OpenCLArray* vsiteOutOfPlaneWeights; OpenCLArray vsiteOutOfPlaneWeights;
OpenCLArray* vsiteLocalCoordsIndex; OpenCLArray vsiteLocalCoordsIndex;
OpenCLArray* vsiteLocalCoordsAtoms; OpenCLArray vsiteLocalCoordsAtoms;
OpenCLArray* vsiteLocalCoordsWeights; OpenCLArray vsiteLocalCoordsWeights;
OpenCLArray* vsiteLocalCoordsPos; OpenCLArray vsiteLocalCoordsPos;
OpenCLArray* vsiteLocalCoordsStartIndex; OpenCLArray vsiteLocalCoordsStartIndex;
int randomPos; int randomPos;
int lastSeed, numVsites; int lastSeed, numVsites;
bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels, ccmaUseDirectBuffer, hasOverlappingVsites; bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels, ccmaUseDirectBuffer, hasOverlappingVsites;
......
...@@ -1132,12 +1132,8 @@ private: ...@@ -1132,12 +1132,8 @@ private:
class OpenCLCalcGayBerneForceKernel : public CalcGayBerneForceKernel { class OpenCLCalcGayBerneForceKernel : public CalcGayBerneForceKernel {
public: public:
OpenCLCalcGayBerneForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGayBerneForceKernel(name, platform), cl(cl), OpenCLCalcGayBerneForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGayBerneForceKernel(name, platform), cl(cl),
hasInitializedKernels(false), sortedParticles(NULL), axisParticleIndices(NULL), sigParams(NULL), epsParams(NULL), scale(NULL), exceptionParticles(NULL), hasInitializedKernels(false) {
exceptionParams(NULL), aMatrix(NULL),
bMatrix(NULL), gMatrix(NULL), exclusions(NULL), exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighbors(NULL),
neighborIndex(NULL), neighborBlockCount(NULL), sortedPos(NULL), torque(NULL) {
} }
~OpenCLCalcGayBerneForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1169,25 +1165,25 @@ private: ...@@ -1169,25 +1165,25 @@ private:
bool hasInitializedKernels; bool hasInitializedKernels;
int numRealParticles, maxNeighborBlocks; int numRealParticles, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod; GayBerneForce::NonbondedMethod nonbondedMethod;
OpenCLArray* sortedParticles; OpenCLArray sortedParticles;
OpenCLArray* axisParticleIndices; OpenCLArray axisParticleIndices;
OpenCLArray* sigParams; OpenCLArray sigParams;
OpenCLArray* epsParams; OpenCLArray epsParams;
OpenCLArray* scale; OpenCLArray scale;
OpenCLArray* exceptionParticles; OpenCLArray exceptionParticles;
OpenCLArray* exceptionParams; OpenCLArray exceptionParams;
OpenCLArray* aMatrix; OpenCLArray aMatrix;
OpenCLArray* bMatrix; OpenCLArray bMatrix;
OpenCLArray* gMatrix; OpenCLArray gMatrix;
OpenCLArray* exclusions; OpenCLArray exclusions;
OpenCLArray* exclusionStartIndex; OpenCLArray exclusionStartIndex;
OpenCLArray* blockCenter; OpenCLArray blockCenter;
OpenCLArray* blockBoundingBox; OpenCLArray blockBoundingBox;
OpenCLArray* neighbors; OpenCLArray neighbors;
OpenCLArray* neighborIndex; OpenCLArray neighborIndex;
OpenCLArray* neighborBlockCount; OpenCLArray neighborBlockCount;
OpenCLArray* sortedPos; OpenCLArray sortedPos;
OpenCLArray* torque; OpenCLArray torque;
std::vector<bool> isRealParticle; std::vector<bool> isRealParticle;
std::vector<std::pair<int, int> > exceptionAtoms; std::vector<std::pair<int, int> > exceptionAtoms;
std::vector<std::pair<int, int> > excludedPairs; std::vector<std::pair<int, int> > excludedPairs;
...@@ -1200,9 +1196,8 @@ private: ...@@ -1200,9 +1196,8 @@ private:
class OpenCLCalcCustomCVForceKernel : public CalcCustomCVForceKernel { class OpenCLCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
public: public:
OpenCLCalcCustomCVForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcCustomCVForceKernel(name, platform), OpenCLCalcCustomCVForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcCustomCVForceKernel(name, platform),
cl(cl), hasInitializedKernels(false), invAtomOrder(NULL), innerInvAtomOrder(NULL) { cl(cl), hasInitializedKernels(false) {
} }
~OpenCLCalcCustomCVForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1236,9 +1231,9 @@ private: ...@@ -1236,9 +1231,9 @@ private:
std::vector<std::string> variableNames, paramDerivNames, globalParameterNames; std::vector<std::string> variableNames, paramDerivNames, globalParameterNames;
std::vector<Lepton::ExpressionProgram> variableDerivExpressions; std::vector<Lepton::ExpressionProgram> variableDerivExpressions;
std::vector<Lepton::ExpressionProgram> paramDerivExpressions; std::vector<Lepton::ExpressionProgram> paramDerivExpressions;
std::vector<OpenCLArray*> cvForces; std::vector<OpenCLArray> cvForces;
OpenCLArray* invAtomOrder; OpenCLArray invAtomOrder;
OpenCLArray* innerInvAtomOrder; OpenCLArray innerInvAtomOrder;
cl::Kernel copyStateKernel, copyForcesKernel, addForcesKernel; cl::Kernel copyStateKernel, copyForcesKernel, addForcesKernel;
}; };
...@@ -1247,10 +1242,8 @@ private: ...@@ -1247,10 +1242,8 @@ private:
*/ */
class OpenCLCalcRMSDForceKernel : public CalcRMSDForceKernel { class OpenCLCalcRMSDForceKernel : public CalcRMSDForceKernel {
public: public:
OpenCLCalcRMSDForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcRMSDForceKernel(name, platform), OpenCLCalcRMSDForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcRMSDForceKernel(name, platform), cl(cl) {
cl(cl), referencePos(NULL), particles(NULL), buffer(NULL) {
} }
~OpenCLCalcRMSDForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1289,9 +1282,9 @@ private: ...@@ -1289,9 +1282,9 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
double sumNormRef; double sumNormRef;
OpenCLArray* referencePos; OpenCLArray referencePos;
OpenCLArray* particles; OpenCLArray particles;
OpenCLArray* buffer; OpenCLArray buffer;
cl::Kernel kernel1, kernel2; cl::Kernel kernel1, kernel2;
}; };
...@@ -1337,9 +1330,8 @@ private: ...@@ -1337,9 +1330,8 @@ private:
class OpenCLIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel { class OpenCLIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
public: public:
OpenCLIntegrateLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateLangevinStepKernel(name, platform), cl(cl), OpenCLIntegrateLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateLangevinStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), params(NULL) { hasInitializedKernels(false) {
} }
~OpenCLIntegrateLangevinStepKernel();
/** /**
* Initialize the kernel, setting up the particle masses. * Initialize the kernel, setting up the particle masses.
* *
...@@ -1365,7 +1357,7 @@ private: ...@@ -1365,7 +1357,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
double prevTemp, prevFriction, prevStepSize; double prevTemp, prevFriction, prevStepSize;
bool hasInitializedKernels; bool hasInitializedKernels;
OpenCLArray* params; OpenCLArray params;
cl::Kernel kernel1, kernel2; cl::Kernel kernel1, kernel2;
}; };
...@@ -1451,9 +1443,8 @@ private: ...@@ -1451,9 +1443,8 @@ private:
class OpenCLIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel { class OpenCLIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
public: public:
OpenCLIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVariableLangevinStepKernel(name, platform), cl(cl), OpenCLIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVariableLangevinStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), params(NULL) { hasInitializedKernels(false) {
} }
~OpenCLIntegrateVariableLangevinStepKernel();
/** /**
* Initialize the kernel, setting up the particle masses. * Initialize the kernel, setting up the particle masses.
* *
...@@ -1481,7 +1472,7 @@ private: ...@@ -1481,7 +1472,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
int blockSize; int blockSize;
OpenCLArray* params; OpenCLArray params;
cl::Kernel kernel1, kernel2, selectSizeKernel; cl::Kernel kernel1, kernel2, selectSizeKernel;
double prevTemp, prevFriction, prevErrorTol; double prevTemp, prevFriction, prevErrorTol;
}; };
...@@ -1493,8 +1484,7 @@ class OpenCLIntegrateCustomStepKernel : public IntegrateCustomStepKernel { ...@@ -1493,8 +1484,7 @@ class OpenCLIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public: public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER}; enum GlobalTargetType {DT, VARIABLE, PARAMETER};
OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl), OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL), hasInitializedKernels(false), localValuesAreCurrent(false), perDofValues(NULL), needsEnergyParamDerivs(false) {
randomSeed(NULL), perDofEnergyParamDerivs(NULL), perDofValues(NULL), needsEnergyParamDerivs(false) {
} }
~OpenCLIntegrateCustomStepKernel(); ~OpenCLIntegrateCustomStepKernel();
/** /**
...@@ -1575,15 +1565,15 @@ private: ...@@ -1575,15 +1565,15 @@ private:
int numGlobalVariables, sumWorkGroupSize; int numGlobalVariables, sumWorkGroupSize;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs; bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent; mutable bool localValuesAreCurrent;
OpenCLArray* globalValues; OpenCLArray globalValues;
OpenCLArray* sumBuffer; OpenCLArray sumBuffer;
OpenCLArray* summedValue; OpenCLArray summedValue;
OpenCLArray* uniformRandoms; OpenCLArray uniformRandoms;
OpenCLArray* randomSeed; OpenCLArray randomSeed;
OpenCLArray* perDofEnergyParamDerivs; OpenCLArray perDofEnergyParamDerivs;
std::vector<OpenCLArray*> tabulatedFunctions; std::vector<OpenCLArray> tabulatedFunctions;
std::map<int, double> savedEnergy; std::map<int, double> savedEnergy;
std::map<int, OpenCLArray*> savedForces; std::map<int, OpenCLArray> savedForces;
std::set<int> validSavedForces; std::set<int> validSavedForces;
OpenCLParameterSet* perDofValues; OpenCLParameterSet* perDofValues;
mutable std::vector<std::vector<cl_float> > localPerDofValuesFloat; mutable std::vector<std::vector<cl_float> > localPerDofValuesFloat;
...@@ -1635,9 +1625,8 @@ public: ...@@ -1635,9 +1625,8 @@ public:
class OpenCLApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel { class OpenCLApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel {
public: public:
OpenCLApplyAndersenThermostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyAndersenThermostatKernel(name, platform), cl(cl), OpenCLApplyAndersenThermostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyAndersenThermostatKernel(name, platform), cl(cl),
hasInitializedKernels(false), atomGroups(NULL) { hasInitializedKernels(false) {
} }
~OpenCLApplyAndersenThermostatKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1655,7 +1644,7 @@ private: ...@@ -1655,7 +1644,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
int randomSeed; int randomSeed;
OpenCLArray* atomGroups; OpenCLArray atomGroups;
cl::Kernel kernel; cl::Kernel kernel;
}; };
...@@ -1665,9 +1654,8 @@ private: ...@@ -1665,9 +1654,8 @@ private:
class OpenCLApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel { class OpenCLApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel {
public: public:
OpenCLApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyMonteCarloBarostatKernel(name, platform), cl(cl), OpenCLApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyMonteCarloBarostatKernel(name, platform), cl(cl),
hasInitializedKernels(false), savedPositions(NULL), savedForces(NULL), moleculeAtoms(NULL), moleculeStartIndex(NULL) { hasInitializedKernels(false) {
} }
~OpenCLApplyMonteCarloBarostatKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1699,10 +1687,10 @@ private: ...@@ -1699,10 +1687,10 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
int numMolecules; int numMolecules;
OpenCLArray* savedPositions; OpenCLArray savedPositions;
OpenCLArray* savedForces; OpenCLArray savedForces;
OpenCLArray* moleculeAtoms; OpenCLArray moleculeAtoms;
OpenCLArray* moleculeStartIndex; OpenCLArray moleculeStartIndex;
cl::Kernel kernel; cl::Kernel kernel;
std::vector<int> lastAtomOrder; std::vector<int> lastAtomOrder;
}; };
...@@ -1712,9 +1700,8 @@ private: ...@@ -1712,9 +1700,8 @@ private:
*/ */
class OpenCLRemoveCMMotionKernel : public RemoveCMMotionKernel { class OpenCLRemoveCMMotionKernel : public RemoveCMMotionKernel {
public: public:
OpenCLRemoveCMMotionKernel(std::string name, const Platform& platform, OpenCLContext& cl) : RemoveCMMotionKernel(name, platform), cl(cl), cmMomentum(NULL) { OpenCLRemoveCMMotionKernel(std::string name, const Platform& platform, OpenCLContext& cl) : RemoveCMMotionKernel(name, platform), cl(cl) {
} }
~OpenCLRemoveCMMotionKernel();
/** /**
* Initialize the kernel, setting up the particle masses. * Initialize the kernel, setting up the particle masses.
* *
...@@ -1731,7 +1718,7 @@ public: ...@@ -1731,7 +1718,7 @@ public:
private: private:
OpenCLContext& cl; OpenCLContext& cl;
int frequency; int frequency;
OpenCLArray* cmMomentum; OpenCLArray cmMomentum;
cl::Kernel kernel1, kernel2; cl::Kernel kernel1, kernel2;
}; };
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. * * Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -175,55 +175,55 @@ public: ...@@ -175,55 +175,55 @@ public:
* Get the array containing the center of each atom block. * Get the array containing the center of each atom block.
*/ */
OpenCLArray& getBlockCenters() { OpenCLArray& getBlockCenters() {
return *blockCenter; return blockCenter;
} }
/** /**
* Get the array containing the dimensions of each atom block. * Get the array containing the dimensions of each atom block.
*/ */
OpenCLArray& getBlockBoundingBoxes() { OpenCLArray& getBlockBoundingBoxes() {
return *blockBoundingBox; return blockBoundingBox;
} }
/** /**
* Get the array whose first element contains the number of tiles with interactions. * Get the array whose first element contains the number of tiles with interactions.
*/ */
OpenCLArray& getInteractionCount() { OpenCLArray& getInteractionCount() {
return *interactionCount; return interactionCount;
} }
/** /**
* Get the array containing tiles with interactions. * Get the array containing tiles with interactions.
*/ */
OpenCLArray& getInteractingTiles() { OpenCLArray& getInteractingTiles() {
return *interactingTiles; return interactingTiles;
} }
/** /**
* Get the array containing the atoms in each tile with interactions. * Get the array containing the atoms in each tile with interactions.
*/ */
OpenCLArray& getInteractingAtoms() { OpenCLArray& getInteractingAtoms() {
return *interactingAtoms; return interactingAtoms;
} }
/** /**
* Get the array containing exclusion flags. * Get the array containing exclusion flags.
*/ */
OpenCLArray& getExclusions() { OpenCLArray& getExclusions() {
return *exclusions; return exclusions;
} }
/** /**
* Get the array containing tiles with exclusions. * Get the array containing tiles with exclusions.
*/ */
OpenCLArray& getExclusionTiles() { OpenCLArray& getExclusionTiles() {
return *exclusionTiles; return exclusionTiles;
} }
/** /**
* Get the array containing the index into the exclusion array for each tile. * Get the array containing the index into the exclusion array for each tile.
*/ */
OpenCLArray& getExclusionIndices() { OpenCLArray& getExclusionIndices() {
return *exclusionIndices; return exclusionIndices;
} }
/** /**
* Get the array listing where the exclusion data starts for each row. * Get the array listing where the exclusion data starts for each row.
*/ */
OpenCLArray& getExclusionRowIndices() { OpenCLArray& getExclusionRowIndices() {
return *exclusionRowIndices; return exclusionRowIndices;
} }
/** /**
* Get the index of the first tile this context is responsible for processing. * Get the index of the first tile this context is responsible for processing.
...@@ -275,20 +275,20 @@ private: ...@@ -275,20 +275,20 @@ private:
class BlockSortTrait; class BlockSortTrait;
OpenCLContext& context; OpenCLContext& context;
std::map<int, KernelSet> groupKernels; std::map<int, KernelSet> groupKernels;
OpenCLArray* exclusionTiles; OpenCLArray exclusionTiles;
OpenCLArray* exclusions; OpenCLArray exclusions;
OpenCLArray* exclusionIndices; OpenCLArray exclusionIndices;
OpenCLArray* exclusionRowIndices; OpenCLArray exclusionRowIndices;
OpenCLArray* interactingTiles; OpenCLArray interactingTiles;
OpenCLArray* interactingAtoms; OpenCLArray interactingAtoms;
OpenCLArray* interactionCount; OpenCLArray interactionCount;
OpenCLArray* blockCenter; OpenCLArray blockCenter;
OpenCLArray* blockBoundingBox; OpenCLArray blockBoundingBox;
OpenCLArray* sortedBlocks; OpenCLArray sortedBlocks;
OpenCLArray* sortedBlockCenter; OpenCLArray sortedBlockCenter;
OpenCLArray* sortedBlockBoundingBox; OpenCLArray sortedBlockBoundingBox;
OpenCLArray* oldPositions; OpenCLArray oldPositions;
OpenCLArray* rebuildNeighborList; OpenCLArray rebuildNeighborList;
OpenCLSort* blockSorter; OpenCLSort* blockSorter;
cl::Event downloadCountEvent; cl::Event downloadCountEvent;
cl::Buffer* pinnedCountBuffer; cl::Buffer* pinnedCountBuffer;
......
...@@ -84,7 +84,7 @@ private: ...@@ -84,7 +84,7 @@ private:
std::vector<long long> completionTimes; std::vector<long long> completionTimes;
std::vector<double> contextNonbondedFractions; std::vector<double> contextNonbondedFractions;
std::vector<int> tileCounts; std::vector<int> tileCounts;
OpenCLArray* contextForces; OpenCLArray contextForces;
cl::Buffer* pinnedPositionBuffer; cl::Buffer* pinnedPositionBuffer;
cl::Buffer* pinnedForceBuffer; cl::Buffer* pinnedForceBuffer;
void* pinnedPositionMemory; void* pinnedPositionMemory;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2010-2013 Stanford University and the Authors. * * Portions copyright (c) 2010-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -87,11 +87,11 @@ public: ...@@ -87,11 +87,11 @@ public:
private: private:
OpenCLContext& context; OpenCLContext& context;
SortTrait* trait; SortTrait* trait;
OpenCLArray* dataRange; OpenCLArray dataRange;
OpenCLArray* bucketOfElement; OpenCLArray bucketOfElement;
OpenCLArray* offsetInBucket; OpenCLArray offsetInBucket;
OpenCLArray* bucketOffset; OpenCLArray bucketOffset;
OpenCLArray* buckets; OpenCLArray buckets;
cl::Kernel shortListKernel, computeRangeKernel, assignElementsKernel, computeBucketPositionsKernel, copyToBucketsKernel, sortBucketsKernel; cl::Kernel shortListKernel, computeRangeKernel, assignElementsKernel, computeBucketPositionsKernel, copyToBucketsKernel, sortBucketsKernel;
unsigned int dataLength, rangeKernelSize, positionsKernelSize, sortKernelSize; unsigned int dataLength, rangeKernelSize, positionsKernelSize, sortKernelSize;
bool isShortList; bool isShortList;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. * * Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -36,13 +36,6 @@ using namespace std; ...@@ -36,13 +36,6 @@ using namespace std;
OpenCLBondedUtilities::OpenCLBondedUtilities(OpenCLContext& context) : context(context), numForceBuffers(0), maxBonds(0), allGroups(0), hasInitializedKernels(false) { OpenCLBondedUtilities::OpenCLBondedUtilities(OpenCLContext& context) : context(context), numForceBuffers(0), maxBonds(0), allGroups(0), hasInitializedKernels(false) {
} }
OpenCLBondedUtilities::~OpenCLBondedUtilities() {
for (int i = 0; i < (int) atomIndices.size(); i++)
delete atomIndices[i];
for (int i = 0; i < (int) bufferIndices.size(); i++)
delete bufferIndices[i];
}
void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, const string& source, int group) { void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, const string& source, int group) {
if (atoms.size() > 0) { if (atoms.size() > 0) {
forceAtoms.push_back(atoms); forceAtoms.push_back(atoms);
...@@ -92,6 +85,7 @@ void OpenCLBondedUtilities::initialize(const System& system) { ...@@ -92,6 +85,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
vector<vector<cl_uint> > bufferVec(numForces); vector<vector<cl_uint> > bufferVec(numForces);
vector<vector<int> > bufferCounter(numForces, vector<int>(system.getNumParticles(), 0)); vector<vector<int> > bufferCounter(numForces, vector<int>(system.getNumParticles(), 0));
vector<int> numBuffers(numForces, 0); vector<int> numBuffers(numForces, 0);
atomIndices.resize(numForces);
for (int i = 0; i < numForces; i++) { for (int i = 0; i < numForces; i++) {
int numBonds = forceAtoms[i].size(); int numBonds = forceAtoms[i].size();
int numAtoms = forceAtoms[i][0].size(); int numAtoms = forceAtoms[i][0].size();
...@@ -101,9 +95,8 @@ void OpenCLBondedUtilities::initialize(const System& system) { ...@@ -101,9 +95,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for (int atom = 0; atom < numAtoms; atom++) for (int atom = 0; atom < numAtoms; atom++)
indexVec[bond*width+atom] = forceAtoms[i][bond][atom]; indexVec[bond*width+atom] = forceAtoms[i][bond][atom];
} }
OpenCLArray* indices = OpenCLArray::create<cl_uint>(context, indexVec.size(), "bondedIndices"); atomIndices[i].initialize<cl_uint>(context, indexVec.size(), "bondedIndices");
indices->upload(indexVec); atomIndices[i].upload(indexVec);
atomIndices.push_back(indices);
bufferVec[i].resize(width*numBonds, 0); bufferVec[i].resize(width*numBonds, 0);
for (int bond = 0; bond < numBonds; bond++) { for (int bond = 0; bond < numBonds; bond++) {
for (int atom = 0; atom < numAtoms; atom++) for (int atom = 0; atom < numAtoms; atom++)
...@@ -177,9 +170,8 @@ void OpenCLBondedUtilities::initialize(const System& system) { ...@@ -177,9 +170,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for (int bond = 0; bond < numBonds; bond++) for (int bond = 0; bond < numBonds; bond++)
for (int atom = 0; atom < numAtoms; atom++) for (int atom = 0; atom < numAtoms; atom++)
bufferVec[force][bond*width+atom] += bufferCounter[forceSets[i][k]][forceAtoms[force][bond][atom]]; bufferVec[force][bond*width+atom] += bufferCounter[forceSets[i][k]][forceAtoms[force][bond][atom]];
OpenCLArray* buffers = OpenCLArray::create<cl_uint>(context, bufferVec[force].size(), "bondedBufferIndices"); bufferIndices[force].initialize<cl_uint>(context, bufferVec[force].size(), "bondedBufferIndices");
buffers->upload(bufferVec[force]); bufferIndices[force].upload(bufferVec[force]);
bufferIndices[force] = buffers;
} }
// Create the kernels. // Create the kernels.
...@@ -291,8 +283,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) { ...@@ -291,8 +283,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
kernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
index += 6; index += 6;
for (int j = 0; j < (int) forceSets[i].size(); j++) { for (int j = 0; j < (int) forceSets[i].size(); j++) {
kernel.setArg<cl::Buffer>(index++, atomIndices[forceSets[i][j]]->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, atomIndices[forceSets[i][j]].getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, bufferIndices[forceSets[i][j]]->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, bufferIndices[forceSets[i][j]].getDeviceBuffer());
} }
for (int j = 0; j < (int) arguments.size(); j++) for (int j = 0; j < (int) arguments.size(); j++)
kernel.setArg<cl::Memory>(index++, *arguments[j]); kernel.setArg<cl::Memory>(index++, *arguments[j]);
......
...@@ -29,18 +29,13 @@ ...@@ -29,18 +29,13 @@
using namespace OpenMM; using namespace OpenMM;
OpenCLCompact::OpenCLCompact(OpenCLContext& context) : context(context), dgBlockCounts(NULL) { OpenCLCompact::OpenCLCompact(OpenCLContext& context) : context(context) {
dgBlockCounts = OpenCLArray::create<cl_uint>(context, context.getNumThreadBlocks(), "dgBlockCounts"); dgBlockCounts.initialize<cl_uint>(context, context.getNumThreadBlocks(), "dgBlockCounts");
cl::Program program = context.createProgram(OpenCLKernelSources::compact); cl::Program program = context.createProgram(OpenCLKernelSources::compact);
countKernel = cl::Kernel(program, "countElts"); countKernel = cl::Kernel(program, "countElts");
moveValidKernel = cl::Kernel(program, "moveValidElementsStaged"); moveValidKernel = cl::Kernel(program, "moveValidElementsStaged");
} }
OpenCLCompact::~OpenCLCompact() {
if (dgBlockCounts != NULL)
delete dgBlockCounts;
}
void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid) { void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid) {
// Figure out # elements per block // Figure out # elements per block
unsigned int len = dIn.getSize(); unsigned int len = dIn.getSize();
...@@ -51,7 +46,7 @@ void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArr ...@@ -51,7 +46,7 @@ void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArr
// TODO: implement loop over blocks of 10M // TODO: implement loop over blocks of 10M
// Phase 1: Calculate number of valid elements per thread block // Phase 1: Calculate number of valid elements per thread block
countKernel.setArg<cl::Buffer>(0, dgBlockCounts->getDeviceBuffer()); countKernel.setArg<cl::Buffer>(0, dgBlockCounts.getDeviceBuffer());
countKernel.setArg<cl::Buffer>(1, dValid.getDeviceBuffer()); countKernel.setArg<cl::Buffer>(1, dValid.getDeviceBuffer());
countKernel.setArg<cl_uint>(2, len); countKernel.setArg<cl_uint>(2, len);
countKernel.setArg(3, 128*sizeof(cl_uint), NULL); countKernel.setArg(3, 128*sizeof(cl_uint), NULL);
...@@ -61,7 +56,7 @@ void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArr ...@@ -61,7 +56,7 @@ void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArr
moveValidKernel.setArg<cl::Buffer>(0, dIn.getDeviceBuffer()); moveValidKernel.setArg<cl::Buffer>(0, dIn.getDeviceBuffer());
moveValidKernel.setArg<cl::Buffer>(1, dOut.getDeviceBuffer()); moveValidKernel.setArg<cl::Buffer>(1, dOut.getDeviceBuffer());
moveValidKernel.setArg<cl::Buffer>(2, dValid.getDeviceBuffer()); moveValidKernel.setArg<cl::Buffer>(2, dValid.getDeviceBuffer());
moveValidKernel.setArg<cl::Buffer>(3, dgBlockCounts->getDeviceBuffer()); moveValidKernel.setArg<cl::Buffer>(3, dgBlockCounts.getDeviceBuffer());
moveValidKernel.setArg<cl_uint>(4, len); moveValidKernel.setArg<cl_uint>(4, len);
moveValidKernel.setArg<cl::Buffer>(5, numValid.getDeviceBuffer()); moveValidKernel.setArg<cl::Buffer>(5, numValid.getDeviceBuffer());
moveValidKernel.setArg(6, 128*sizeof(cl_uint), NULL); moveValidKernel.setArg(6, 128*sizeof(cl_uint), NULL);
......
This diff is collapsed.
...@@ -118,14 +118,12 @@ private: ...@@ -118,14 +118,12 @@ private:
OpenCLParallelCalcForcesAndEnergyKernel::OpenCLParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, OpenCLPlatform::PlatformData& data) : OpenCLParallelCalcForcesAndEnergyKernel::OpenCLParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, OpenCLPlatform::PlatformData& data) :
CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()), CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()),
tileCounts(data.contexts.size()), contextForces(NULL), pinnedPositionBuffer(NULL), pinnedPositionMemory(NULL), pinnedForceBuffer(NULL), pinnedForceMemory(NULL) { tileCounts(data.contexts.size()), pinnedPositionBuffer(NULL), pinnedPositionMemory(NULL), pinnedForceBuffer(NULL), pinnedForceMemory(NULL) {
for (int i = 0; i < (int) data.contexts.size(); i++) for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcForcesAndEnergyKernel(name, platform, *data.contexts[i]))); kernels.push_back(Kernel(new OpenCLCalcForcesAndEnergyKernel(name, platform, *data.contexts[i])));
} }
OpenCLParallelCalcForcesAndEnergyKernel::~OpenCLParallelCalcForcesAndEnergyKernel() { OpenCLParallelCalcForcesAndEnergyKernel::~OpenCLParallelCalcForcesAndEnergyKernel() {
if (contextForces != NULL)
delete contextForces;
if (pinnedPositionBuffer != NULL) if (pinnedPositionBuffer != NULL)
delete pinnedPositionBuffer; delete pinnedPositionBuffer;
if (pinnedForceBuffer != NULL) if (pinnedForceBuffer != NULL)
...@@ -142,8 +140,8 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -142,8 +140,8 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) { void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
OpenCLContext& cl0 = *data.contexts[0]; OpenCLContext& cl0 = *data.contexts[0];
int elementSize = (cl0.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4)); int elementSize = (cl0.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
if (contextForces == NULL) { if (!contextForces.isInitialized()) {
contextForces = OpenCLArray::create<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(), contextForces.initialize<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(),
data.contexts.size()*cl0.getPaddedNumAtoms(), "contextForces"); data.contexts.size()*cl0.getPaddedNumAtoms(), "contextForces");
int bufferBytes = (data.contexts.size()-1)*cl0.getPaddedNumAtoms()*elementSize; int bufferBytes = (data.contexts.size()-1)*cl0.getPaddedNumAtoms()*elementSize;
pinnedPositionBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes); pinnedPositionBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes);
...@@ -179,9 +177,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c ...@@ -179,9 +177,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
OpenCLContext& cl = *data.contexts[0]; OpenCLContext& cl = *data.contexts[0];
int numAtoms = cl.getPaddedNumAtoms(); int numAtoms = cl.getPaddedNumAtoms();
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4)); int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueWriteBuffer(contextForces->getDeviceBuffer(), CL_FALSE, numAtoms*elementSize, cl.getQueue().enqueueWriteBuffer(contextForces.getDeviceBuffer(), CL_FALSE, numAtoms*elementSize,
numAtoms*(data.contexts.size()-1)*elementSize, pinnedForceMemory); numAtoms*(data.contexts.size()-1)*elementSize, pinnedForceMemory);
cl.reduceBuffer(*contextForces, data.contexts.size()); cl.reduceBuffer(contextForces, data.contexts.size());
// Balance work between the contexts by transferring a little nonbonded work from the context that // Balance work between the contexts by transferring a little nonbonded work from the context that
// finished last to the one that finished first. // finished last to the one that finished first.
......
...@@ -31,8 +31,7 @@ ...@@ -31,8 +31,7 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int length) : context(context), trait(trait), OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int length) : context(context), trait(trait), dataLength(length) {
dataRange(NULL), bucketOfElement(NULL), offsetInBucket(NULL), bucketOffset(NULL), buckets(NULL), dataLength(length) {
// Create kernels. // Create kernels.
std::map<std::string, std::string> replacements; std::map<std::string, std::string> replacements;
...@@ -81,26 +80,16 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le ...@@ -81,26 +80,16 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le
// Create workspace arrays. // Create workspace arrays.
if (!isShortList) { if (!isShortList) {
dataRange = new OpenCLArray(context, 2, trait->getKeySize(), "sortDataRange"); dataRange.initialize(context, 2, trait->getKeySize(), "sortDataRange");
bucketOffset = OpenCLArray::create<cl_uint>(context, numBuckets, "bucketOffset"); bucketOffset.initialize<cl_uint>(context, numBuckets, "bucketOffset");
bucketOfElement = OpenCLArray::create<cl_uint>(context, length, "bucketOfElement"); bucketOfElement.initialize<cl_uint>(context, length, "bucketOfElement");
offsetInBucket = OpenCLArray::create<cl_uint>(context, length, "offsetInBucket"); offsetInBucket.initialize<cl_uint>(context, length, "offsetInBucket");
buckets = new OpenCLArray(context, length, trait->getDataSize(), "buckets"); buckets.initialize(context, length, trait->getDataSize(), "buckets");
} }
} }
OpenCLSort::~OpenCLSort() { OpenCLSort::~OpenCLSort() {
delete trait; delete trait;
if (dataRange != NULL)
delete dataRange;
if (bucketOfElement != NULL)
delete bucketOfElement;
if (offsetInBucket != NULL)
delete offsetInBucket;
if (bucketOffset != NULL)
delete bucketOffset;
if (buckets != NULL)
delete buckets;
} }
void OpenCLSort::sort(OpenCLArray& data) { void OpenCLSort::sort(OpenCLArray& data) {
...@@ -119,14 +108,14 @@ void OpenCLSort::sort(OpenCLArray& data) { ...@@ -119,14 +108,14 @@ void OpenCLSort::sort(OpenCLArray& data) {
else { else {
// Compute the range of data values. // Compute the range of data values.
unsigned int numBuckets = bucketOffset->getSize(); unsigned int numBuckets = bucketOffset.getSize();
computeRangeKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer()); computeRangeKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
computeRangeKernel.setArg<cl_uint>(1, data.getSize()); computeRangeKernel.setArg<cl_uint>(1, data.getSize());
computeRangeKernel.setArg<cl::Buffer>(2, dataRange->getDeviceBuffer()); computeRangeKernel.setArg<cl::Buffer>(2, dataRange.getDeviceBuffer());
computeRangeKernel.setArg(3, rangeKernelSize*trait->getKeySize(), NULL); computeRangeKernel.setArg(3, rangeKernelSize*trait->getKeySize(), NULL);
computeRangeKernel.setArg(4, rangeKernelSize*trait->getKeySize(), NULL); computeRangeKernel.setArg(4, rangeKernelSize*trait->getKeySize(), NULL);
computeRangeKernel.setArg<cl_int>(5, numBuckets); computeRangeKernel.setArg<cl_int>(5, numBuckets);
computeRangeKernel.setArg<cl::Buffer>(6, bucketOffset->getDeviceBuffer()); computeRangeKernel.setArg<cl::Buffer>(6, bucketOffset.getDeviceBuffer());
context.executeKernel(computeRangeKernel, rangeKernelSize, rangeKernelSize); context.executeKernel(computeRangeKernel, rangeKernelSize, rangeKernelSize);
// Assign array elements to buckets. // Assign array elements to buckets.
...@@ -134,35 +123,35 @@ void OpenCLSort::sort(OpenCLArray& data) { ...@@ -134,35 +123,35 @@ void OpenCLSort::sort(OpenCLArray& data) {
assignElementsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer()); assignElementsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
assignElementsKernel.setArg<cl_int>(1, data.getSize()); assignElementsKernel.setArg<cl_int>(1, data.getSize());
assignElementsKernel.setArg<cl_int>(2, numBuckets); assignElementsKernel.setArg<cl_int>(2, numBuckets);
assignElementsKernel.setArg<cl::Buffer>(3, dataRange->getDeviceBuffer()); assignElementsKernel.setArg<cl::Buffer>(3, dataRange.getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(4, bucketOffset->getDeviceBuffer()); assignElementsKernel.setArg<cl::Buffer>(4, bucketOffset.getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(5, bucketOfElement->getDeviceBuffer()); assignElementsKernel.setArg<cl::Buffer>(5, bucketOfElement.getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(6, offsetInBucket->getDeviceBuffer()); assignElementsKernel.setArg<cl::Buffer>(6, offsetInBucket.getDeviceBuffer());
context.executeKernel(assignElementsKernel, data.getSize()); context.executeKernel(assignElementsKernel, data.getSize());
// Compute the position of each bucket. // Compute the position of each bucket.
computeBucketPositionsKernel.setArg<cl_int>(0, numBuckets); computeBucketPositionsKernel.setArg<cl_int>(0, numBuckets);
computeBucketPositionsKernel.setArg<cl::Buffer>(1, bucketOffset->getDeviceBuffer()); computeBucketPositionsKernel.setArg<cl::Buffer>(1, bucketOffset.getDeviceBuffer());
computeBucketPositionsKernel.setArg(2, positionsKernelSize*sizeof(cl_int), NULL); computeBucketPositionsKernel.setArg(2, positionsKernelSize*sizeof(cl_int), NULL);
context.executeKernel(computeBucketPositionsKernel, positionsKernelSize, positionsKernelSize); context.executeKernel(computeBucketPositionsKernel, positionsKernelSize, positionsKernelSize);
// Copy the data into the buckets. // Copy the data into the buckets.
copyToBucketsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer()); copyToBucketsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(1, buckets->getDeviceBuffer()); copyToBucketsKernel.setArg<cl::Buffer>(1, buckets.getDeviceBuffer());
copyToBucketsKernel.setArg<cl_int>(2, data.getSize()); copyToBucketsKernel.setArg<cl_int>(2, data.getSize());
copyToBucketsKernel.setArg<cl::Buffer>(3, bucketOffset->getDeviceBuffer()); copyToBucketsKernel.setArg<cl::Buffer>(3, bucketOffset.getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(4, bucketOfElement->getDeviceBuffer()); copyToBucketsKernel.setArg<cl::Buffer>(4, bucketOfElement.getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(5, offsetInBucket->getDeviceBuffer()); copyToBucketsKernel.setArg<cl::Buffer>(5, offsetInBucket.getDeviceBuffer());
context.executeKernel(copyToBucketsKernel, data.getSize()); context.executeKernel(copyToBucketsKernel, data.getSize());
// Sort each bucket. // Sort each bucket.
sortBucketsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer()); sortBucketsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
sortBucketsKernel.setArg<cl::Buffer>(1, buckets->getDeviceBuffer()); sortBucketsKernel.setArg<cl::Buffer>(1, buckets.getDeviceBuffer());
sortBucketsKernel.setArg<cl_int>(2, numBuckets); sortBucketsKernel.setArg<cl_int>(2, numBuckets);
sortBucketsKernel.setArg<cl::Buffer>(3, bucketOffset->getDeviceBuffer()); sortBucketsKernel.setArg<cl::Buffer>(3, bucketOffset.getDeviceBuffer());
sortBucketsKernel.setArg(4, sortKernelSize*trait->getDataSize(), NULL); sortBucketsKernel.setArg(4, sortKernelSize*trait->getDataSize(), NULL);
context.executeKernel(sortBucketsKernel, ((data.getSize()+sortKernelSize-1)/sortKernelSize)*sortKernelSize, sortKernelSize); context.executeKernel(sortBucketsKernel, ((data.getSize()+sortKernelSize-1)/sortKernelSize)*sortKernelSize, sortKernelSize);
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. * * Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -107,13 +107,6 @@ private: ...@@ -107,13 +107,6 @@ private:
const DrudeForce& force; const DrudeForce& force;
}; };
OpenCLCalcDrudeForceKernel::~OpenCLCalcDrudeForceKernel() {
if (particleParams != NULL)
delete particleParams;
if (pairParams != NULL)
delete pairParams;
}
void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) { void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
int numContexts = cl.getPlatformData().contexts.size(); int numContexts = cl.getPlatformData().contexts.size();
int startParticleIndex = cl.getContextIndex()*force.getNumParticles()/numContexts; int startParticleIndex = cl.getContextIndex()*force.getNumParticles()/numContexts;
...@@ -123,7 +116,7 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor ...@@ -123,7 +116,7 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
// Create the harmonic interaction . // Create the harmonic interaction .
vector<vector<int> > atoms(numParticles, vector<int>(5)); vector<vector<int> > atoms(numParticles, vector<int>(5));
particleParams = OpenCLArray::create<mm_float4>(cl, numParticles, "drudeParticleParams"); particleParams.initialize<mm_float4>(cl, numParticles, "drudeParticleParams");
vector<mm_float4> paramVector(numParticles); vector<mm_float4> paramVector(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
double charge, polarizability, aniso12, aniso34; double charge, polarizability, aniso12, aniso34;
...@@ -145,9 +138,9 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor ...@@ -145,9 +138,9 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
} }
paramVector[i] = mm_float4((float) k1, (float) k2, (float) k3, 0.0f); paramVector[i] = mm_float4((float) k1, (float) k2, (float) k3, 0.0f);
} }
particleParams->upload(paramVector); particleParams.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(particleParams->getDeviceBuffer(), "float4"); replacements["PARAMS"] = cl.getBondedUtilities().addArgument(particleParams.getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup()); cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
} }
int startPairIndex = cl.getContextIndex()*force.getNumScreenedPairs()/numContexts; int startPairIndex = cl.getContextIndex()*force.getNumScreenedPairs()/numContexts;
...@@ -157,7 +150,7 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor ...@@ -157,7 +150,7 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
// Create the screened interaction between dipole pairs. // Create the screened interaction between dipole pairs.
vector<vector<int> > atoms(numPairs, vector<int>(4)); vector<vector<int> > atoms(numPairs, vector<int>(4));
pairParams = OpenCLArray::create<mm_float2>(cl, numPairs, "drudePairParams"); pairParams.initialize<mm_float2>(cl, numPairs, "drudePairParams");
vector<mm_float2> paramVector(numPairs); vector<mm_float2> paramVector(numPairs);
for (int i = 0; i < numPairs; i++) { for (int i = 0; i < numPairs; i++) {
int drude1, drude2; int drude1, drude2;
...@@ -171,9 +164,9 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor ...@@ -171,9 +164,9 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
double energyScale = ONE_4PI_EPS0*charge1*charge2; double energyScale = ONE_4PI_EPS0*charge1*charge2;
paramVector[i] = mm_float2((float) screeningScale, (float) energyScale); paramVector[i] = mm_float2((float) screeningScale, (float) energyScale);
} }
pairParams->upload(paramVector); pairParams.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(pairParams->getDeviceBuffer(), "float2"); replacements["PARAMS"] = cl.getBondedUtilities().addArgument(pairParams.getDeviceBuffer(), "float2");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudePairForce, replacements), force.getForceGroup()); cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
} }
cl.addForce(new OpenCLDrudeForceInfo(force)); cl.addForce(new OpenCLDrudeForceInfo(force));
...@@ -192,7 +185,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c ...@@ -192,7 +185,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
int endParticleIndex = (cl.getContextIndex()+1)*force.getNumParticles()/numContexts; int endParticleIndex = (cl.getContextIndex()+1)*force.getNumParticles()/numContexts;
int numParticles = endParticleIndex-startParticleIndex; int numParticles = endParticleIndex-startParticleIndex;
if (numParticles > 0) { if (numParticles > 0) {
if (particleParams == NULL || numParticles != particleParams->getSize()) if (!particleParams.isInitialized() || numParticles != particleParams.getSize())
throw OpenMMException("updateParametersInContext: The number of Drude particles has changed"); throw OpenMMException("updateParametersInContext: The number of Drude particles has changed");
vector<mm_float4> paramVector(numParticles); vector<mm_float4> paramVector(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
...@@ -211,7 +204,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c ...@@ -211,7 +204,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
k2 = 0; k2 = 0;
paramVector[i] = mm_float4((float) k1, (float) k2, (float) k3, 0.0f); paramVector[i] = mm_float4((float) k1, (float) k2, (float) k3, 0.0f);
} }
particleParams->upload(paramVector); particleParams.upload(paramVector);
} }
// Set the pair parameters. // Set the pair parameters.
...@@ -220,7 +213,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c ...@@ -220,7 +213,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
int endPairIndex = (cl.getContextIndex()+1)*force.getNumScreenedPairs()/numContexts; int endPairIndex = (cl.getContextIndex()+1)*force.getNumScreenedPairs()/numContexts;
int numPairs = endPairIndex-startPairIndex; int numPairs = endPairIndex-startPairIndex;
if (numPairs > 0) { if (numPairs > 0) {
if (pairParams == NULL || numPairs != pairParams->getSize()) if (!pairParams.isInitialized() || numPairs != pairParams.getSize())
throw OpenMMException("updateParametersInContext: The number of screened pairs has changed"); throw OpenMMException("updateParametersInContext: The number of screened pairs has changed");
vector<mm_float2> paramVector(numPairs); vector<mm_float2> paramVector(numPairs);
for (int i = 0; i < numPairs; i++) { for (int i = 0; i < numPairs; i++) {
...@@ -235,17 +228,10 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c ...@@ -235,17 +228,10 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
double energyScale = ONE_4PI_EPS0*charge1*charge2; double energyScale = ONE_4PI_EPS0*charge1*charge2;
paramVector[i] = mm_float2((float) screeningScale, (float) energyScale); paramVector[i] = mm_float2((float) screeningScale, (float) energyScale);
} }
pairParams->upload(paramVector); pairParams.upload(paramVector);
} }
} }
OpenCLIntegrateDrudeLangevinStepKernel::~OpenCLIntegrateDrudeLangevinStepKernel() {
if (normalParticles != NULL)
delete normalParticles;
if (pairParticles != NULL)
delete pairParticles;
}
void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) { void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
cl.getPlatformData().initializeContexts(system); cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed()); cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
...@@ -266,12 +252,12 @@ void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, co ...@@ -266,12 +252,12 @@ void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, co
pairParticleVec.push_back(mm_int2(p, p1)); pairParticleVec.push_back(mm_int2(p, p1));
} }
normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end()); normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end());
normalParticles = OpenCLArray::create<int>(cl, max((int) normalParticleVec.size(), 1), "drudeNormalParticles"); normalParticles.initialize<int>(cl, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
pairParticles = OpenCLArray::create<cl_int2>(cl, max((int) pairParticleVec.size(), 1), "drudePairParticles"); pairParticles.initialize<cl_int2>(cl, max((int) pairParticleVec.size(), 1), "drudePairParticles");
if (normalParticleVec.size() > 0) if (normalParticleVec.size() > 0)
normalParticles->upload(normalParticleVec); normalParticles.upload(normalParticleVec);
if (pairParticleVec.size() > 0) if (pairParticleVec.size() > 0)
pairParticles->upload(pairParticleVec); pairParticles.upload(pairParticleVec);
// Create kernels. // Create kernels.
...@@ -296,8 +282,8 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const ...@@ -296,8 +282,8 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, normalParticles->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(3, normalParticles.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, pairParticles->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(4, pairParticles.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, integration.getStepSize().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(5, integration.getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(12, integration.getRandom().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(12, integration.getRandom().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
...@@ -314,7 +300,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const ...@@ -314,7 +300,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
else else
hardwallKernel.setArg<void*>(1, NULL); hardwallKernel.setArg<void*>(1, NULL);
hardwallKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer()); hardwallKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
hardwallKernel.setArg<cl::Buffer>(3, pairParticles->getDeviceBuffer()); hardwallKernel.setArg<cl::Buffer>(3, pairParticles.getDeviceBuffer());
hardwallKernel.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer()); hardwallKernel.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
} }
...@@ -363,7 +349,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const ...@@ -363,7 +349,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
// Call the first integration kernel. // Call the first integration kernel.
kernel1.setArg<cl_uint>(13, integration.prepareRandomNumbers(normalParticles->getSize()+2*pairParticles->getSize())); kernel1.setArg<cl_uint>(13, integration.prepareRandomNumbers(normalParticles.getSize()+2*pairParticles.getSize()));
cl.executeKernel(kernel1, numAtoms); cl.executeKernel(kernel1, numAtoms);
// Apply constraints. // Apply constraints.
...@@ -377,7 +363,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const ...@@ -377,7 +363,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
// Apply hard wall constraints. // Apply hard wall constraints.
if (maxDrudeDistance > 0) if (maxDrudeDistance > 0)
cl.executeKernel(hardwallKernel, pairParticles->getSize()); cl.executeKernel(hardwallKernel, pairParticles.getSize());
integration.computeVirtualSites(); integration.computeVirtualSites();
// Update the time and step count. // Update the time and step count.
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. * * Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -45,9 +45,8 @@ namespace OpenMM { ...@@ -45,9 +45,8 @@ namespace OpenMM {
class OpenCLCalcDrudeForceKernel : public CalcDrudeForceKernel { class OpenCLCalcDrudeForceKernel : public CalcDrudeForceKernel {
public: public:
OpenCLCalcDrudeForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : OpenCLCalcDrudeForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
CalcDrudeForceKernel(name, platform), cl(cl), particleParams(NULL), pairParams(NULL) { CalcDrudeForceKernel(name, platform), cl(cl) {
} }
~OpenCLCalcDrudeForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -73,8 +72,8 @@ public: ...@@ -73,8 +72,8 @@ public:
void copyParametersToContext(ContextImpl& context, const DrudeForce& force); void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
private: private:
OpenCLContext& cl; OpenCLContext& cl;
OpenCLArray* particleParams; OpenCLArray particleParams;
OpenCLArray* pairParams; OpenCLArray pairParams;
}; };
/** /**
...@@ -83,9 +82,8 @@ private: ...@@ -83,9 +82,8 @@ private:
class OpenCLIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel { class OpenCLIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
public: public:
OpenCLIntegrateDrudeLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : OpenCLIntegrateDrudeLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
IntegrateDrudeLangevinStepKernel(name, platform), cl(cl), hasInitializedKernels(false), normalParticles(NULL), pairParticles(NULL) { IntegrateDrudeLangevinStepKernel(name, platform), cl(cl), hasInitializedKernels(false) {
} }
~OpenCLIntegrateDrudeLangevinStepKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -112,8 +110,8 @@ private: ...@@ -112,8 +110,8 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
double prevStepSize; double prevStepSize;
OpenCLArray* normalParticles; OpenCLArray normalParticles;
OpenCLArray* pairParticles; OpenCLArray pairParticles;
cl::Kernel kernel1, kernel2, hardwallKernel; cl::Kernel kernel1, kernel2, hardwallKernel;
}; };
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. * * Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -41,19 +41,6 @@ ...@@ -41,19 +41,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
OpenCLIntegrateRPMDStepKernel::~OpenCLIntegrateRPMDStepKernel() {
if (forces != NULL)
delete forces;
if (positions != NULL)
delete positions;
if (velocities != NULL)
delete velocities;
if (contractedForces != NULL)
delete contractedForces;
if (contractedPositions != NULL)
delete contractedPositions;
}
void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) { void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system); cl.getPlatformData().initializeContexts(system);
numCopies = integrator.getNumCopies(); numCopies = integrator.getNumCopies();
...@@ -63,32 +50,32 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI ...@@ -63,32 +50,32 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5."); throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5.");
int paddedParticles = cl.getPaddedNumAtoms(); int paddedParticles = cl.getPaddedNumAtoms();
int forceElementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4)); int forceElementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
forces = new OpenCLArray(cl, numCopies*paddedParticles, forceElementSize, "rpmdForces"); forces.initialize(cl, numCopies*paddedParticles, forceElementSize, "rpmdForces");
bool useDoublePrecision = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()); bool useDoublePrecision = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision());
int elementSize = (useDoublePrecision ? sizeof(mm_double4) : sizeof(mm_float4)); int elementSize = (useDoublePrecision ? sizeof(mm_double4) : sizeof(mm_float4));
positions = new OpenCLArray(cl, numCopies*paddedParticles, elementSize, "rpmdPositions"); positions.initialize(cl, numCopies*paddedParticles, elementSize, "rpmdPositions");
velocities = new OpenCLArray(cl, numCopies*paddedParticles, elementSize, "rpmdVelocities"); velocities.initialize(cl, numCopies*paddedParticles, elementSize, "rpmdVelocities");
cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed()); cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans. // Fill in the posq and velm arrays with safe values to avoid a risk of nans.
if (useDoublePrecision) { if (useDoublePrecision) {
vector<mm_double4> temp(positions->getSize()); vector<mm_double4> temp(positions.getSize());
for (int i = 0; i < positions->getSize(); i++) for (int i = 0; i < positions.getSize(); i++)
temp[i] = mm_double4(0, 0, 0, 0); temp[i] = mm_double4(0, 0, 0, 0);
positions->upload(temp); positions.upload(temp);
for (int i = 0; i < velocities->getSize(); i++) for (int i = 0; i < velocities.getSize(); i++)
temp[i] = mm_double4(0, 0, 0, 1); temp[i] = mm_double4(0, 0, 0, 1);
velocities->upload(temp); velocities.upload(temp);
} }
else { else {
vector<mm_float4> temp(positions->getSize()); vector<mm_float4> temp(positions.getSize());
for (int i = 0; i < positions->getSize(); i++) for (int i = 0; i < positions.getSize(); i++)
temp[i] = mm_float4(0, 0, 0, 0); temp[i] = mm_float4(0, 0, 0, 0);
positions->upload(temp); positions.upload(temp);
for (int i = 0; i < velocities->getSize(); i++) for (int i = 0; i < velocities.getSize(); i++)
temp[i] = mm_float4(0, 0, 0, 1); temp[i] = mm_float4(0, 0, 0, 1);
velocities->upload(temp); velocities.upload(temp);
} }
// Build a list of contractions. // Build a list of contractions.
...@@ -117,8 +104,8 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI ...@@ -117,8 +104,8 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
} }
} }
if (maxContractedCopies > 0) { if (maxContractedCopies > 0) {
contractedForces = new OpenCLArray(cl, maxContractedCopies*paddedParticles, forceElementSize, "rpmdContractedForces"); contractedForces.initialize(cl, maxContractedCopies*paddedParticles, forceElementSize, "rpmdContractedForces");
contractedPositions = new OpenCLArray(cl, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions"); contractedPositions.initialize(cl, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
} }
// Create kernels. // Create kernels.
...@@ -164,30 +151,30 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI ...@@ -164,30 +151,30 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
void OpenCLIntegrateRPMDStepKernel::initializeKernels(ContextImpl& context) { void OpenCLIntegrateRPMDStepKernel::initializeKernels(ContextImpl& context) {
hasInitializedKernel = true; hasInitializedKernel = true;
pileKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer()); pileKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer()); stepKernel.setArg<cl::Buffer>(0, positions.getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(1, velocities->getDeviceBuffer()); stepKernel.setArg<cl::Buffer>(1, velocities.getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(2, forces->getDeviceBuffer()); stepKernel.setArg<cl::Buffer>(2, forces.getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer()); velocitiesKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer()); velocitiesKernel.setArg<cl::Buffer>(1, forces.getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer()); translateKernel.setArg<cl::Buffer>(0, positions.getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer()); translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer()); translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(3, cl.getPosq().getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(3, cl.getPosq().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(4, cl.getAtomIndexArray().getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(4, cl.getAtomIndexArray().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(3, velocities->getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(3, velocities.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(4, cl.getPosq().getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(4, cl.getPosq().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(6, cl.getAtomIndexArray().getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(6, cl.getAtomIndexArray().getDeviceBuffer());
for (auto& g : groupsByCopies) { for (auto& g : groupsByCopies) {
int copies = g.first; int copies = g.first;
positionContractionKernels[copies].setArg<cl::Buffer>(0, positions->getDeviceBuffer()); positionContractionKernels[copies].setArg<cl::Buffer>(0, positions.getDeviceBuffer());
positionContractionKernels[copies].setArg<cl::Buffer>(1, contractedPositions->getDeviceBuffer()); positionContractionKernels[copies].setArg<cl::Buffer>(1, contractedPositions.getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(0, forces->getDeviceBuffer()); forceContractionKernels[copies].setArg<cl::Buffer>(0, forces.getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(1, contractedForces->getDeviceBuffer()); forceContractionKernels[copies].setArg<cl::Buffer>(1, contractedForces.getDeviceBuffer());
} }
} }
...@@ -261,9 +248,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte ...@@ -261,9 +248,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Compute forces from all groups that didn't have a specified contraction. // Compute forces from all groups that didn't have a specified contraction.
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(1, forces.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, positions->getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(5, positions.getDeviceBuffer());
for (int i = 0; i < numCopies; i++) { for (int i = 0; i < numCopies; i++) {
copyToContextKernel.setArg<cl_int>(5, i); copyToContextKernel.setArg<cl_int>(5, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms()); cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
...@@ -283,9 +270,9 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { ...@@ -283,9 +270,9 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Now loop over contractions and compute forces from them. // Now loop over contractions and compute forces from them.
if (groupsByCopies.size() > 0) { if (groupsByCopies.size() > 0) {
copyToContextKernel.setArg<cl::Buffer>(2, contractedPositions->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(2, contractedPositions.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, contractedForces->getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(1, contractedForces.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, contractedPositions->getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(5, contractedPositions.getDeviceBuffer());
for (auto& g : groupsByCopies) { for (auto& g : groupsByCopies) {
int copies = g.first; int copies = g.first;
int groupFlags = g.second; int groupFlags = g.second;
...@@ -313,7 +300,7 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) { ...@@ -313,7 +300,7 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
if (groupsByCopies.size() > 0) { if (groupsByCopies.size() > 0) {
// Ensure the Context contains the positions from the last copy, since we'll assume that later. // Ensure the Context contains the positions from the last copy, since we'll assume that later.
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
copyToContextKernel.setArg<cl_int>(5, numCopies-1); copyToContextKernel.setArg<cl_int>(5, numCopies-1);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms()); cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
} }
...@@ -324,7 +311,7 @@ double OpenCLIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, ...@@ -324,7 +311,7 @@ double OpenCLIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context,
} }
void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) { void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
if (positions == NULL) if (!positions.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context"); throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
if (pos.size() != numParticles) if (pos.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()"); throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
...@@ -346,7 +333,7 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p ...@@ -346,7 +333,7 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
cl.getPosq().download(posq); cl.getPosq().download(posq);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w); posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]); cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
} }
else if (cl.getUseMixedPrecision()) { else if (cl.getUseMixedPrecision()) {
vector<mm_float4> posqf(cl.getPaddedNumAtoms()); vector<mm_float4> posqf(cl.getPaddedNumAtoms());
...@@ -354,19 +341,19 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p ...@@ -354,19 +341,19 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
vector<mm_double4> posq(cl.getPaddedNumAtoms()); vector<mm_double4> posq(cl.getPaddedNumAtoms());
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w); posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]); cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
} }
else { else {
vector<mm_float4> posq(cl.getPaddedNumAtoms()); vector<mm_float4> posq(cl.getPaddedNumAtoms());
cl.getPosq().download(posq); cl.getPosq().download(posq);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
posq[i] = mm_float4((cl_float) offsetPos[i][0], (cl_float) offsetPos[i][1], (cl_float) offsetPos[i][2], posq[i].w); posq[i] = mm_float4((cl_float) offsetPos[i][0], (cl_float) offsetPos[i][1], (cl_float) offsetPos[i][2], posq[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]); cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
} }
} }
void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) { void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) {
if (velocities == NULL) if (!velocities.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"); throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
if (vel.size() != numParticles) if (vel.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()"); throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
...@@ -375,21 +362,21 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& ...@@ -375,21 +362,21 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
cl.getVelm().download(velm); cl.getVelm().download(velm);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
velm[i] = mm_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w); velm[i] = mm_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &velm[0]); cl.getQueue().enqueueWriteBuffer(velocities.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &velm[0]);
} }
else { else {
vector<mm_float4> velm(cl.getPaddedNumAtoms()); vector<mm_float4> velm(cl.getPaddedNumAtoms());
cl.getVelm().download(velm); cl.getVelm().download(velm);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
velm[i] = mm_float4((cl_float) vel[i][0], (cl_float) vel[i][1], (cl_float) vel[i][2], velm[i].w); velm[i] = mm_float4((cl_float) vel[i][0], (cl_float) vel[i][1], (cl_float) vel[i][2], velm[i].w);
cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]); cl.getQueue().enqueueWriteBuffer(velocities.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
} }
} }
void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) { void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
if (!hasInitializedKernel) if (!hasInitializedKernel)
initializeKernels(context); initializeKernels(context);
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
copyToContextKernel.setArg<cl_int>(5, copy); copyToContextKernel.setArg<cl_int>(5, copy);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms()); cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
} }
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. * * Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -45,9 +45,8 @@ namespace OpenMM { ...@@ -45,9 +45,8 @@ namespace OpenMM {
class OpenCLIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel { class OpenCLIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
public: public:
OpenCLIntegrateRPMDStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : OpenCLIntegrateRPMDStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false), forces(NULL), positions(NULL), velocities(NULL), contractedForces(NULL), contractedPositions(NULL) { IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false) {
} }
~OpenCLIntegrateRPMDStepKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -92,11 +91,11 @@ private: ...@@ -92,11 +91,11 @@ private:
int numCopies, numParticles, workgroupSize; int numCopies, numParticles, workgroupSize;
std::map<int, int> groupsByCopies; std::map<int, int> groupsByCopies;
int groupsNotContracted; int groupsNotContracted;
OpenCLArray* forces; OpenCLArray forces;
OpenCLArray* positions; OpenCLArray positions;
OpenCLArray* velocities; OpenCLArray velocities;
OpenCLArray* contractedForces; OpenCLArray contractedForces;
OpenCLArray* contractedPositions; OpenCLArray contractedPositions;
cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel; cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
std::map<int, cl::Kernel> positionContractionKernels; std::map<int, cl::Kernel> positionContractionKernels;
std::map<int, cl::Kernel> forceContractionKernels; std::map<int, cl::Kernel> forceContractionKernels;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment