"wrappers/vscode:/vscode.git/clone" did not exist on "b791d97993f380c59a4097e21256026739869b0c"
Commit b33ee3b0 authored by peastman
Browse files

More conversion of OpenCLArrays

parent d59b0373
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -81,7 +81,6 @@ namespace OpenMM {
class OPENMM_EXPORT_OPENCL OpenCLBondedUtilities {
public:
OpenCLBondedUtilities(OpenCLContext& context);
~OpenCLBondedUtilities();
/**
* Add a bonded interaction.
*
......@@ -143,8 +142,8 @@ private:
std::vector<std::vector<int> > forceSets;
std::vector<cl::Memory*> arguments;
std::vector<std::string> argTypes;
std::vector<OpenCLArray*> atomIndices;
std::vector<OpenCLArray*> bufferIndices;
std::vector<OpenCLArray> atomIndices;
std::vector<OpenCLArray> bufferIndices;
std::vector<std::string> prefixCode;
std::vector<std::string> energyParameterDerivatives;
int numForceBuffers, maxBonds, allGroups;
......
......@@ -33,11 +33,10 @@ namespace OpenMM {
/**
 * Stream compaction helper. It drives two OpenCL kernels, "countElts" and
 * "moveValidElementsStaged", both created from OpenCLKernelSources::compact.
 */
class OPENMM_EXPORT_OPENCL OpenCLCompact {
public:
/**
 * Allocate the per-thread-block count array and create the two kernels.
 *
 * @param context    the context used to allocate the array and build the program
 */
OpenCLCompact(OpenCLContext& context);
~OpenCLCompact();
/**
 * Bind the arguments of the count kernel and the move kernel for one compaction pass.
 *
 * @param dOut       passed to the move kernel as argument 1
 * @param dIn        passed to the move kernel as argument 0; dIn.getSize() is the length given to both kernels
 * @param dValid     passed to both kernels; presumably flags which elements are kept (confirm against the kernel source)
 * @param numValid   passed to the move kernel as argument 5; presumably receives the number of kept elements (confirm)
 */
void compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid);
private:
OpenCLContext& context;
// NOTE(review): dgBlockCounts is listed twice below, once as a pointer and once by value.
// A compilable header can contain only one of the two declarations.
OpenCLArray* dgBlockCounts;
OpenCLArray dgBlockCounts;
// Kernel "countElts"; dgBlockCounts is its first argument.
cl::Kernel countKernel;
// Kernel "moveValidElementsStaged"; dgBlockCounts is its fourth argument.
cl::Kernel moveValidKernel;
};
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -42,25 +42,24 @@ namespace OpenMM {
class OPENMM_EXPORT_OPENCL OpenCLIntegrationUtilities {
public:
OpenCLIntegrationUtilities(OpenCLContext& context, const System& system);
~OpenCLIntegrationUtilities();
/**
* Get the array which contains position deltas.
*/
OpenCLArray& getPosDelta() {
return *posDelta;
return posDelta;
}
/**
* Get the array which contains random values. Each element is a float4, whose components
* are independent, normally distributed random numbers with mean 0 and variance 1.
*/
OpenCLArray& getRandom() {
return *random;
return random;
}
/**
* Get the array which contains the current step size.
*/
OpenCLArray& getStepSize() {
return *stepSize;
return stepSize;
}
/**
* Set the size to use for the next step.
......@@ -131,36 +130,36 @@ private:
cl::Kernel ccmaPosUpdateKernel, ccmaVelUpdateKernel;
cl::Kernel vsitePositionKernel, vsiteForceKernel, vsiteAddForcesKernel;
cl::Kernel randomKernel, timeShiftKernel;
OpenCLArray* posDelta;
OpenCLArray* settleAtoms;
OpenCLArray* settleParams;
OpenCLArray* shakeAtoms;
OpenCLArray* shakeParams;
OpenCLArray* random;
OpenCLArray* randomSeed;
OpenCLArray* stepSize;
OpenCLArray* ccmaAtoms;
OpenCLArray* ccmaDistance;
OpenCLArray* ccmaReducedMass;
OpenCLArray* ccmaAtomConstraints;
OpenCLArray* ccmaNumAtomConstraints;
OpenCLArray* ccmaConstraintMatrixColumn;
OpenCLArray* ccmaConstraintMatrixValue;
OpenCLArray* ccmaDelta1;
OpenCLArray* ccmaDelta2;
OpenCLArray* ccmaConverged;
OpenCLArray* ccmaConvergedHostBuffer;
OpenCLArray* vsite2AvgAtoms;
OpenCLArray* vsite2AvgWeights;
OpenCLArray* vsite3AvgAtoms;
OpenCLArray* vsite3AvgWeights;
OpenCLArray* vsiteOutOfPlaneAtoms;
OpenCLArray* vsiteOutOfPlaneWeights;
OpenCLArray* vsiteLocalCoordsIndex;
OpenCLArray* vsiteLocalCoordsAtoms;
OpenCLArray* vsiteLocalCoordsWeights;
OpenCLArray* vsiteLocalCoordsPos;
OpenCLArray* vsiteLocalCoordsStartIndex;
OpenCLArray posDelta;
OpenCLArray settleAtoms;
OpenCLArray settleParams;
OpenCLArray shakeAtoms;
OpenCLArray shakeParams;
OpenCLArray random;
OpenCLArray randomSeed;
OpenCLArray stepSize;
OpenCLArray ccmaAtoms;
OpenCLArray ccmaDistance;
OpenCLArray ccmaReducedMass;
OpenCLArray ccmaAtomConstraints;
OpenCLArray ccmaNumAtomConstraints;
OpenCLArray ccmaConstraintMatrixColumn;
OpenCLArray ccmaConstraintMatrixValue;
OpenCLArray ccmaDelta1;
OpenCLArray ccmaDelta2;
OpenCLArray ccmaConverged;
OpenCLArray ccmaConvergedHostBuffer;
OpenCLArray vsite2AvgAtoms;
OpenCLArray vsite2AvgWeights;
OpenCLArray vsite3AvgAtoms;
OpenCLArray vsite3AvgWeights;
OpenCLArray vsiteOutOfPlaneAtoms;
OpenCLArray vsiteOutOfPlaneWeights;
OpenCLArray vsiteLocalCoordsIndex;
OpenCLArray vsiteLocalCoordsAtoms;
OpenCLArray vsiteLocalCoordsWeights;
OpenCLArray vsiteLocalCoordsPos;
OpenCLArray vsiteLocalCoordsStartIndex;
int randomPos;
int lastSeed, numVsites;
bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels, ccmaUseDirectBuffer, hasOverlappingVsites;
......
......@@ -1132,12 +1132,8 @@ private:
class OpenCLCalcGayBerneForceKernel : public CalcGayBerneForceKernel {
public:
OpenCLCalcGayBerneForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGayBerneForceKernel(name, platform), cl(cl),
hasInitializedKernels(false), sortedParticles(NULL), axisParticleIndices(NULL), sigParams(NULL), epsParams(NULL), scale(NULL), exceptionParticles(NULL),
exceptionParams(NULL), aMatrix(NULL),
bMatrix(NULL), gMatrix(NULL), exclusions(NULL), exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighbors(NULL),
neighborIndex(NULL), neighborBlockCount(NULL), sortedPos(NULL), torque(NULL) {
hasInitializedKernels(false) {
}
~OpenCLCalcGayBerneForceKernel();
/**
* Initialize the kernel.
*
......@@ -1169,25 +1165,25 @@ private:
bool hasInitializedKernels;
int numRealParticles, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod;
OpenCLArray* sortedParticles;
OpenCLArray* axisParticleIndices;
OpenCLArray* sigParams;
OpenCLArray* epsParams;
OpenCLArray* scale;
OpenCLArray* exceptionParticles;
OpenCLArray* exceptionParams;
OpenCLArray* aMatrix;
OpenCLArray* bMatrix;
OpenCLArray* gMatrix;
OpenCLArray* exclusions;
OpenCLArray* exclusionStartIndex;
OpenCLArray* blockCenter;
OpenCLArray* blockBoundingBox;
OpenCLArray* neighbors;
OpenCLArray* neighborIndex;
OpenCLArray* neighborBlockCount;
OpenCLArray* sortedPos;
OpenCLArray* torque;
OpenCLArray sortedParticles;
OpenCLArray axisParticleIndices;
OpenCLArray sigParams;
OpenCLArray epsParams;
OpenCLArray scale;
OpenCLArray exceptionParticles;
OpenCLArray exceptionParams;
OpenCLArray aMatrix;
OpenCLArray bMatrix;
OpenCLArray gMatrix;
OpenCLArray exclusions;
OpenCLArray exclusionStartIndex;
OpenCLArray blockCenter;
OpenCLArray blockBoundingBox;
OpenCLArray neighbors;
OpenCLArray neighborIndex;
OpenCLArray neighborBlockCount;
OpenCLArray sortedPos;
OpenCLArray torque;
std::vector<bool> isRealParticle;
std::vector<std::pair<int, int> > exceptionAtoms;
std::vector<std::pair<int, int> > excludedPairs;
......@@ -1200,9 +1196,8 @@ private:
class OpenCLCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
public:
OpenCLCalcCustomCVForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcCustomCVForceKernel(name, platform),
cl(cl), hasInitializedKernels(false), invAtomOrder(NULL), innerInvAtomOrder(NULL) {
cl(cl), hasInitializedKernels(false) {
}
~OpenCLCalcCustomCVForceKernel();
/**
* Initialize the kernel.
*
......@@ -1236,9 +1231,9 @@ private:
std::vector<std::string> variableNames, paramDerivNames, globalParameterNames;
std::vector<Lepton::ExpressionProgram> variableDerivExpressions;
std::vector<Lepton::ExpressionProgram> paramDerivExpressions;
std::vector<OpenCLArray*> cvForces;
OpenCLArray* invAtomOrder;
OpenCLArray* innerInvAtomOrder;
std::vector<OpenCLArray> cvForces;
OpenCLArray invAtomOrder;
OpenCLArray innerInvAtomOrder;
cl::Kernel copyStateKernel, copyForcesKernel, addForcesKernel;
};
......@@ -1247,10 +1242,8 @@ private:
*/
class OpenCLCalcRMSDForceKernel : public CalcRMSDForceKernel {
public:
OpenCLCalcRMSDForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcRMSDForceKernel(name, platform),
cl(cl), referencePos(NULL), particles(NULL), buffer(NULL) {
OpenCLCalcRMSDForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcRMSDForceKernel(name, platform), cl(cl) {
}
~OpenCLCalcRMSDForceKernel();
/**
* Initialize the kernel.
*
......@@ -1289,9 +1282,9 @@ private:
OpenCLContext& cl;
ForceInfo* info;
double sumNormRef;
OpenCLArray* referencePos;
OpenCLArray* particles;
OpenCLArray* buffer;
OpenCLArray referencePos;
OpenCLArray particles;
OpenCLArray buffer;
cl::Kernel kernel1, kernel2;
};
......@@ -1337,9 +1330,8 @@ private:
class OpenCLIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
public:
OpenCLIntegrateLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateLangevinStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), params(NULL) {
hasInitializedKernels(false) {
}
~OpenCLIntegrateLangevinStepKernel();
/**
* Initialize the kernel, setting up the particle masses.
*
......@@ -1365,7 +1357,7 @@ private:
OpenCLContext& cl;
double prevTemp, prevFriction, prevStepSize;
bool hasInitializedKernels;
OpenCLArray* params;
OpenCLArray params;
cl::Kernel kernel1, kernel2;
};
......@@ -1451,9 +1443,8 @@ private:
class OpenCLIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
public:
OpenCLIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVariableLangevinStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), params(NULL) {
hasInitializedKernels(false) {
}
~OpenCLIntegrateVariableLangevinStepKernel();
/**
* Initialize the kernel, setting up the particle masses.
*
......@@ -1481,7 +1472,7 @@ private:
OpenCLContext& cl;
bool hasInitializedKernels;
int blockSize;
OpenCLArray* params;
OpenCLArray params;
cl::Kernel kernel1, kernel2, selectSizeKernel;
double prevTemp, prevFriction, prevErrorTol;
};
......@@ -1493,8 +1484,7 @@ class OpenCLIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER};
OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL),
randomSeed(NULL), perDofEnergyParamDerivs(NULL), perDofValues(NULL), needsEnergyParamDerivs(false) {
hasInitializedKernels(false), localValuesAreCurrent(false), perDofValues(NULL), needsEnergyParamDerivs(false) {
}
~OpenCLIntegrateCustomStepKernel();
/**
......@@ -1575,15 +1565,15 @@ private:
int numGlobalVariables, sumWorkGroupSize;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent;
OpenCLArray* globalValues;
OpenCLArray* sumBuffer;
OpenCLArray* summedValue;
OpenCLArray* uniformRandoms;
OpenCLArray* randomSeed;
OpenCLArray* perDofEnergyParamDerivs;
std::vector<OpenCLArray*> tabulatedFunctions;
OpenCLArray globalValues;
OpenCLArray sumBuffer;
OpenCLArray summedValue;
OpenCLArray uniformRandoms;
OpenCLArray randomSeed;
OpenCLArray perDofEnergyParamDerivs;
std::vector<OpenCLArray> tabulatedFunctions;
std::map<int, double> savedEnergy;
std::map<int, OpenCLArray*> savedForces;
std::map<int, OpenCLArray> savedForces;
std::set<int> validSavedForces;
OpenCLParameterSet* perDofValues;
mutable std::vector<std::vector<cl_float> > localPerDofValuesFloat;
......@@ -1635,9 +1625,8 @@ public:
class OpenCLApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel {
public:
OpenCLApplyAndersenThermostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyAndersenThermostatKernel(name, platform), cl(cl),
hasInitializedKernels(false), atomGroups(NULL) {
hasInitializedKernels(false) {
}
~OpenCLApplyAndersenThermostatKernel();
/**
* Initialize the kernel.
*
......@@ -1655,7 +1644,7 @@ private:
OpenCLContext& cl;
bool hasInitializedKernels;
int randomSeed;
OpenCLArray* atomGroups;
OpenCLArray atomGroups;
cl::Kernel kernel;
};
......@@ -1665,9 +1654,8 @@ private:
class OpenCLApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel {
public:
OpenCLApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyMonteCarloBarostatKernel(name, platform), cl(cl),
hasInitializedKernels(false), savedPositions(NULL), savedForces(NULL), moleculeAtoms(NULL), moleculeStartIndex(NULL) {
hasInitializedKernels(false) {
}
~OpenCLApplyMonteCarloBarostatKernel();
/**
* Initialize the kernel.
*
......@@ -1699,10 +1687,10 @@ private:
OpenCLContext& cl;
bool hasInitializedKernels;
int numMolecules;
OpenCLArray* savedPositions;
OpenCLArray* savedForces;
OpenCLArray* moleculeAtoms;
OpenCLArray* moleculeStartIndex;
OpenCLArray savedPositions;
OpenCLArray savedForces;
OpenCLArray moleculeAtoms;
OpenCLArray moleculeStartIndex;
cl::Kernel kernel;
std::vector<int> lastAtomOrder;
};
......@@ -1712,9 +1700,8 @@ private:
*/
class OpenCLRemoveCMMotionKernel : public RemoveCMMotionKernel {
public:
OpenCLRemoveCMMotionKernel(std::string name, const Platform& platform, OpenCLContext& cl) : RemoveCMMotionKernel(name, platform), cl(cl), cmMomentum(NULL) {
OpenCLRemoveCMMotionKernel(std::string name, const Platform& platform, OpenCLContext& cl) : RemoveCMMotionKernel(name, platform), cl(cl) {
}
~OpenCLRemoveCMMotionKernel();
/**
* Initialize the kernel, setting up the particle masses.
*
......@@ -1731,7 +1718,7 @@ public:
private:
OpenCLContext& cl;
int frequency;
OpenCLArray* cmMomentum;
OpenCLArray cmMomentum;
cl::Kernel kernel1, kernel2;
};
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -175,55 +175,55 @@ public:
* Get the array containing the center of each atom block.
*/
OpenCLArray& getBlockCenters() {
return *blockCenter;
return blockCenter;
}
/**
* Get the array containing the dimensions of each atom block.
*/
OpenCLArray& getBlockBoundingBoxes() {
return *blockBoundingBox;
return blockBoundingBox;
}
/**
* Get the array whose first element contains the number of tiles with interactions.
*/
OpenCLArray& getInteractionCount() {
return *interactionCount;
return interactionCount;
}
/**
* Get the array containing tiles with interactions.
*/
OpenCLArray& getInteractingTiles() {
return *interactingTiles;
return interactingTiles;
}
/**
* Get the array containing the atoms in each tile with interactions.
*/
OpenCLArray& getInteractingAtoms() {
return *interactingAtoms;
return interactingAtoms;
}
/**
* Get the array containing exclusion flags.
*/
OpenCLArray& getExclusions() {
return *exclusions;
return exclusions;
}
/**
* Get the array containing tiles with exclusions.
*/
OpenCLArray& getExclusionTiles() {
return *exclusionTiles;
return exclusionTiles;
}
/**
* Get the array containing the index into the exclusion array for each tile.
*/
OpenCLArray& getExclusionIndices() {
return *exclusionIndices;
return exclusionIndices;
}
/**
* Get the array listing where the exclusion data starts for each row.
*/
OpenCLArray& getExclusionRowIndices() {
return *exclusionRowIndices;
return exclusionRowIndices;
}
/**
* Get the index of the first tile this context is responsible for processing.
......@@ -275,20 +275,20 @@ private:
class BlockSortTrait;
OpenCLContext& context;
std::map<int, KernelSet> groupKernels;
OpenCLArray* exclusionTiles;
OpenCLArray* exclusions;
OpenCLArray* exclusionIndices;
OpenCLArray* exclusionRowIndices;
OpenCLArray* interactingTiles;
OpenCLArray* interactingAtoms;
OpenCLArray* interactionCount;
OpenCLArray* blockCenter;
OpenCLArray* blockBoundingBox;
OpenCLArray* sortedBlocks;
OpenCLArray* sortedBlockCenter;
OpenCLArray* sortedBlockBoundingBox;
OpenCLArray* oldPositions;
OpenCLArray* rebuildNeighborList;
OpenCLArray exclusionTiles;
OpenCLArray exclusions;
OpenCLArray exclusionIndices;
OpenCLArray exclusionRowIndices;
OpenCLArray interactingTiles;
OpenCLArray interactingAtoms;
OpenCLArray interactionCount;
OpenCLArray blockCenter;
OpenCLArray blockBoundingBox;
OpenCLArray sortedBlocks;
OpenCLArray sortedBlockCenter;
OpenCLArray sortedBlockBoundingBox;
OpenCLArray oldPositions;
OpenCLArray rebuildNeighborList;
OpenCLSort* blockSorter;
cl::Event downloadCountEvent;
cl::Buffer* pinnedCountBuffer;
......
......@@ -84,7 +84,7 @@ private:
std::vector<long long> completionTimes;
std::vector<double> contextNonbondedFractions;
std::vector<int> tileCounts;
OpenCLArray* contextForces;
OpenCLArray contextForces;
cl::Buffer* pinnedPositionBuffer;
cl::Buffer* pinnedForceBuffer;
void* pinnedPositionMemory;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010-2013 Stanford University and the Authors. *
* Portions copyright (c) 2010-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -87,11 +87,11 @@ public:
private:
OpenCLContext& context;
SortTrait* trait;
OpenCLArray* dataRange;
OpenCLArray* bucketOfElement;
OpenCLArray* offsetInBucket;
OpenCLArray* bucketOffset;
OpenCLArray* buckets;
OpenCLArray dataRange;
OpenCLArray bucketOfElement;
OpenCLArray offsetInBucket;
OpenCLArray bucketOffset;
OpenCLArray buckets;
cl::Kernel shortListKernel, computeRangeKernel, assignElementsKernel, computeBucketPositionsKernel, copyToBucketsKernel, sortBucketsKernel;
unsigned int dataLength, rangeKernelSize, positionsKernelSize, sortKernelSize;
bool isShortList;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -36,13 +36,6 @@ using namespace std;
/**
 * Create an OpenCLBondedUtilities bound to a context. All counters start at zero
 * and the kernels are marked as not yet initialized.
 */
OpenCLBondedUtilities::OpenCLBondedUtilities(OpenCLContext& context)
        : context(context),
          numForceBuffers(0),
          maxBonds(0),
          allGroups(0),
          hasInitializedKernels(false) {
}
/**
 * Free every array referenced by atomIndices and bufferIndices.
 */
OpenCLBondedUtilities::~OpenCLBondedUtilities() {
    const int numAtomArrays = (int) atomIndices.size();
    for (int k = 0; k < numAtomArrays; ++k)
        delete atomIndices[k];
    const int numBufferArrays = (int) bufferIndices.size();
    for (int k = 0; k < numBufferArrays; ++k)
        delete bufferIndices[k];
}
void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, const string& source, int group) {
if (atoms.size() > 0) {
forceAtoms.push_back(atoms);
......@@ -92,6 +85,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
vector<vector<cl_uint> > bufferVec(numForces);
vector<vector<int> > bufferCounter(numForces, vector<int>(system.getNumParticles(), 0));
vector<int> numBuffers(numForces, 0);
atomIndices.resize(numForces);
for (int i = 0; i < numForces; i++) {
int numBonds = forceAtoms[i].size();
int numAtoms = forceAtoms[i][0].size();
......@@ -101,9 +95,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for (int atom = 0; atom < numAtoms; atom++)
indexVec[bond*width+atom] = forceAtoms[i][bond][atom];
}
OpenCLArray* indices = OpenCLArray::create<cl_uint>(context, indexVec.size(), "bondedIndices");
indices->upload(indexVec);
atomIndices.push_back(indices);
atomIndices[i].initialize<cl_uint>(context, indexVec.size(), "bondedIndices");
atomIndices[i].upload(indexVec);
bufferVec[i].resize(width*numBonds, 0);
for (int bond = 0; bond < numBonds; bond++) {
for (int atom = 0; atom < numAtoms; atom++)
......@@ -177,9 +170,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for (int bond = 0; bond < numBonds; bond++)
for (int atom = 0; atom < numAtoms; atom++)
bufferVec[force][bond*width+atom] += bufferCounter[forceSets[i][k]][forceAtoms[force][bond][atom]];
OpenCLArray* buffers = OpenCLArray::create<cl_uint>(context, bufferVec[force].size(), "bondedBufferIndices");
buffers->upload(bufferVec[force]);
bufferIndices[force] = buffers;
bufferIndices[force].initialize<cl_uint>(context, bufferVec[force].size(), "bondedBufferIndices");
bufferIndices[force].upload(bufferVec[force]);
}
// Create the kernels.
......@@ -291,8 +283,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
kernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
index += 6;
for (int j = 0; j < (int) forceSets[i].size(); j++) {
kernel.setArg<cl::Buffer>(index++, atomIndices[forceSets[i][j]]->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, bufferIndices[forceSets[i][j]]->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, atomIndices[forceSets[i][j]].getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, bufferIndices[forceSets[i][j]].getDeviceBuffer());
}
for (int j = 0; j < (int) arguments.size(); j++)
kernel.setArg<cl::Memory>(index++, *arguments[j]);
......
......@@ -29,18 +29,13 @@
using namespace OpenMM;
/**
 * Allocate dgBlockCounts with context.getNumThreadBlocks() elements of type cl_uint, then
 * create the "countElts" and "moveValidElementsStaged" kernels from OpenCLKernelSources::compact.
 *
 * NOTE(review): the constructor header and the allocation line each appear twice below,
 * once for a pointer member (OpenCLArray::create) and once for a by-value member (initialize).
 * Only one pair can be present in a compilable file.
 */
OpenCLCompact::OpenCLCompact(OpenCLContext& context) : context(context), dgBlockCounts(NULL) {
dgBlockCounts = OpenCLArray::create<cl_uint>(context, context.getNumThreadBlocks(), "dgBlockCounts");
OpenCLCompact::OpenCLCompact(OpenCLContext& context) : context(context) {
dgBlockCounts.initialize<cl_uint>(context, context.getNumThreadBlocks(), "dgBlockCounts");
// Both kernels come from the same compiled program.
cl::Program program = context.createProgram(OpenCLKernelSources::compact);
countKernel = cl::Kernel(program, "countElts");
moveValidKernel = cl::Kernel(program, "moveValidElementsStaged");
}
/**
 * Release the block-count array.
 */
OpenCLCompact::~OpenCLCompact() {
    // Deleting a null pointer is a no-op, so no explicit check is needed.
    delete dgBlockCounts;
}
void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid) {
// Figure out # elements per block
unsigned int len = dIn.getSize();
......@@ -51,7 +46,7 @@ void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArr
// TODO: implement loop over blocks of 10M
// Phase 1: Calculate number of valid elements per thread block
countKernel.setArg<cl::Buffer>(0, dgBlockCounts->getDeviceBuffer());
countKernel.setArg<cl::Buffer>(0, dgBlockCounts.getDeviceBuffer());
countKernel.setArg<cl::Buffer>(1, dValid.getDeviceBuffer());
countKernel.setArg<cl_uint>(2, len);
countKernel.setArg(3, 128*sizeof(cl_uint), NULL);
......@@ -61,7 +56,7 @@ void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArr
moveValidKernel.setArg<cl::Buffer>(0, dIn.getDeviceBuffer());
moveValidKernel.setArg<cl::Buffer>(1, dOut.getDeviceBuffer());
moveValidKernel.setArg<cl::Buffer>(2, dValid.getDeviceBuffer());
moveValidKernel.setArg<cl::Buffer>(3, dgBlockCounts->getDeviceBuffer());
moveValidKernel.setArg<cl::Buffer>(3, dgBlockCounts.getDeviceBuffer());
moveValidKernel.setArg<cl_uint>(4, len);
moveValidKernel.setArg<cl::Buffer>(5, numValid.getDeviceBuffer());
moveValidKernel.setArg(6, 128*sizeof(cl_uint), NULL);
......
This diff is collapsed.
......@@ -118,14 +118,12 @@ private:
/**
 * Construct the parallel kernel. completionTimes, contextNonbondedFractions and tileCounts
 * are each sized to data.contexts.size(), the pinned buffers and their host pointers start
 * as NULL, and one OpenCLCalcForcesAndEnergyKernel is created per context.
 *
 * NOTE(review): the tail of the initializer list appears twice below, once with
 * contextForces(NULL) and once without it. Only one of the two lines can be present
 * in a compilable file.
 */
OpenCLParallelCalcForcesAndEnergyKernel::OpenCLParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, OpenCLPlatform::PlatformData& data) :
CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()),
tileCounts(data.contexts.size()), contextForces(NULL), pinnedPositionBuffer(NULL), pinnedPositionMemory(NULL), pinnedForceBuffer(NULL), pinnedForceMemory(NULL) {
tileCounts(data.contexts.size()), pinnedPositionBuffer(NULL), pinnedPositionMemory(NULL), pinnedForceBuffer(NULL), pinnedForceMemory(NULL) {
// Wrap one per-context kernel for every entry in data.contexts.
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcForcesAndEnergyKernel(name, platform, *data.contexts[i])));
}
OpenCLParallelCalcForcesAndEnergyKernel::~OpenCLParallelCalcForcesAndEnergyKernel() {
if (contextForces != NULL)
delete contextForces;
if (pinnedPositionBuffer != NULL)
delete pinnedPositionBuffer;
if (pinnedForceBuffer != NULL)
......@@ -142,8 +140,8 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
OpenCLContext& cl0 = *data.contexts[0];
int elementSize = (cl0.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
if (contextForces == NULL) {
contextForces = OpenCLArray::create<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(),
if (!contextForces.isInitialized()) {
contextForces.initialize<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(),
data.contexts.size()*cl0.getPaddedNumAtoms(), "contextForces");
int bufferBytes = (data.contexts.size()-1)*cl0.getPaddedNumAtoms()*elementSize;
pinnedPositionBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes);
......@@ -179,9 +177,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
OpenCLContext& cl = *data.contexts[0];
int numAtoms = cl.getPaddedNumAtoms();
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueWriteBuffer(contextForces->getDeviceBuffer(), CL_FALSE, numAtoms*elementSize,
cl.getQueue().enqueueWriteBuffer(contextForces.getDeviceBuffer(), CL_FALSE, numAtoms*elementSize,
numAtoms*(data.contexts.size()-1)*elementSize, pinnedForceMemory);
cl.reduceBuffer(*contextForces, data.contexts.size());
cl.reduceBuffer(contextForces, data.contexts.size());
// Balance work between the contexts by transferring a little nonbonded work from the context that
// finished last to the one that finished first.
......
......@@ -31,8 +31,7 @@
using namespace OpenMM;
using namespace std;
OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int length) : context(context), trait(trait),
dataRange(NULL), bucketOfElement(NULL), offsetInBucket(NULL), bucketOffset(NULL), buckets(NULL), dataLength(length) {
OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int length) : context(context), trait(trait), dataLength(length) {
// Create kernels.
std::map<std::string, std::string> replacements;
......@@ -81,26 +80,16 @@ OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int le
// Create workspace arrays.
if (!isShortList) {
dataRange = new OpenCLArray(context, 2, trait->getKeySize(), "sortDataRange");
bucketOffset = OpenCLArray::create<cl_uint>(context, numBuckets, "bucketOffset");
bucketOfElement = OpenCLArray::create<cl_uint>(context, length, "bucketOfElement");
offsetInBucket = OpenCLArray::create<cl_uint>(context, length, "offsetInBucket");
buckets = new OpenCLArray(context, length, trait->getDataSize(), "buckets");
dataRange.initialize(context, 2, trait->getKeySize(), "sortDataRange");
bucketOffset.initialize<cl_uint>(context, numBuckets, "bucketOffset");
bucketOfElement.initialize<cl_uint>(context, length, "bucketOfElement");
offsetInBucket.initialize<cl_uint>(context, length, "offsetInBucket");
buckets.initialize(context, length, trait->getDataSize(), "buckets");
}
}
/**
 * Delete the trait object and the five workspace arrays.
 */
OpenCLSort::~OpenCLSort() {
    delete trait;
    // Any workspace pointer may still be NULL; deleting a null pointer is a no-op,
    // so the explicit checks are not needed.
    delete dataRange;
    delete bucketOfElement;
    delete offsetInBucket;
    delete bucketOffset;
    delete buckets;
}
void OpenCLSort::sort(OpenCLArray& data) {
......@@ -119,14 +108,14 @@ void OpenCLSort::sort(OpenCLArray& data) {
else {
// Compute the range of data values.
unsigned int numBuckets = bucketOffset->getSize();
unsigned int numBuckets = bucketOffset.getSize();
computeRangeKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
computeRangeKernel.setArg<cl_uint>(1, data.getSize());
computeRangeKernel.setArg<cl::Buffer>(2, dataRange->getDeviceBuffer());
computeRangeKernel.setArg<cl::Buffer>(2, dataRange.getDeviceBuffer());
computeRangeKernel.setArg(3, rangeKernelSize*trait->getKeySize(), NULL);
computeRangeKernel.setArg(4, rangeKernelSize*trait->getKeySize(), NULL);
computeRangeKernel.setArg<cl_int>(5, numBuckets);
computeRangeKernel.setArg<cl::Buffer>(6, bucketOffset->getDeviceBuffer());
computeRangeKernel.setArg<cl::Buffer>(6, bucketOffset.getDeviceBuffer());
context.executeKernel(computeRangeKernel, rangeKernelSize, rangeKernelSize);
// Assign array elements to buckets.
......@@ -134,35 +123,35 @@ void OpenCLSort::sort(OpenCLArray& data) {
assignElementsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
assignElementsKernel.setArg<cl_int>(1, data.getSize());
assignElementsKernel.setArg<cl_int>(2, numBuckets);
assignElementsKernel.setArg<cl::Buffer>(3, dataRange->getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(4, bucketOffset->getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(5, bucketOfElement->getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(6, offsetInBucket->getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(3, dataRange.getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(4, bucketOffset.getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(5, bucketOfElement.getDeviceBuffer());
assignElementsKernel.setArg<cl::Buffer>(6, offsetInBucket.getDeviceBuffer());
context.executeKernel(assignElementsKernel, data.getSize());
// Compute the position of each bucket.
computeBucketPositionsKernel.setArg<cl_int>(0, numBuckets);
computeBucketPositionsKernel.setArg<cl::Buffer>(1, bucketOffset->getDeviceBuffer());
computeBucketPositionsKernel.setArg<cl::Buffer>(1, bucketOffset.getDeviceBuffer());
computeBucketPositionsKernel.setArg(2, positionsKernelSize*sizeof(cl_int), NULL);
context.executeKernel(computeBucketPositionsKernel, positionsKernelSize, positionsKernelSize);
// Copy the data into the buckets.
copyToBucketsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(1, buckets->getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(1, buckets.getDeviceBuffer());
copyToBucketsKernel.setArg<cl_int>(2, data.getSize());
copyToBucketsKernel.setArg<cl::Buffer>(3, bucketOffset->getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(4, bucketOfElement->getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(5, offsetInBucket->getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(3, bucketOffset.getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(4, bucketOfElement.getDeviceBuffer());
copyToBucketsKernel.setArg<cl::Buffer>(5, offsetInBucket.getDeviceBuffer());
context.executeKernel(copyToBucketsKernel, data.getSize());
// Sort each bucket.
sortBucketsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
sortBucketsKernel.setArg<cl::Buffer>(1, buckets->getDeviceBuffer());
sortBucketsKernel.setArg<cl::Buffer>(1, buckets.getDeviceBuffer());
sortBucketsKernel.setArg<cl_int>(2, numBuckets);
sortBucketsKernel.setArg<cl::Buffer>(3, bucketOffset->getDeviceBuffer());
sortBucketsKernel.setArg<cl::Buffer>(3, bucketOffset.getDeviceBuffer());
sortBucketsKernel.setArg(4, sortKernelSize*trait->getDataSize(), NULL);
context.executeKernel(sortBucketsKernel, ((data.getSize()+sortKernelSize-1)/sortKernelSize)*sortKernelSize, sortKernelSize);
}
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -107,13 +107,6 @@ private:
const DrudeForce& force;
};
/**
 * Free the two parameter arrays owned by this kernel.
 */
OpenCLCalcDrudeForceKernel::~OpenCLCalcDrudeForceKernel() {
    // delete on a null pointer does nothing, so arrays that were never
    // allocated need no explicit guard.
    delete particleParams;
    delete pairParams;
}
void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startParticleIndex = cl.getContextIndex()*force.getNumParticles()/numContexts;
......@@ -123,7 +116,7 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
// Create the harmonic interaction .
vector<vector<int> > atoms(numParticles, vector<int>(5));
particleParams = OpenCLArray::create<mm_float4>(cl, numParticles, "drudeParticleParams");
particleParams.initialize<mm_float4>(cl, numParticles, "drudeParticleParams");
vector<mm_float4> paramVector(numParticles);
for (int i = 0; i < numParticles; i++) {
double charge, polarizability, aniso12, aniso34;
......@@ -145,9 +138,9 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
}
paramVector[i] = mm_float4((float) k1, (float) k2, (float) k3, 0.0f);
}
particleParams->upload(paramVector);
particleParams.upload(paramVector);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(particleParams->getDeviceBuffer(), "float4");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(particleParams.getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
}
int startPairIndex = cl.getContextIndex()*force.getNumScreenedPairs()/numContexts;
......@@ -157,7 +150,7 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
// Create the screened interaction between dipole pairs.
vector<vector<int> > atoms(numPairs, vector<int>(4));
pairParams = OpenCLArray::create<mm_float2>(cl, numPairs, "drudePairParams");
pairParams.initialize<mm_float2>(cl, numPairs, "drudePairParams");
vector<mm_float2> paramVector(numPairs);
for (int i = 0; i < numPairs; i++) {
int drude1, drude2;
......@@ -171,9 +164,9 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
double energyScale = ONE_4PI_EPS0*charge1*charge2;
paramVector[i] = mm_float2((float) screeningScale, (float) energyScale);
}
pairParams->upload(paramVector);
pairParams.upload(paramVector);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(pairParams->getDeviceBuffer(), "float2");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(pairParams.getDeviceBuffer(), "float2");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
}
cl.addForce(new OpenCLDrudeForceInfo(force));
......@@ -192,7 +185,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
int endParticleIndex = (cl.getContextIndex()+1)*force.getNumParticles()/numContexts;
int numParticles = endParticleIndex-startParticleIndex;
if (numParticles > 0) {
if (particleParams == NULL || numParticles != particleParams->getSize())
if (!particleParams.isInitialized() || numParticles != particleParams.getSize())
throw OpenMMException("updateParametersInContext: The number of Drude particles has changed");
vector<mm_float4> paramVector(numParticles);
for (int i = 0; i < numParticles; i++) {
......@@ -211,7 +204,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
k2 = 0;
paramVector[i] = mm_float4((float) k1, (float) k2, (float) k3, 0.0f);
}
particleParams->upload(paramVector);
particleParams.upload(paramVector);
}
// Set the pair parameters.
......@@ -220,7 +213,7 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
int endPairIndex = (cl.getContextIndex()+1)*force.getNumScreenedPairs()/numContexts;
int numPairs = endPairIndex-startPairIndex;
if (numPairs > 0) {
if (pairParams == NULL || numPairs != pairParams->getSize())
if (!pairParams.isInitialized() || numPairs != pairParams.getSize())
throw OpenMMException("updateParametersInContext: The number of screened pairs has changed");
vector<mm_float2> paramVector(numPairs);
for (int i = 0; i < numPairs; i++) {
......@@ -235,17 +228,10 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
double energyScale = ONE_4PI_EPS0*charge1*charge2;
paramVector[i] = mm_float2((float) screeningScale, (float) energyScale);
}
pairParams->upload(paramVector);
pairParams.upload(paramVector);
}
}
/**
 * Free the particle index arrays owned by this kernel.
 */
OpenCLIntegrateDrudeLangevinStepKernel::~OpenCLIntegrateDrudeLangevinStepKernel() {
    // delete on a null pointer does nothing, so arrays that were never
    // allocated need no explicit guard.
    delete normalParticles;
    delete pairParticles;
}
void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
......@@ -266,12 +252,12 @@ void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, co
pairParticleVec.push_back(mm_int2(p, p1));
}
normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end());
normalParticles = OpenCLArray::create<int>(cl, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
pairParticles = OpenCLArray::create<cl_int2>(cl, max((int) pairParticleVec.size(), 1), "drudePairParticles");
normalParticles.initialize<int>(cl, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
pairParticles.initialize<cl_int2>(cl, max((int) pairParticleVec.size(), 1), "drudePairParticles");
if (normalParticleVec.size() > 0)
normalParticles->upload(normalParticleVec);
normalParticles.upload(normalParticleVec);
if (pairParticleVec.size() > 0)
pairParticles->upload(pairParticleVec);
pairParticles.upload(pairParticleVec);
// Create kernels.
......@@ -296,8 +282,8 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, normalParticles->getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, pairParticles->getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, normalParticles.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, pairParticles.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, integration.getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(12, integration.getRandom().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
......@@ -314,7 +300,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
else
hardwallKernel.setArg<void*>(1, NULL);
hardwallKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
hardwallKernel.setArg<cl::Buffer>(3, pairParticles->getDeviceBuffer());
hardwallKernel.setArg<cl::Buffer>(3, pairParticles.getDeviceBuffer());
hardwallKernel.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
}
......@@ -363,7 +349,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
// Call the first integration kernel.
kernel1.setArg<cl_uint>(13, integration.prepareRandomNumbers(normalParticles->getSize()+2*pairParticles->getSize()));
kernel1.setArg<cl_uint>(13, integration.prepareRandomNumbers(normalParticles.getSize()+2*pairParticles.getSize()));
cl.executeKernel(kernel1, numAtoms);
// Apply constraints.
......@@ -377,7 +363,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
// Apply hard wall constraints.
if (maxDrudeDistance > 0)
cl.executeKernel(hardwallKernel, pairParticles->getSize());
cl.executeKernel(hardwallKernel, pairParticles.getSize());
integration.computeVirtualSites();
// Update the time and step count.
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -45,9 +45,8 @@ namespace OpenMM {
class OpenCLCalcDrudeForceKernel : public CalcDrudeForceKernel {
public:
OpenCLCalcDrudeForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
CalcDrudeForceKernel(name, platform), cl(cl), particleParams(NULL), pairParams(NULL) {
CalcDrudeForceKernel(name, platform), cl(cl) {
}
~OpenCLCalcDrudeForceKernel();
/**
* Initialize the kernel.
*
......@@ -73,8 +72,8 @@ public:
void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
private:
OpenCLContext& cl;
OpenCLArray* particleParams;
OpenCLArray* pairParams;
OpenCLArray particleParams;
OpenCLArray pairParams;
};
/**
......@@ -83,9 +82,8 @@ private:
class OpenCLIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
public:
OpenCLIntegrateDrudeLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
IntegrateDrudeLangevinStepKernel(name, platform), cl(cl), hasInitializedKernels(false), normalParticles(NULL), pairParticles(NULL) {
IntegrateDrudeLangevinStepKernel(name, platform), cl(cl), hasInitializedKernels(false) {
}
~OpenCLIntegrateDrudeLangevinStepKernel();
/**
* Initialize the kernel.
*
......@@ -112,8 +110,8 @@ private:
OpenCLContext& cl;
bool hasInitializedKernels;
double prevStepSize;
OpenCLArray* normalParticles;
OpenCLArray* pairParticles;
OpenCLArray normalParticles;
OpenCLArray pairParticles;
cl::Kernel kernel1, kernel2, hardwallKernel;
};
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -41,19 +41,6 @@
using namespace OpenMM;
using namespace std;
/**
 * Free every array owned by this kernel, in the same order the original
 * guarded deletes used.
 */
OpenCLIntegrateRPMDStepKernel::~OpenCLIntegrateRPMDStepKernel() {
    // delete on a null pointer does nothing, so arrays that were never
    // allocated (e.g. the contracted ones) need no explicit guard.
    delete forces;
    delete positions;
    delete velocities;
    delete contractedForces;
    delete contractedPositions;
}
void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system);
numCopies = integrator.getNumCopies();
......@@ -63,32 +50,32 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5.");
int paddedParticles = cl.getPaddedNumAtoms();
int forceElementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
forces = new OpenCLArray(cl, numCopies*paddedParticles, forceElementSize, "rpmdForces");
forces.initialize(cl, numCopies*paddedParticles, forceElementSize, "rpmdForces");
bool useDoublePrecision = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision());
int elementSize = (useDoublePrecision ? sizeof(mm_double4) : sizeof(mm_float4));
positions = new OpenCLArray(cl, numCopies*paddedParticles, elementSize, "rpmdPositions");
velocities = new OpenCLArray(cl, numCopies*paddedParticles, elementSize, "rpmdVelocities");
positions.initialize(cl, numCopies*paddedParticles, elementSize, "rpmdPositions");
velocities.initialize(cl, numCopies*paddedParticles, elementSize, "rpmdVelocities");
cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
if (useDoublePrecision) {
vector<mm_double4> temp(positions->getSize());
for (int i = 0; i < positions->getSize(); i++)
vector<mm_double4> temp(positions.getSize());
for (int i = 0; i < positions.getSize(); i++)
temp[i] = mm_double4(0, 0, 0, 0);
positions->upload(temp);
for (int i = 0; i < velocities->getSize(); i++)
positions.upload(temp);
for (int i = 0; i < velocities.getSize(); i++)
temp[i] = mm_double4(0, 0, 0, 1);
velocities->upload(temp);
velocities.upload(temp);
}
else {
vector<mm_float4> temp(positions->getSize());
for (int i = 0; i < positions->getSize(); i++)
vector<mm_float4> temp(positions.getSize());
for (int i = 0; i < positions.getSize(); i++)
temp[i] = mm_float4(0, 0, 0, 0);
positions->upload(temp);
for (int i = 0; i < velocities->getSize(); i++)
positions.upload(temp);
for (int i = 0; i < velocities.getSize(); i++)
temp[i] = mm_float4(0, 0, 0, 1);
velocities->upload(temp);
velocities.upload(temp);
}
// Build a list of contractions.
......@@ -117,8 +104,8 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
}
}
if (maxContractedCopies > 0) {
contractedForces = new OpenCLArray(cl, maxContractedCopies*paddedParticles, forceElementSize, "rpmdContractedForces");
contractedPositions = new OpenCLArray(cl, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
contractedForces.initialize(cl, maxContractedCopies*paddedParticles, forceElementSize, "rpmdContractedForces");
contractedPositions.initialize(cl, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
}
// Create kernels.
......@@ -164,30 +151,30 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
/**
 * Set hasInitializedKernel and bind device buffers to the arguments of the
 * pile, step, velocities, translate, copy-to-context, copy-from-context and
 * per-copy contraction kernels.
 *
 * The context parameter is not referenced anywhere in this body.
 *
 * NOTE(review): every binding of a member array (velocities, positions, forces,
 * contractedPositions, contractedForces) appears twice below, once through `->`
 * and once through `.`, with the same kernel and argument index. Only one form
 * can compile against the member's declared type; this looks like the before
 * and after versions of the same line left side by side. Confirm which set
 * belongs here.
 */
void OpenCLIntegrateRPMDStepKernel::initializeKernels(ContextImpl& context) {
// Record that the bindings have been made; copyToContext() checks this flag before calling here.
hasInitializedKernel = true;
// Bindings of the member arrays, written with pointer access.
pileKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(1, velocities->getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(2, forces->getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
// The same seven bindings again, written with value access.
pileKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(0, positions.getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(1, velocities.getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(2, forces.getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(1, forces.getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(0, positions.getDeviceBuffer());
// The remaining translate arguments come from the context's own arrays.
translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
// copyToContextKernel: arguments 0, 1, 3 and 4 are bound here; 2 and 5 are not set in this function.
copyToContextKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(3, cl.getPosq().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(4, cl.getAtomIndexArray().getDeviceBuffer());
// copyFromContextKernel: arguments 0, 2, 3, 4 and 6 are bound here; 1 and 5 are not set in this function.
copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(3, velocities->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(3, velocities.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(4, cl.getPosq().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(6, cl.getAtomIndexArray().getDeviceBuffer());
// One position and one force contraction kernel is configured per key of groupsByCopies.
for (auto& g : groupsByCopies) {
int copies = g.first;
// Full-size array as argument 0, contracted array as argument 1 (pointer access).
positionContractionKernels[copies].setArg<cl::Buffer>(0, positions->getDeviceBuffer());
positionContractionKernels[copies].setArg<cl::Buffer>(1, contractedPositions->getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(0, forces->getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(1, contractedForces->getDeviceBuffer());
// The same four bindings again (value access).
positionContractionKernels[copies].setArg<cl::Buffer>(0, positions.getDeviceBuffer());
positionContractionKernels[copies].setArg<cl::Buffer>(1, contractedPositions.getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(0, forces.getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(1, contractedForces.getDeviceBuffer());
}
}
......@@ -261,9 +248,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Compute forces from all groups that didn't have a specified contraction.
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, positions.getDeviceBuffer());
for (int i = 0; i < numCopies; i++) {
copyToContextKernel.setArg<cl_int>(5, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
......@@ -283,9 +270,9 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Now loop over contractions and compute forces from them.
if (groupsByCopies.size() > 0) {
copyToContextKernel.setArg<cl::Buffer>(2, contractedPositions->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, contractedForces->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, contractedPositions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, contractedPositions.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, contractedForces.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, contractedPositions.getDeviceBuffer());
for (auto& g : groupsByCopies) {
int copies = g.first;
int groupFlags = g.second;
......@@ -313,7 +300,7 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
if (groupsByCopies.size() > 0) {
// Ensure the Context contains the positions from the last copy, since we'll assume that later.
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
copyToContextKernel.setArg<cl_int>(5, numCopies-1);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
}
......@@ -324,7 +311,7 @@ double OpenCLIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context,
}
void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
if (positions == NULL)
if (!positions.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
if (pos.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
......@@ -346,7 +333,7 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
cl.getPosq().download(posq);
for (int i = 0; i < numParticles; i++)
posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
}
else if (cl.getUseMixedPrecision()) {
vector<mm_float4> posqf(cl.getPaddedNumAtoms());
......@@ -354,19 +341,19 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
vector<mm_double4> posq(cl.getPaddedNumAtoms());
for (int i = 0; i < numParticles; i++)
posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
}
else {
vector<mm_float4> posq(cl.getPaddedNumAtoms());
cl.getPosq().download(posq);
for (int i = 0; i < numParticles; i++)
posq[i] = mm_float4((cl_float) offsetPos[i][0], (cl_float) offsetPos[i][1], (cl_float) offsetPos[i][2], posq[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
}
}
void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) {
if (velocities == NULL)
if (!velocities.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
if (vel.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
......@@ -375,21 +362,21 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
cl.getVelm().download(velm);
for (int i = 0; i < numParticles; i++)
velm[i] = mm_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &velm[0]);
cl.getQueue().enqueueWriteBuffer(velocities.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &velm[0]);
}
else {
vector<mm_float4> velm(cl.getPaddedNumAtoms());
cl.getVelm().download(velm);
for (int i = 0; i < numParticles; i++)
velm[i] = mm_float4((cl_float) vel[i][0], (cl_float) vel[i][1], (cl_float) vel[i][2], velm[i].w);
cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
cl.getQueue().enqueueWriteBuffer(velocities.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
}
}
/**
 * Run copyToContextKernel for one copy index.
 *
 * Presumably this transfers the selected copy's state into the context's own
 * arrays; the kernel source is not visible here, so confirm against it.
 *
 * @param copy     the copy index passed to the kernel as argument 5
 * @param context  forwarded to initializeKernels() when the kernels have not been set up yet
 */
void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
// Kernel arguments are bound on first use rather than at construction.
if (!hasInitializedKernel)
initializeKernels(context);
// NOTE(review): argument 2 is bound twice, once through `->` and once through `.`;
// only one form can match the declared type of positions. Confirm which line belongs.
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
copyToContextKernel.setArg<cl_int>(5, copy);
// Launch with a work size of cl.getNumAtoms().
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
}
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -45,9 +45,8 @@ namespace OpenMM {
class OpenCLIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
public:
OpenCLIntegrateRPMDStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false), forces(NULL), positions(NULL), velocities(NULL), contractedForces(NULL), contractedPositions(NULL) {
IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false) {
}
~OpenCLIntegrateRPMDStepKernel();
/**
* Initialize the kernel.
*
......@@ -92,11 +91,11 @@ private:
int numCopies, numParticles, workgroupSize;
std::map<int, int> groupsByCopies;
int groupsNotContracted;
OpenCLArray* forces;
OpenCLArray* positions;
OpenCLArray* velocities;
OpenCLArray* contractedForces;
OpenCLArray* contractedPositions;
OpenCLArray forces;
OpenCLArray positions;
OpenCLArray velocities;
OpenCLArray contractedForces;
OpenCLArray contractedPositions;
cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
std::map<int, cl::Kernel> positionContractionKernels;
std::map<int, cl::Kernel> forceContractionKernels;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment