Commit d59b0373 authored by peastman's avatar peastman
Browse files

Convert OpenCLArray to use RAII

parent 44daf269
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. * * Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -27,15 +27,19 @@ ...@@ -27,15 +27,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * * along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "OpenCLContext.h" #define __CL_ENABLE_EXCEPTIONS
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#include "windowsExportOpenCL.h" #include "windowsExportOpenCL.h"
#include <cl.hpp>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
namespace OpenMM { namespace OpenMM {
class OpenCLContext;
/** /**
* This class encapsulates an OpenCL Buffer. It provides a simplified API for working with it, * This class encapsulates an OpenCL Buffer. It provides a simplified API for working with it,
* and for copying data to and from the OpenCL Buffer. * and for copying data to and from the OpenCL Buffer.
...@@ -69,6 +73,11 @@ public: ...@@ -69,6 +73,11 @@ public:
static OpenCLArray* create(OpenCLContext& context, cl::Buffer* buffer, int size, const std::string& name) { static OpenCLArray* create(OpenCLContext& context, cl::Buffer* buffer, int size, const std::string& name) {
return new OpenCLArray(context, buffer, size, sizeof(T), name); return new OpenCLArray(context, buffer, size, sizeof(T), name);
} }
/**
* Create an uninitialized OpenCLArray object. It does not point to any OpenCL Buffer,
* and cannot be used until initialize() is called on it.
*/
OpenCLArray();
/** /**
* Create an OpenCLArray object. * Create an OpenCLArray object.
* *
...@@ -90,6 +99,61 @@ public: ...@@ -90,6 +99,61 @@ public:
*/ */
OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name); OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name);
~OpenCLArray(); ~OpenCLArray();
/**
* Initialize this object.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
void initialize(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags = CL_MEM_READ_WRITE);
/**
* Initialize this object to use a preexisting Buffer.
*
* @param context the context for which to create the array
* @param buffer the OpenCL Buffer this object encapsulates
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
void initialize(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name);
/**
* Initialize this object. The template argument is the data type of each array element.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param name the name of the array
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
template <class T>
void initialize(OpenCLContext& context, int size, const std::string& name, cl_int flags = CL_MEM_READ_WRITE) {
initialize(context, size, sizeof(T), name, flags);
}
/**
* Initialize this object to use a preexisting Buffer. The template argument
* is the data type of each array element.
*
* @param context the context for which to create the array
* @param buffer the OpenCL Buffer this object encapsulates
* @param size the number of elements in the array
* @param name the name of the array
*/
template <class T>
void initialize(OpenCLContext& context, cl::Buffer* buffer, int size, const std::string& name) {
initialize(context, buffer, size, sizeof(T), name);
}
/**
* Recreate the internal storage to have a different size.
*/
void resize(int size);
/**
* Get whether this array has been initialized.
*/
bool isInitialized() const {
return (buffer != NULL);
}
/** /**
* Get the size of the array. * Get the size of the array.
*/ */
...@@ -155,9 +219,10 @@ public: ...@@ -155,9 +219,10 @@ public:
*/ */
void copyTo(OpenCLArray& dest) const; void copyTo(OpenCLArray& dest) const;
private: private:
OpenCLContext& context; OpenCLContext* context;
cl::Buffer* buffer; cl::Buffer* buffer;
int size, elementSize; int size, elementSize;
cl_int flags;
bool ownsBuffer; bool ownsBuffer;
std::string name; std::string name;
}; };
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
*/ */
#include "OpenCLArray.h" #include "OpenCLArray.h"
#include "OpenCLContext.h"
namespace OpenMM { namespace OpenMM {
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. * * Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -39,11 +39,11 @@ ...@@ -39,11 +39,11 @@
#include <pthread.h> #include <pthread.h>
#include <cl.hpp> #include <cl.hpp>
#include "windowsExportOpenCL.h" #include "windowsExportOpenCL.h"
#include "OpenCLArray.h"
#include "OpenCLPlatform.h" #include "OpenCLPlatform.h"
namespace OpenMM { namespace OpenMM {
class OpenCLArray;
class OpenCLForceInfo; class OpenCLForceInfo;
class OpenCLIntegrationUtilities; class OpenCLIntegrationUtilities;
class OpenCLExpressionUtilities; class OpenCLExpressionUtilities;
...@@ -227,49 +227,49 @@ public: ...@@ -227,49 +227,49 @@ public:
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom. * Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
*/ */
OpenCLArray& getPosq() { OpenCLArray& getPosq() {
return *posq; return posq;
} }
/** /**
* Get the array which contains a correction to the position of each atom. This only exists if getUseMixedPrecision() returns true. * Get the array which contains a correction to the position of each atom. This only exists if getUseMixedPrecision() returns true.
*/ */
OpenCLArray& getPosqCorrection() { OpenCLArray& getPosqCorrection() {
return *posqCorrection; return posqCorrection;
} }
/** /**
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom. * Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
*/ */
OpenCLArray& getVelm() { OpenCLArray& getVelm() {
return *velm; return velm;
} }
/** /**
* Get the array which contains the force on each atom. * Get the array which contains the force on each atom.
*/ */
OpenCLArray& getForce() { OpenCLArray& getForce() {
return *force; return force;
} }
/** /**
* Get the array which contains the buffers in which forces are computed. * Get the array which contains the buffers in which forces are computed.
*/ */
OpenCLArray& getForceBuffers() { OpenCLArray& getForceBuffers() {
return *forceBuffers; return forceBuffers;
} }
/** /**
* Get the array which contains a contribution to each force represented as 64 bit fixed point. * Get the array which contains a contribution to each force represented as 64 bit fixed point.
*/ */
OpenCLArray& getLongForceBuffer() { OpenCLArray& getLongForceBuffer() {
return *longForceBuffer; return longForceBuffer;
} }
/** /**
* Get the array which contains the buffer in which energy is computed. * Get the array which contains the buffer in which energy is computed.
*/ */
OpenCLArray& getEnergyBuffer() { OpenCLArray& getEnergyBuffer() {
return *energyBuffer; return energyBuffer;
} }
/** /**
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed. * Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
*/ */
OpenCLArray& getEnergyParamDerivBuffer() { OpenCLArray& getEnergyParamDerivBuffer() {
return *energyParamDerivBuffer; return energyParamDerivBuffer;
} }
/** /**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device. * Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
...@@ -288,7 +288,7 @@ public: ...@@ -288,7 +288,7 @@ public:
* Get the array which contains the index of each atom. * Get the array which contains the index of each atom.
*/ */
OpenCLArray& getAtomIndexArray() { OpenCLArray& getAtomIndexArray() {
return *atomIndexDevice; return atomIndexDevice;
} }
/** /**
* Get the number of cells by which the positions are offset. * Get the number of cells by which the positions are offset.
...@@ -762,17 +762,17 @@ private: ...@@ -762,17 +762,17 @@ private:
std::vector<mm_int4> posCellOffsets; std::vector<mm_int4> posCellOffsets;
cl::Buffer* pinnedBuffer; cl::Buffer* pinnedBuffer;
void* pinnedMemory; void* pinnedMemory;
OpenCLArray* posq; OpenCLArray posq;
OpenCLArray* posqCorrection; OpenCLArray posqCorrection;
OpenCLArray* velm; OpenCLArray velm;
OpenCLArray* force; OpenCLArray force;
OpenCLArray* forceBuffers; OpenCLArray forceBuffers;
OpenCLArray* longForceBuffer; OpenCLArray longForceBuffer;
OpenCLArray* energyBuffer; OpenCLArray energyBuffer;
OpenCLArray* energySum; OpenCLArray energySum;
OpenCLArray* energyParamDerivBuffer; OpenCLArray energyParamDerivBuffer;
OpenCLArray* atomIndexDevice; OpenCLArray atomIndexDevice;
OpenCLArray* chargeBuffer; OpenCLArray chargeBuffer;
std::vector<std::string> energyParamDerivNames; std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace; std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex; std::vector<int> atomIndex;
......
...@@ -243,9 +243,8 @@ private: ...@@ -243,9 +243,8 @@ private:
class OpenCLCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel { class OpenCLCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel {
public: public:
OpenCLCalcHarmonicBondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcHarmonicBondForceKernel(name, platform), OpenCLCalcHarmonicBondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcHarmonicBondForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), system(system), params(NULL) { hasInitializedKernel(false), cl(cl), system(system) {
} }
~OpenCLCalcHarmonicBondForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -276,7 +275,7 @@ private: ...@@ -276,7 +275,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
OpenCLArray* params; OpenCLArray params;
}; };
/** /**
...@@ -285,7 +284,7 @@ private: ...@@ -285,7 +284,7 @@ private:
class OpenCLCalcCustomBondForceKernel : public CalcCustomBondForceKernel { class OpenCLCalcCustomBondForceKernel : public CalcCustomBondForceKernel {
public: public:
OpenCLCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomBondForceKernel(name, platform), OpenCLCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomBondForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), system(system), params(NULL), globals(NULL) { hasInitializedKernel(false), cl(cl), system(system), params(NULL) {
} }
~OpenCLCalcCustomBondForceKernel(); ~OpenCLCalcCustomBondForceKernel();
/** /**
...@@ -319,7 +318,7 @@ private: ...@@ -319,7 +318,7 @@ private:
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
}; };
...@@ -330,9 +329,8 @@ private: ...@@ -330,9 +329,8 @@ private:
class OpenCLCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel { class OpenCLCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
public: public:
OpenCLCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcHarmonicAngleForceKernel(name, platform), OpenCLCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcHarmonicAngleForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), system(system), params(NULL) { hasInitializedKernel(false), cl(cl), system(system) {
} }
~OpenCLCalcHarmonicAngleForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -363,7 +361,7 @@ private: ...@@ -363,7 +361,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
OpenCLArray* params; OpenCLArray params;
}; };
/** /**
...@@ -372,7 +370,7 @@ private: ...@@ -372,7 +370,7 @@ private:
class OpenCLCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel { class OpenCLCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel {
public: public:
OpenCLCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomAngleForceKernel(name, platform), OpenCLCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomAngleForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), system(system), params(NULL), globals(NULL) { hasInitializedKernel(false), cl(cl), system(system), params(NULL) {
} }
~OpenCLCalcCustomAngleForceKernel(); ~OpenCLCalcCustomAngleForceKernel();
/** /**
...@@ -406,7 +404,7 @@ private: ...@@ -406,7 +404,7 @@ private:
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
}; };
...@@ -417,9 +415,8 @@ private: ...@@ -417,9 +415,8 @@ private:
class OpenCLCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel { class OpenCLCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
public: public:
OpenCLCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcPeriodicTorsionForceKernel(name, platform), OpenCLCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcPeriodicTorsionForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), system(system), params(NULL) { hasInitializedKernel(false), cl(cl), system(system) {
} }
~OpenCLCalcPeriodicTorsionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -450,7 +447,7 @@ private: ...@@ -450,7 +447,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
OpenCLArray* params; OpenCLArray params;
}; };
/** /**
...@@ -459,9 +456,8 @@ private: ...@@ -459,9 +456,8 @@ private:
class OpenCLCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel { class OpenCLCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
public: public:
OpenCLCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcRBTorsionForceKernel(name, platform), OpenCLCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcRBTorsionForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), system(system), params(NULL) { hasInitializedKernel(false), cl(cl), system(system) {
} }
~OpenCLCalcRBTorsionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -492,7 +488,7 @@ private: ...@@ -492,7 +488,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
OpenCLArray* params; OpenCLArray params;
}; };
/** /**
...@@ -501,9 +497,8 @@ private: ...@@ -501,9 +497,8 @@ private:
class OpenCLCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel { class OpenCLCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
public: public:
OpenCLCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCMAPTorsionForceKernel(name, platform), OpenCLCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCMAPTorsionForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), system(system), coefficients(NULL), mapPositions(NULL), torsionMaps(NULL) { hasInitializedKernel(false), cl(cl), system(system) {
} }
~OpenCLCalcCMAPTorsionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -535,9 +530,9 @@ private: ...@@ -535,9 +530,9 @@ private:
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
std::vector<mm_int2> mapPositionsVec; std::vector<mm_int2> mapPositionsVec;
OpenCLArray* coefficients; OpenCLArray coefficients;
OpenCLArray* mapPositions; OpenCLArray mapPositions;
OpenCLArray* torsionMaps; OpenCLArray torsionMaps;
}; };
/** /**
...@@ -546,7 +541,7 @@ private: ...@@ -546,7 +541,7 @@ private:
class OpenCLCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel { class OpenCLCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel {
public: public:
OpenCLCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomTorsionForceKernel(name, platform), OpenCLCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomTorsionForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), system(system), params(NULL), globals(NULL) { hasInitializedKernel(false), cl(cl), system(system), params(NULL) {
} }
~OpenCLCalcCustomTorsionForceKernel(); ~OpenCLCalcCustomTorsionForceKernel();
/** /**
...@@ -580,7 +575,7 @@ private: ...@@ -580,7 +575,7 @@ private:
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
}; };
...@@ -591,10 +586,7 @@ private: ...@@ -591,10 +586,7 @@ private:
class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel { class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public: public:
OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcNonbondedForceKernel(name, platform), OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcNonbondedForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), pmeGrid(NULL), hasInitializedKernel(false), cl(cl), sort(NULL), fft(NULL), dispersionFft(NULL), pmeio(NULL) {
pmeGrid2(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeDispersionBsplineModuliX(NULL),
pmeDispersionBsplineModuliY(NULL), pmeDispersionBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeAtomRange(NULL),
pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), dispersionFft(NULL), pmeio(NULL) {
} }
~OpenCLCalcNonbondedForceKernel(); ~OpenCLCalcNonbondedForceKernel();
/** /**
...@@ -660,21 +652,21 @@ private: ...@@ -660,21 +652,21 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLArray* sigmaEpsilon; OpenCLArray sigmaEpsilon;
OpenCLArray* exceptionParams; OpenCLArray exceptionParams;
OpenCLArray* cosSinSums; OpenCLArray cosSinSums;
OpenCLArray* pmeGrid; OpenCLArray pmeGrid;
OpenCLArray* pmeGrid2; OpenCLArray pmeGrid2;
OpenCLArray* pmeBsplineModuliX; OpenCLArray pmeBsplineModuliX;
OpenCLArray* pmeBsplineModuliY; OpenCLArray pmeBsplineModuliY;
OpenCLArray* pmeBsplineModuliZ; OpenCLArray pmeBsplineModuliZ;
OpenCLArray* pmeDispersionBsplineModuliX; OpenCLArray pmeDispersionBsplineModuliX;
OpenCLArray* pmeDispersionBsplineModuliY; OpenCLArray pmeDispersionBsplineModuliY;
OpenCLArray* pmeDispersionBsplineModuliZ; OpenCLArray pmeDispersionBsplineModuliZ;
OpenCLArray* pmeBsplineTheta; OpenCLArray pmeBsplineTheta;
OpenCLArray* pmeAtomRange; OpenCLArray pmeAtomRange;
OpenCLArray* pmeAtomGridIndex; OpenCLArray pmeAtomGridIndex;
OpenCLArray* pmeEnergyBuffer; OpenCLArray pmeEnergyBuffer;
OpenCLSort* sort; OpenCLSort* sort;
cl::CommandQueue pmeQueue; cl::CommandQueue pmeQueue;
cl::Event pmeSyncEvent; cl::Event pmeSyncEvent;
...@@ -717,7 +709,7 @@ private: ...@@ -717,7 +709,7 @@ private:
class OpenCLCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel { class OpenCLCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
public: public:
OpenCLCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomNonbondedForceKernel(name, platform), OpenCLCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomNonbondedForceKernel(name, platform),
cl(cl), params(NULL), globals(NULL), interactionGroupData(NULL), forceCopy(NULL), system(system), hasInitializedKernel(false) { cl(cl), params(NULL), forceCopy(NULL), system(system), hasInitializedKernel(false) {
} }
~OpenCLCalcCustomNonbondedForceKernel(); ~OpenCLCalcCustomNonbondedForceKernel();
/** /**
...@@ -749,13 +741,13 @@ private: ...@@ -749,13 +741,13 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray globals;
OpenCLArray* interactionGroupData; OpenCLArray interactionGroupData;
cl::Kernel interactionGroupKernel; cl::Kernel interactionGroupKernel;
std::vector<void*> interactionGroupArgs; std::vector<void*> interactionGroupArgs;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray*> tabulatedFunctions; std::vector<OpenCLArray> tabulatedFunctions;
double longRangeCoefficient; double longRangeCoefficient;
std::vector<double> longRangeCoefficientDerivs; std::vector<double> longRangeCoefficientDerivs;
bool hasInitializedLongRangeCorrection, hasInitializedKernel, hasParamDerivs; bool hasInitializedLongRangeCorrection, hasInitializedKernel, hasParamDerivs;
...@@ -770,10 +762,8 @@ private: ...@@ -770,10 +762,8 @@ private:
class OpenCLCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel { class OpenCLCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel {
public: public:
OpenCLCalcGBSAOBCForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGBSAOBCForceKernel(name, platform), cl(cl), OpenCLCalcGBSAOBCForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGBSAOBCForceKernel(name, platform), cl(cl),
hasCreatedKernels(false), params(NULL), bornSum(NULL), longBornSum(NULL), bornRadii(NULL), bornForce(NULL), hasCreatedKernels(false) {
longBornForce(NULL), obcChain(NULL) {
} }
~OpenCLCalcGBSAOBCForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -804,13 +794,13 @@ private: ...@@ -804,13 +794,13 @@ private:
int maxTiles; int maxTiles;
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
OpenCLArray* params; OpenCLArray params;
OpenCLArray* bornSum; OpenCLArray bornSum;
OpenCLArray* longBornSum; OpenCLArray longBornSum;
OpenCLArray* bornRadii; OpenCLArray bornRadii;
OpenCLArray* bornForce; OpenCLArray bornForce;
OpenCLArray* longBornForce; OpenCLArray longBornForce;
OpenCLArray* obcChain; OpenCLArray obcChain;
cl::Kernel computeBornSumKernel; cl::Kernel computeBornSumKernel;
cl::Kernel reduceBornSumKernel; cl::Kernel reduceBornSumKernel;
cl::Kernel force1Kernel; cl::Kernel force1Kernel;
...@@ -823,8 +813,7 @@ private: ...@@ -823,8 +813,7 @@ private:
class OpenCLCalcCustomGBForceKernel : public CalcCustomGBForceKernel { class OpenCLCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
public: public:
OpenCLCalcCustomGBForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomGBForceKernel(name, platform), OpenCLCalcCustomGBForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomGBForceKernel(name, platform),
hasInitializedKernels(false), cl(cl), params(NULL), computedValues(NULL), energyDerivs(NULL), energyDerivChain(NULL), longEnergyDerivs(NULL), globals(NULL), hasInitializedKernels(false), cl(cl), params(NULL), computedValues(NULL), energyDerivs(NULL), energyDerivChain(NULL), system(system) {
valueBuffers(NULL), longValueBuffers(NULL), system(system) {
} }
~OpenCLCalcCustomGBForceKernel(); ~OpenCLCalcCustomGBForceKernel();
/** /**
...@@ -862,14 +851,14 @@ private: ...@@ -862,14 +851,14 @@ private:
OpenCLParameterSet* energyDerivs; OpenCLParameterSet* energyDerivs;
OpenCLParameterSet* energyDerivChain; OpenCLParameterSet* energyDerivChain;
std::vector<OpenCLParameterSet*> dValuedParam; std::vector<OpenCLParameterSet*> dValuedParam;
std::vector<OpenCLArray*> dValue0dParam; std::vector<OpenCLArray> dValue0dParam;
OpenCLArray* longEnergyDerivs; OpenCLArray longEnergyDerivs;
OpenCLArray* globals; OpenCLArray globals;
OpenCLArray* valueBuffers; OpenCLArray valueBuffers;
OpenCLArray* longValueBuffers; OpenCLArray longValueBuffers;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray*> tabulatedFunctions; std::vector<OpenCLArray> tabulatedFunctions;
std::vector<bool> pairValueUsesParam, pairEnergyUsesParam, pairEnergyUsesValue; std::vector<bool> pairValueUsesParam, pairEnergyUsesParam, pairEnergyUsesValue;
const System& system; const System& system;
cl::Kernel pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel; cl::Kernel pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel;
...@@ -883,7 +872,7 @@ private: ...@@ -883,7 +872,7 @@ private:
class OpenCLCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel { class OpenCLCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel {
public: public:
OpenCLCalcCustomExternalForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomExternalForceKernel(name, platform), OpenCLCalcCustomExternalForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomExternalForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), system(system), params(NULL), globals(NULL) { hasInitializedKernel(false), cl(cl), system(system), params(NULL) {
} }
~OpenCLCalcCustomExternalForceKernel(); ~OpenCLCalcCustomExternalForceKernel();
/** /**
...@@ -917,7 +906,7 @@ private: ...@@ -917,7 +906,7 @@ private:
ForceInfo* info; ForceInfo* info;
const System& system; const System& system;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
}; };
...@@ -928,8 +917,7 @@ private: ...@@ -928,8 +917,7 @@ private:
class OpenCLCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel { class OpenCLCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
public: public:
OpenCLCalcCustomHbondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomHbondForceKernel(name, platform), OpenCLCalcCustomHbondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomHbondForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), donorParams(NULL), acceptorParams(NULL), donors(NULL), acceptors(NULL), hasInitializedKernel(false), cl(cl), donorParams(NULL), acceptorParams(NULL), system(system) {
donorBufferIndices(NULL), acceptorBufferIndices(NULL), globals(NULL), donorExclusions(NULL), acceptorExclusions(NULL), system(system) {
} }
~OpenCLCalcCustomHbondForceKernel(); ~OpenCLCalcCustomHbondForceKernel();
/** /**
...@@ -963,16 +951,16 @@ private: ...@@ -963,16 +951,16 @@ private:
ForceInfo* info; ForceInfo* info;
OpenCLParameterSet* donorParams; OpenCLParameterSet* donorParams;
OpenCLParameterSet* acceptorParams; OpenCLParameterSet* acceptorParams;
OpenCLArray* globals; OpenCLArray globals;
OpenCLArray* donors; OpenCLArray donors;
OpenCLArray* acceptors; OpenCLArray acceptors;
OpenCLArray* donorBufferIndices; OpenCLArray donorBufferIndices;
OpenCLArray* acceptorBufferIndices; OpenCLArray acceptorBufferIndices;
OpenCLArray* donorExclusions; OpenCLArray donorExclusions;
OpenCLArray* acceptorExclusions; OpenCLArray acceptorExclusions;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray*> tabulatedFunctions; std::vector<OpenCLArray> tabulatedFunctions;
const System& system; const System& system;
cl::Kernel donorKernel, acceptorKernel; cl::Kernel donorKernel, acceptorKernel;
}; };
...@@ -983,7 +971,7 @@ private: ...@@ -983,7 +971,7 @@ private:
class OpenCLCalcCustomCentroidBondForceKernel : public CalcCustomCentroidBondForceKernel { class OpenCLCalcCustomCentroidBondForceKernel : public CalcCustomCentroidBondForceKernel {
public: public:
OpenCLCalcCustomCentroidBondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomCentroidBondForceKernel(name, platform), OpenCLCalcCustomCentroidBondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomCentroidBondForceKernel(name, platform),
cl(cl), params(NULL), globals(NULL), groupParticles(NULL), groupWeights(NULL), groupOffsets(NULL), groupForces(NULL), bondGroups(NULL), centerPositions(NULL), system(system) { cl(cl), params(NULL), system(system) {
} }
~OpenCLCalcCustomCentroidBondForceKernel(); ~OpenCLCalcCustomCentroidBondForceKernel();
/** /**
...@@ -1017,16 +1005,16 @@ private: ...@@ -1017,16 +1005,16 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray globals;
OpenCLArray* groupParticles; OpenCLArray groupParticles;
OpenCLArray* groupWeights; OpenCLArray groupWeights;
OpenCLArray* groupOffsets; OpenCLArray groupOffsets;
OpenCLArray* groupForces; OpenCLArray groupForces;
OpenCLArray* bondGroups; OpenCLArray bondGroups;
OpenCLArray* centerPositions; OpenCLArray centerPositions;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray*> tabulatedFunctions; std::vector<OpenCLArray> tabulatedFunctions;
cl::Kernel computeCentersKernel, groupForcesKernel, applyForcesKernel; cl::Kernel computeCentersKernel, groupForcesKernel, applyForcesKernel;
const System& system; const System& system;
}; };
...@@ -1037,7 +1025,7 @@ private: ...@@ -1037,7 +1025,7 @@ private:
class OpenCLCalcCustomCompoundBondForceKernel : public CalcCustomCompoundBondForceKernel { class OpenCLCalcCustomCompoundBondForceKernel : public CalcCustomCompoundBondForceKernel {
public: public:
OpenCLCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomCompoundBondForceKernel(name, platform), OpenCLCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomCompoundBondForceKernel(name, platform),
cl(cl), params(NULL), globals(NULL), system(system) { cl(cl), params(NULL), system(system) {
} }
~OpenCLCalcCustomCompoundBondForceKernel(); ~OpenCLCalcCustomCompoundBondForceKernel();
/** /**
...@@ -1070,10 +1058,10 @@ private: ...@@ -1070,10 +1058,10 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray*> tabulatedFunctions; std::vector<OpenCLArray> tabulatedFunctions;
const System& system; const System& system;
}; };
...@@ -1083,9 +1071,7 @@ private: ...@@ -1083,9 +1071,7 @@ private:
class OpenCLCalcCustomManyParticleForceKernel : public CalcCustomManyParticleForceKernel { class OpenCLCalcCustomManyParticleForceKernel : public CalcCustomManyParticleForceKernel {
public: public:
OpenCLCalcCustomManyParticleForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomManyParticleForceKernel(name, platform), OpenCLCalcCustomManyParticleForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomManyParticleForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), params(NULL), globals(NULL), particleTypes(NULL), orderIndex(NULL), particleOrder(NULL), exclusions(NULL), hasInitializedKernel(false), cl(cl), params(NULL), system(system) {
exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighborPairs(NULL), numNeighborPairs(NULL), neighborStartIndex(NULL),
numNeighborsForAtom(NULL), neighbors(NULL), system(system) {
} }
~OpenCLCalcCustomManyParticleForceKernel(); ~OpenCLCalcCustomManyParticleForceKernel();
/** /**
...@@ -1120,22 +1106,22 @@ private: ...@@ -1120,22 +1106,22 @@ private:
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize; int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray globals;
OpenCLArray* particleTypes; OpenCLArray particleTypes;
OpenCLArray* orderIndex; OpenCLArray orderIndex;
OpenCLArray* particleOrder; OpenCLArray particleOrder;
OpenCLArray* exclusions; OpenCLArray exclusions;
OpenCLArray* exclusionStartIndex; OpenCLArray exclusionStartIndex;
OpenCLArray* blockCenter; OpenCLArray blockCenter;
OpenCLArray* blockBoundingBox; OpenCLArray blockBoundingBox;
OpenCLArray* neighborPairs; OpenCLArray neighborPairs;
OpenCLArray* numNeighborPairs; OpenCLArray numNeighborPairs;
OpenCLArray* neighborStartIndex; OpenCLArray neighborStartIndex;
OpenCLArray* numNeighborsForAtom; OpenCLArray numNeighborsForAtom;
OpenCLArray* neighbors; OpenCLArray neighbors;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
std::vector<OpenCLArray*> tabulatedFunctions; std::vector<OpenCLArray> tabulatedFunctions;
const System& system; const System& system;
cl::Kernel forceKernel, blockBoundsKernel, neighborsKernel, startIndicesKernel, copyPairsKernel; cl::Kernel forceKernel, blockBoundsKernel, neighborsKernel, startIndicesKernel, copyPairsKernel;
}; };
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "OpenCLArray.h" #include "OpenCLArray.h"
#include "OpenCLContext.h"
#include "windowsExportOpenCL.h" #include "windowsExportOpenCL.h"
namespace OpenMM { namespace OpenMM {
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2012 Stanford University and the Authors. * * Portions copyright (c) 2012-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -25,14 +25,38 @@ ...@@ -25,14 +25,38 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "OpenCLArray.h" #include "OpenCLArray.h"
#include "OpenCLContext.h"
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
using namespace OpenMM; using namespace OpenMM;
OpenCLArray::OpenCLArray(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags) : OpenCLArray::OpenCLArray() : buffer(NULL), ownsBuffer(false) {
context(context), size(size), elementSize(elementSize), name(name), ownsBuffer(true) { }
OpenCLArray::OpenCLArray(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags) : buffer(NULL) {
initialize(context, size, elementSize, name, flags);
}
OpenCLArray::OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name) : buffer(NULL) {
initialize(context, buffer, size, elementSize, name);
}
OpenCLArray::~OpenCLArray() {
if (buffer != NULL && ownsBuffer)
delete buffer;
}
void OpenCLArray::initialize(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags) {
if (buffer != NULL)
throw OpenMMException("OpenCLArray has already been initialized");
this->context = &context;
this->size = size;
this->elementSize = elementSize;
this->name = name;
this->flags = flags;
ownsBuffer = true;
try { try {
buffer = new cl::Buffer(context.getContext(), flags, size*elementSize); buffer = new cl::Buffer(context.getContext(), flags, size*elementSize);
} }
...@@ -43,18 +67,32 @@ OpenCLArray::OpenCLArray(OpenCLContext& context, int size, int elementSize, cons ...@@ -43,18 +67,32 @@ OpenCLArray::OpenCLArray(OpenCLContext& context, int size, int elementSize, cons
} }
} }
OpenCLArray::OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name) : void OpenCLArray::initialize(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name) {
context(context), buffer(buffer), size(size), elementSize(elementSize), name(name), ownsBuffer(false) { if (this->buffer != NULL)
throw OpenMMException("OpenCLArray has already been initialized");
this->context = &context;
this->buffer = buffer;
this->size = size;
this->elementSize = elementSize;
this->name = name;
ownsBuffer = false;
} }
OpenCLArray::~OpenCLArray() { void OpenCLArray::resize(int size) {
if (ownsBuffer) if (buffer == NULL)
delete buffer; throw OpenMMException("OpenCLArray has not been initialized");
if (!ownsBuffer)
throw OpenMMException("Cannot resize an array that does not own its storage");
delete buffer;
buffer = NULL;
initialize(*context, size, elementSize, name, flags);
} }
void OpenCLArray::upload(const void* data, bool blocking) { void OpenCLArray::upload(const void* data, bool blocking) {
if (buffer == NULL)
throw OpenMMException("OpenCLArray has not been initialized");
try { try {
context.getQueue().enqueueWriteBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*elementSize, data); context->getQueue().enqueueWriteBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*elementSize, data);
} }
catch (cl::Error err) { catch (cl::Error err) {
std::stringstream str; std::stringstream str;
...@@ -64,8 +102,10 @@ void OpenCLArray::upload(const void* data, bool blocking) { ...@@ -64,8 +102,10 @@ void OpenCLArray::upload(const void* data, bool blocking) {
} }
void OpenCLArray::download(void* data, bool blocking) const { void OpenCLArray::download(void* data, bool blocking) const {
if (buffer == NULL)
throw OpenMMException("OpenCLArray has not been initialized");
try { try {
context.getQueue().enqueueReadBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*elementSize, data); context->getQueue().enqueueReadBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*elementSize, data);
} }
catch (cl::Error err) { catch (cl::Error err) {
std::stringstream str; std::stringstream str;
...@@ -75,10 +115,12 @@ void OpenCLArray::download(void* data, bool blocking) const { ...@@ -75,10 +115,12 @@ void OpenCLArray::download(void* data, bool blocking) const {
} }
void OpenCLArray::copyTo(OpenCLArray& dest) const { void OpenCLArray::copyTo(OpenCLArray& dest) const {
if (buffer == NULL)
throw OpenMMException("OpenCLArray has not been initialized");
if (dest.getSize() != size || dest.getElementSize() != elementSize) if (dest.getSize() != size || dest.getElementSize() != elementSize)
throw OpenMMException("Error copying array "+name+" to "+dest.getName()+": The destination array does not match the size of the array"); throw OpenMMException("Error copying array "+name+" to "+dest.getName()+": The destination array does not match the size of the array");
try { try {
context.getQueue().enqueueCopyBuffer(*buffer, dest.getDeviceBuffer(), 0, 0, size*elementSize); context->getQueue().enqueueCopyBuffer(*buffer, dest.getDeviceBuffer(), 0, 0, size*elementSize);
} }
catch (cl::Error err) { catch (cl::Error err) {
std::stringstream str; std::stringstream str;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. * * Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -68,9 +68,8 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i ...@@ -68,9 +68,8 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
} }
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData, OpenCLContext* originalContext) : OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData, OpenCLContext* originalContext) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL), system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), energySum(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
chargeBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") { if (precision == "single") {
useDoublePrecision = false; useDoublePrecision = false;
useMixedPrecision = false; useMixedPrecision = false;
...@@ -275,23 +274,23 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -275,23 +274,23 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize; numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize;
numThreadBlocks = numThreadBlocksPerComputeUnit*device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(); numThreadBlocks = numThreadBlocksPerComputeUnit*device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
if (useDoublePrecision) { if (useDoublePrecision) {
posq = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "posq"); posq.initialize<mm_double4>(*this, paddedNumAtoms, "posq");
velm = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "velm"); velm.initialize<mm_double4>(*this, paddedNumAtoms, "velm");
compilationDefines["USE_DOUBLE_PRECISION"] = "1"; compilationDefines["USE_DOUBLE_PRECISION"] = "1";
compilationDefines["convert_real4"] = "convert_double4"; compilationDefines["convert_real4"] = "convert_double4";
compilationDefines["convert_mixed4"] = "convert_double4"; compilationDefines["convert_mixed4"] = "convert_double4";
} }
else if (useMixedPrecision) { else if (useMixedPrecision) {
posq = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms, "posq"); posq.initialize<mm_float4>(*this, paddedNumAtoms, "posq");
posqCorrection = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms, "posq"); posqCorrection.initialize<mm_float4>(*this, paddedNumAtoms, "posq");
velm = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "velm"); velm.initialize<mm_double4>(*this, paddedNumAtoms, "velm");
compilationDefines["USE_MIXED_PRECISION"] = "1"; compilationDefines["USE_MIXED_PRECISION"] = "1";
compilationDefines["convert_real4"] = "convert_float4"; compilationDefines["convert_real4"] = "convert_float4";
compilationDefines["convert_mixed4"] = "convert_double4"; compilationDefines["convert_mixed4"] = "convert_double4";
} }
else { else {
posq = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms, "posq"); posq.initialize<mm_float4>(*this, paddedNumAtoms, "posq");
velm = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms, "velm"); velm.initialize<mm_float4>(*this, paddedNumAtoms, "velm");
compilationDefines["convert_real4"] = "convert_float4"; compilationDefines["convert_real4"] = "convert_float4";
compilationDefines["convert_mixed4"] = "convert_float4"; compilationDefines["convert_mixed4"] = "convert_float4";
} }
...@@ -429,28 +428,6 @@ OpenCLContext::~OpenCLContext() { ...@@ -429,28 +428,6 @@ OpenCLContext::~OpenCLContext() {
delete computation; delete computation;
if (pinnedBuffer != NULL) if (pinnedBuffer != NULL)
delete pinnedBuffer; delete pinnedBuffer;
if (posq != NULL)
delete posq;
if (posqCorrection != NULL)
delete posqCorrection;
if (velm != NULL)
delete velm;
if (force != NULL)
delete force;
if (forceBuffers != NULL)
delete forceBuffers;
if (longForceBuffer != NULL)
delete longForceBuffer;
if (energyBuffer != NULL)
delete energyBuffer;
if (energySum != NULL)
delete energySum;
if (energyParamDerivBuffer != NULL)
delete energyParamDerivBuffer;
if (atomIndexDevice != NULL)
delete atomIndexDevice;
if (chargeBuffer != NULL)
delete chargeBuffer;
if (integration != NULL) if (integration != NULL)
delete integration; delete integration;
if (expression != NULL) if (expression != NULL)
...@@ -471,42 +448,42 @@ void OpenCLContext::initialize() { ...@@ -471,42 +448,42 @@ void OpenCLContext::initialize() {
numForceBuffers = std::max(numForceBuffers, force->getRequiredForceBuffers()); numForceBuffers = std::max(numForceBuffers, force->getRequiredForceBuffers());
int energyBufferSize = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()); int energyBufferSize = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) { if (useDoublePrecision) {
forceBuffers = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers"); forceBuffers.initialize<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_double4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force"); force.initialize<mm_double4>(*this, &forceBuffers.getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer"); energyBuffer.initialize<cl_double>(*this, energyBufferSize, "energyBuffer");
energySum = OpenCLArray::create<cl_double>(*this, 1, "energySum"); energySum.initialize<cl_double>(*this, 1, "energySum");
} }
else if (useMixedPrecision) { else if (useMixedPrecision) {
forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers"); forceBuffers.initialize<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force"); force.initialize<mm_float4>(*this, &forceBuffers.getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer"); energyBuffer.initialize<cl_double>(*this, energyBufferSize, "energyBuffer");
energySum = OpenCLArray::create<cl_double>(*this, 1, "energySum"); energySum.initialize<cl_double>(*this, 1, "energySum");
} }
else { else {
forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers"); forceBuffers.initialize<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force"); force.initialize<mm_float4>(*this, &forceBuffers.getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_float>(*this, energyBufferSize, "energyBuffer"); energyBuffer.initialize<cl_float>(*this, energyBufferSize, "energyBuffer");
energySum = OpenCLArray::create<cl_float>(*this, 1, "energySum"); energySum.initialize<cl_float>(*this, 1, "energySum");
} }
if (supports64BitGlobalAtomics) { if (supports64BitGlobalAtomics) {
longForceBuffer = OpenCLArray::create<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer"); longForceBuffer.initialize<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer");
reduceForcesKernel.setArg<cl::Buffer>(0, longForceBuffer->getDeviceBuffer()); reduceForcesKernel.setArg<cl::Buffer>(0, longForceBuffer.getDeviceBuffer());
reduceForcesKernel.setArg<cl::Buffer>(1, forceBuffers->getDeviceBuffer()); reduceForcesKernel.setArg<cl::Buffer>(1, forceBuffers.getDeviceBuffer());
reduceForcesKernel.setArg<cl_int>(2, paddedNumAtoms); reduceForcesKernel.setArg<cl_int>(2, paddedNumAtoms);
reduceForcesKernel.setArg<cl_int>(3, numForceBuffers); reduceForcesKernel.setArg<cl_int>(3, numForceBuffers);
addAutoclearBuffer(*longForceBuffer); addAutoclearBuffer(longForceBuffer);
} }
addAutoclearBuffer(*forceBuffers); addAutoclearBuffer(forceBuffers);
addAutoclearBuffer(*energyBuffer); addAutoclearBuffer(energyBuffer);
int numEnergyParamDerivs = energyParamDerivNames.size(); int numEnergyParamDerivs = energyParamDerivNames.size();
if (numEnergyParamDerivs > 0) { if (numEnergyParamDerivs > 0) {
if (useDoublePrecision || useMixedPrecision) if (useDoublePrecision || useMixedPrecision)
energyParamDerivBuffer = OpenCLArray::create<cl_double>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer"); energyParamDerivBuffer.initialize<cl_double>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer");
else else
energyParamDerivBuffer = OpenCLArray::create<cl_float>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer"); energyParamDerivBuffer.initialize<cl_float>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer");
addAutoclearBuffer(*energyParamDerivBuffer); addAutoclearBuffer(energyParamDerivBuffer);
} }
int bufferBytes = max(velm->getSize()*velm->getElementSize(), energyBufferSize*energyBuffer->getElementSize()); int bufferBytes = max(velm.getSize()*velm.getElementSize(), energyBufferSize*energyBuffer.getElementSize());
pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes); pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = currentQueue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes); pinnedMemory = currentQueue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
...@@ -516,12 +493,12 @@ void OpenCLContext::initialize() { ...@@ -516,12 +493,12 @@ void OpenCLContext::initialize() {
else else
((mm_float4*) pinnedMemory)[i] = mm_float4(0.0f, 0.0f, 0.0f, mass == 0.0 ? 0.0f : (cl_float) (1.0/mass)); ((mm_float4*) pinnedMemory)[i] = mm_float4(0.0f, 0.0f, 0.0f, mass == 0.0 ? 0.0f : (cl_float) (1.0/mass));
} }
velm->upload(pinnedMemory); velm.upload(pinnedMemory);
atomIndexDevice = OpenCLArray::create<cl_int>(*this, paddedNumAtoms, "atomIndexDevice"); atomIndexDevice.initialize<cl_int>(*this, paddedNumAtoms, "atomIndexDevice");
atomIndex.resize(paddedNumAtoms); atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i) for (int i = 0; i < paddedNumAtoms; ++i)
atomIndex[i] = i; atomIndex[i] = i;
atomIndexDevice->upload(atomIndex); atomIndexDevice.upload(atomIndex);
findMoleculeGroups(); findMoleculeGroups();
nonbonded->initialize(system); nonbonded->initialize(system);
} }
...@@ -756,7 +733,7 @@ void OpenCLContext::reduceForces() { ...@@ -756,7 +733,7 @@ void OpenCLContext::reduceForces() {
if (supports64BitGlobalAtomics) if (supports64BitGlobalAtomics)
executeKernel(reduceForcesKernel, paddedNumAtoms, 128); executeKernel(reduceForcesKernel, paddedNumAtoms, 128);
else else
reduceBuffer(*forceBuffers, numForceBuffers); reduceBuffer(forceBuffers, numForceBuffers);
} }
void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) { void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
...@@ -771,42 +748,42 @@ double OpenCLContext::reduceEnergy() { ...@@ -771,42 +748,42 @@ double OpenCLContext::reduceEnergy() {
int workGroupSize = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); int workGroupSize = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
if (workGroupSize > 512) if (workGroupSize > 512)
workGroupSize = 512; workGroupSize = 512;
reduceEnergyKernel.setArg<cl::Buffer>(0, energyBuffer->getDeviceBuffer()); reduceEnergyKernel.setArg<cl::Buffer>(0, energyBuffer.getDeviceBuffer());
reduceEnergyKernel.setArg<cl::Buffer>(1, energySum->getDeviceBuffer()); reduceEnergyKernel.setArg<cl::Buffer>(1, energySum.getDeviceBuffer());
reduceEnergyKernel.setArg<cl_int>(2, energyBuffer->getSize()); reduceEnergyKernel.setArg<cl_int>(2, energyBuffer.getSize());
reduceEnergyKernel.setArg<cl_int>(3, workGroupSize); reduceEnergyKernel.setArg<cl_int>(3, workGroupSize);
reduceEnergyKernel.setArg(4, workGroupSize*energyBuffer->getElementSize(), NULL); reduceEnergyKernel.setArg(4, workGroupSize*energyBuffer.getElementSize(), NULL);
executeKernel(reduceEnergyKernel, workGroupSize, workGroupSize); executeKernel(reduceEnergyKernel, workGroupSize, workGroupSize);
if (getUseDoublePrecision() || getUseMixedPrecision()) { if (getUseDoublePrecision() || getUseMixedPrecision()) {
double energy; double energy;
energySum->download(&energy); energySum.download(&energy);
return energy; return energy;
} }
else { else {
float energy; float energy;
energySum->download(&energy); energySum.download(&energy);
return energy; return energy;
} }
} }
void OpenCLContext::setCharges(const vector<double>& charges) { void OpenCLContext::setCharges(const vector<double>& charges) {
if (chargeBuffer == NULL) if (!chargeBuffer.isInitialized())
chargeBuffer = new OpenCLArray(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer"); chargeBuffer.initialize(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
if (getUseDoublePrecision()) { if (getUseDoublePrecision()) {
double* c = (double*) getPinnedBuffer(); double* c = (double*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++) for (int i = 0; i < charges.size(); i++)
c[i] = charges[i]; c[i] = charges[i];
chargeBuffer->upload(c); chargeBuffer.upload(c);
} }
else { else {
float* c = (float*) getPinnedBuffer(); float* c = (float*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++) for (int i = 0; i < charges.size(); i++)
c[i] = (float) charges[i]; c[i] = (float) charges[i];
chargeBuffer->upload(c); chargeBuffer.upload(c);
} }
setChargesKernel.setArg<cl::Buffer>(0, chargeBuffer->getDeviceBuffer()); setChargesKernel.setArg<cl::Buffer>(0, chargeBuffer.getDeviceBuffer());
setChargesKernel.setArg<cl::Buffer>(1, posq->getDeviceBuffer()); setChargesKernel.setArg<cl::Buffer>(1, posq.getDeviceBuffer());
setChargesKernel.setArg<cl::Buffer>(2, atomIndexDevice->getDeviceBuffer()); setChargesKernel.setArg<cl::Buffer>(2, atomIndexDevice.getDeviceBuffer());
setChargesKernel.setArg<cl_int>(3, numAtoms); setChargesKernel.setArg<cl_int>(3, numAtoms);
executeKernel(setChargesKernel, numAtoms); executeKernel(setChargesKernel, numAtoms);
} }
...@@ -1069,16 +1046,16 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) { ...@@ -1069,16 +1046,16 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
vector<mm_double4> newPosq(paddedNumAtoms, mm_double4(0,0,0,0)); vector<mm_double4> newPosq(paddedNumAtoms, mm_double4(0,0,0,0));
vector<mm_double4> oldVelm(paddedNumAtoms); vector<mm_double4> oldVelm(paddedNumAtoms);
vector<mm_double4> newVelm(paddedNumAtoms, mm_double4(0,0,0,0)); vector<mm_double4> newVelm(paddedNumAtoms, mm_double4(0,0,0,0));
posq->download(oldPosq); posq.download(oldPosq);
velm->download(oldVelm); velm.download(oldVelm);
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
int index = atomIndex[i]; int index = atomIndex[i];
newPosq[index] = oldPosq[i]; newPosq[index] = oldPosq[i];
newVelm[index] = oldVelm[i]; newVelm[index] = oldVelm[i];
newCellOffsets[index] = posCellOffsets[i]; newCellOffsets[index] = posCellOffsets[i];
} }
posq->upload(newPosq); posq.upload(newPosq);
velm->upload(newVelm); velm.upload(newVelm);
} }
else if (useMixedPrecision) { else if (useMixedPrecision) {
vector<mm_float4> oldPosq(paddedNumAtoms); vector<mm_float4> oldPosq(paddedNumAtoms);
...@@ -1087,8 +1064,8 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) { ...@@ -1087,8 +1064,8 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
vector<mm_float4> newPosqCorrection(paddedNumAtoms, mm_float4(0,0,0,0)); vector<mm_float4> newPosqCorrection(paddedNumAtoms, mm_float4(0,0,0,0));
vector<mm_double4> oldVelm(paddedNumAtoms); vector<mm_double4> oldVelm(paddedNumAtoms);
vector<mm_double4> newVelm(paddedNumAtoms, mm_double4(0,0,0,0)); vector<mm_double4> newVelm(paddedNumAtoms, mm_double4(0,0,0,0));
posq->download(oldPosq); posq.download(oldPosq);
velm->download(oldVelm); velm.download(oldVelm);
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
int index = atomIndex[i]; int index = atomIndex[i];
newPosq[index] = oldPosq[i]; newPosq[index] = oldPosq[i];
...@@ -1096,31 +1073,31 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) { ...@@ -1096,31 +1073,31 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
newVelm[index] = oldVelm[i]; newVelm[index] = oldVelm[i];
newCellOffsets[index] = posCellOffsets[i]; newCellOffsets[index] = posCellOffsets[i];
} }
posq->upload(newPosq); posq.upload(newPosq);
posqCorrection->upload(newPosqCorrection); posqCorrection.upload(newPosqCorrection);
velm->upload(newVelm); velm.upload(newVelm);
} }
else { else {
vector<mm_float4> oldPosq(paddedNumAtoms); vector<mm_float4> oldPosq(paddedNumAtoms);
vector<mm_float4> newPosq(paddedNumAtoms, mm_float4(0,0,0,0)); vector<mm_float4> newPosq(paddedNumAtoms, mm_float4(0,0,0,0));
vector<mm_float4> oldVelm(paddedNumAtoms); vector<mm_float4> oldVelm(paddedNumAtoms);
vector<mm_float4> newVelm(paddedNumAtoms, mm_float4(0,0,0,0)); vector<mm_float4> newVelm(paddedNumAtoms, mm_float4(0,0,0,0));
posq->download(oldPosq); posq.download(oldPosq);
velm->download(oldVelm); velm.download(oldVelm);
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
int index = atomIndex[i]; int index = atomIndex[i];
newPosq[index] = oldPosq[i]; newPosq[index] = oldPosq[i];
newVelm[index] = oldVelm[i]; newVelm[index] = oldVelm[i];
newCellOffsets[index] = posCellOffsets[i]; newCellOffsets[index] = posCellOffsets[i];
} }
posq->upload(newPosq); posq.upload(newPosq);
velm->upload(newVelm); velm.upload(newVelm);
} }
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
atomIndex[i] = i; atomIndex[i] = i;
posCellOffsets[i] = newCellOffsets[i]; posCellOffsets[i] = newCellOffsets[i];
} }
atomIndexDevice->upload(atomIndex); atomIndexDevice.upload(atomIndex);
findMoleculeGroups(); findMoleculeGroups();
for (auto listener : reorderListeners) for (auto listener : reorderListeners)
listener->execute(); listener->execute();
...@@ -1152,10 +1129,10 @@ void OpenCLContext::reorderAtomsImpl() { ...@@ -1152,10 +1129,10 @@ void OpenCLContext::reorderAtomsImpl() {
vector<Real4> oldPosq(paddedNumAtoms); vector<Real4> oldPosq(paddedNumAtoms);
vector<Real4> oldPosqCorrection(paddedNumAtoms); vector<Real4> oldPosqCorrection(paddedNumAtoms);
vector<Mixed4> oldVelm(paddedNumAtoms); vector<Mixed4> oldVelm(paddedNumAtoms);
posq->download(oldPosq); posq.download(oldPosq);
velm->download(oldVelm); velm.download(oldVelm);
if (useMixedPrecision) if (useMixedPrecision)
posqCorrection->download(oldPosqCorrection); posqCorrection.download(oldPosqCorrection);
Real minx = oldPosq[0].x, maxx = oldPosq[0].x; Real minx = oldPosq[0].x, maxx = oldPosq[0].x;
Real miny = oldPosq[0].y, maxy = oldPosq[0].y; Real miny = oldPosq[0].y, maxy = oldPosq[0].y;
Real minz = oldPosq[0].z, maxz = oldPosq[0].z; Real minz = oldPosq[0].z, maxz = oldPosq[0].z;
...@@ -1299,11 +1276,11 @@ void OpenCLContext::reorderAtomsImpl() { ...@@ -1299,11 +1276,11 @@ void OpenCLContext::reorderAtomsImpl() {
atomIndex[i] = originalIndex[i]; atomIndex[i] = originalIndex[i];
posCellOffsets[i] = newCellOffsets[i]; posCellOffsets[i] = newCellOffsets[i];
} }
posq->upload(newPosq); posq.upload(newPosq);
if (useMixedPrecision) if (useMixedPrecision)
posqCorrection->upload(newPosqCorrection); posqCorrection.upload(newPosqCorrection);
velm->upload(newVelm); velm.upload(newVelm);
atomIndexDevice->upload(atomIndex); atomIndexDevice.upload(atomIndex);
for (auto listener : reorderListeners) for (auto listener : reorderListeners)
listener->execute(); listener->execute();
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2017 Stanford University and the Authors. * * Portions copyright (c) 2008-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -532,11 +532,6 @@ private: ...@@ -532,11 +532,6 @@ private:
const HarmonicBondForce& force; const HarmonicBondForce& force;
}; };
OpenCLCalcHarmonicBondForceKernel::~OpenCLCalcHarmonicBondForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) { void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
int numContexts = cl.getPlatformData().contexts.size(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts; int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts;
...@@ -545,18 +540,18 @@ void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const H ...@@ -545,18 +540,18 @@ void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const H
if (numBonds == 0) if (numBonds == 0)
return; return;
vector<vector<int> > atoms(numBonds, vector<int>(2)); vector<vector<int> > atoms(numBonds, vector<int>(2));
params = OpenCLArray::create<mm_float2>(cl, numBonds, "bondParams"); params.initialize<mm_float2>(cl, numBonds, "bondParams");
vector<mm_float2> paramVector(numBonds); vector<mm_float2> paramVector(numBonds);
for (int i = 0; i < numBonds; i++) { for (int i = 0; i < numBonds; i++) {
double length, k; double length, k;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k); force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
paramVector[i] = mm_float2((cl_float) length, (cl_float) k); paramVector[i] = mm_float2((cl_float) length, (cl_float) k);
} }
params->upload(paramVector); params.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::harmonicBondForce; replacements["COMPUTE_FORCE"] = OpenCLKernelSources::harmonicBondForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params->getDeviceBuffer(), "float2"); replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params.getDeviceBuffer(), "float2");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::bondForce, replacements), force.getForceGroup()); cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::bondForce, replacements), force.getForceGroup());
info = new ForceInfo(force); info = new ForceInfo(force);
cl.addForce(info); cl.addForce(info);
...@@ -584,7 +579,7 @@ void OpenCLCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& con ...@@ -584,7 +579,7 @@ void OpenCLCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& con
force.getBondParameters(startIndex+i, atom1, atom2, length, k); force.getBondParameters(startIndex+i, atom1, atom2, length, k);
paramVector[i] = mm_float2((cl_float) length, (cl_float) k); paramVector[i] = mm_float2((cl_float) length, (cl_float) k);
} }
params->upload(paramVector); params.upload(paramVector);
// Mark that the current reordering may be invalid. // Mark that the current reordering may be invalid.
...@@ -623,8 +618,6 @@ private: ...@@ -623,8 +618,6 @@ private:
OpenCLCalcCustomBondForceKernel::~OpenCLCalcCustomBondForceKernel() { OpenCLCalcCustomBondForceKernel::~OpenCLCalcCustomBondForceKernel() {
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
} }
void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) { void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
...@@ -671,9 +664,9 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus ...@@ -671,9 +664,9 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
variables[name] = "bondParams"+params->getParameterSuffix(i); variables[name] = "bondParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customBondGlobals", CL_MEM_READ_ONLY); globals.initialize<cl_float>(cl, force.getNumGlobalParameters(), "customBondGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues); globals.upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float"); string argName = cl.getBondedUtilities().addArgument(globals.getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]"; string value = argName+"["+cl.intToString(i)+"]";
...@@ -702,7 +695,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus ...@@ -702,7 +695,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
} }
double OpenCLCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double OpenCLCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]); cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...@@ -711,7 +704,7 @@ double OpenCLCalcCustomBondForceKernel::execute(ContextImpl& context, bool inclu ...@@ -711,7 +704,7 @@ double OpenCLCalcCustomBondForceKernel::execute(ContextImpl& context, bool inclu
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
return 0.0; return 0.0;
} }
...@@ -770,11 +763,6 @@ private: ...@@ -770,11 +763,6 @@ private:
const HarmonicAngleForce& force; const HarmonicAngleForce& force;
}; };
OpenCLCalcHarmonicAngleForceKernel::~OpenCLCalcHarmonicAngleForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) { void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
int numContexts = cl.getPlatformData().contexts.size(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts; int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
...@@ -783,7 +771,7 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const ...@@ -783,7 +771,7 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const
if (numAngles == 0) if (numAngles == 0)
return; return;
vector<vector<int> > atoms(numAngles, vector<int>(3)); vector<vector<int> > atoms(numAngles, vector<int>(3));
params = OpenCLArray::create<mm_float2>(cl, numAngles, "angleParams"); params.initialize<mm_float2>(cl, numAngles, "angleParams");
vector<mm_float2> paramVector(numAngles); vector<mm_float2> paramVector(numAngles);
for (int i = 0; i < numAngles; i++) { for (int i = 0; i < numAngles; i++) {
double angle, k; double angle, k;
...@@ -791,11 +779,11 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const ...@@ -791,11 +779,11 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k); paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
} }
params->upload(paramVector); params.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::harmonicAngleForce; replacements["COMPUTE_FORCE"] = OpenCLKernelSources::harmonicAngleForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params->getDeviceBuffer(), "float2"); replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params.getDeviceBuffer(), "float2");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::angleForce, replacements), force.getForceGroup()); cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::angleForce, replacements), force.getForceGroup());
info = new ForceInfo(force); info = new ForceInfo(force);
cl.addForce(info); cl.addForce(info);
...@@ -823,7 +811,7 @@ void OpenCLCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& co ...@@ -823,7 +811,7 @@ void OpenCLCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& co
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k); force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k); paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
} }
params->upload(paramVector); params.upload(paramVector);
// Mark that the current reordering may be invalid. // Mark that the current reordering may be invalid.
...@@ -863,8 +851,6 @@ private: ...@@ -863,8 +851,6 @@ private:
OpenCLCalcCustomAngleForceKernel::~OpenCLCalcCustomAngleForceKernel() { OpenCLCalcCustomAngleForceKernel::~OpenCLCalcCustomAngleForceKernel() {
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
} }
void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) { void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
...@@ -911,9 +897,9 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu ...@@ -911,9 +897,9 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
variables[name] = "angleParams"+params->getParameterSuffix(i); variables[name] = "angleParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customAngleGlobals", CL_MEM_READ_ONLY); globals.initialize<cl_float>(cl, force.getNumGlobalParameters(), "customAngleGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues); globals.upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float"); string argName = cl.getBondedUtilities().addArgument(globals.getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]"; string value = argName+"["+cl.intToString(i)+"]";
...@@ -942,7 +928,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu ...@@ -942,7 +928,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
} }
double OpenCLCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double OpenCLCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]); cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...@@ -951,7 +937,7 @@ double OpenCLCalcCustomAngleForceKernel::execute(ContextImpl& context, bool incl ...@@ -951,7 +937,7 @@ double OpenCLCalcCustomAngleForceKernel::execute(ContextImpl& context, bool incl
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
return 0.0; return 0.0;
} }
...@@ -1011,11 +997,6 @@ private: ...@@ -1011,11 +997,6 @@ private:
const PeriodicTorsionForce& force; const PeriodicTorsionForce& force;
}; };
OpenCLCalcPeriodicTorsionForceKernel::~OpenCLCalcPeriodicTorsionForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) { void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts; int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
...@@ -1024,7 +1005,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons ...@@ -1024,7 +1005,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons
if (numTorsions == 0) if (numTorsions == 0)
return; return;
vector<vector<int> > atoms(numTorsions, vector<int>(4)); vector<vector<int> > atoms(numTorsions, vector<int>(4));
params = OpenCLArray::create<mm_float4>(cl, numTorsions, "periodicTorsionParams"); params.initialize<mm_float4>(cl, numTorsions, "periodicTorsionParams");
vector<mm_float4> paramVector(numTorsions); vector<mm_float4> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) { for (int i = 0; i < numTorsions; i++) {
int periodicity; int periodicity;
...@@ -1032,11 +1013,11 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons ...@@ -1032,11 +1013,11 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k); force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f); paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
} }
params->upload(paramVector); params.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::periodicTorsionForce; replacements["COMPUTE_FORCE"] = OpenCLKernelSources::periodicTorsionForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params->getDeviceBuffer(), "float4"); replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params.getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup()); cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force); info = new ForceInfo(force);
cl.addForce(info); cl.addForce(info);
...@@ -1064,7 +1045,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& ...@@ -1064,7 +1045,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl&
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k); force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f); paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
} }
params->upload(paramVector); params.upload(paramVector);
// Mark that the current reordering may be invalid. // Mark that the current reordering may be invalid.
...@@ -1099,11 +1080,6 @@ private: ...@@ -1099,11 +1080,6 @@ private:
const RBTorsionForce& force; const RBTorsionForce& force;
}; };
OpenCLCalcRBTorsionForceKernel::~OpenCLCalcRBTorsionForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) { void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts; int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
...@@ -1112,7 +1088,7 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo ...@@ -1112,7 +1088,7 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo
if (numTorsions == 0) if (numTorsions == 0)
return; return;
vector<vector<int> > atoms(numTorsions, vector<int>(4)); vector<vector<int> > atoms(numTorsions, vector<int>(4));
params = OpenCLArray::create<mm_float8>(cl, numTorsions, "rbTorsionParams"); params.initialize<mm_float8>(cl, numTorsions, "rbTorsionParams");
vector<mm_float8> paramVector(numTorsions); vector<mm_float8> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) { for (int i = 0; i < numTorsions; i++) {
double c0, c1, c2, c3, c4, c5; double c0, c1, c2, c3, c4, c5;
...@@ -1120,11 +1096,11 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo ...@@ -1120,11 +1096,11 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f); paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
} }
params->upload(paramVector); params.upload(paramVector);
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::rbTorsionForce; replacements["COMPUTE_FORCE"] = OpenCLKernelSources::rbTorsionForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params->getDeviceBuffer(), "float8"); replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params.getDeviceBuffer(), "float8");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup()); cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force); info = new ForceInfo(force);
cl.addForce(info); cl.addForce(info);
...@@ -1152,7 +1128,7 @@ void OpenCLCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& contex ...@@ -1152,7 +1128,7 @@ void OpenCLCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& contex
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, c0, c1, c2, c3, c4, c5); force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, c0, c1, c2, c3, c4, c5);
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f); paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
} }
params->upload(paramVector); params.upload(paramVector);
// Mark that the current reordering may be invalid. // Mark that the current reordering may be invalid.
...@@ -1189,15 +1165,6 @@ private: ...@@ -1189,15 +1165,6 @@ private:
const CMAPTorsionForce& force; const CMAPTorsionForce& force;
}; };
OpenCLCalcCMAPTorsionForceKernel::~OpenCLCalcCMAPTorsionForceKernel() {
if (coefficients != NULL)
delete coefficients;
if (mapPositions != NULL)
delete mapPositions;
if (torsionMaps != NULL)
delete torsionMaps;
}
void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) { void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts; int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
...@@ -1228,17 +1195,17 @@ void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CM ...@@ -1228,17 +1195,17 @@ void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CM
vector<cl_int> torsionMapsVec(numTorsions); vector<cl_int> torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++) for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]); force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients = OpenCLArray::create<mm_float4>(cl, coeffVec.size(), "cmapTorsionCoefficients"); coefficients.initialize<mm_float4>(cl, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions = OpenCLArray::create<mm_int2>(cl, numMaps, "cmapTorsionMapPositions"); mapPositions.initialize<mm_int2>(cl, numMaps, "cmapTorsionMapPositions");
torsionMaps = OpenCLArray::create<cl_int>(cl, numTorsions, "cmapTorsionMaps"); torsionMaps.initialize<cl_int>(cl, numTorsions, "cmapTorsionMaps");
coefficients->upload(coeffVec); coefficients.upload(coeffVec);
mapPositions->upload(mapPositionsVec); mapPositions.upload(mapPositionsVec);
torsionMaps->upload(torsionMapsVec); torsionMaps.upload(torsionMapsVec);
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COEFF"] = cl.getBondedUtilities().addArgument(coefficients->getDeviceBuffer(), "float4"); replacements["COEFF"] = cl.getBondedUtilities().addArgument(coefficients.getDeviceBuffer(), "float4");
replacements["MAP_POS"] = cl.getBondedUtilities().addArgument(mapPositions->getDeviceBuffer(), "int2"); replacements["MAP_POS"] = cl.getBondedUtilities().addArgument(mapPositions.getDeviceBuffer(), "int2");
replacements["MAPS"] = cl.getBondedUtilities().addArgument(torsionMaps->getDeviceBuffer(), "int"); replacements["MAPS"] = cl.getBondedUtilities().addArgument(torsionMaps.getDeviceBuffer(), "int");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::cmapTorsionForce, replacements), force.getForceGroup()); cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force); info = new ForceInfo(force);
cl.addForce(info); cl.addForce(info);
...@@ -1254,9 +1221,9 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont ...@@ -1254,9 +1221,9 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts; int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts; int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex; numTorsions = endIndex-startIndex;
if (mapPositions->getSize() != numMaps) if (mapPositions.getSize() != numMaps)
throw OpenMMException("updateParametersInContext: The number of maps has changed"); throw OpenMMException("updateParametersInContext: The number of maps has changed");
if (torsionMaps->getSize() != numTorsions) if (torsionMaps.getSize() != numTorsions)
throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed"); throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");
// Update the maps. // Update the maps.
...@@ -1279,7 +1246,7 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont ...@@ -1279,7 +1246,7 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont
coeffVec.push_back(mm_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15])); coeffVec.push_back(mm_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
} }
} }
coefficients->upload(coeffVec); coefficients.upload(coeffVec);
// Update the indices. // Update the indices.
...@@ -1288,7 +1255,7 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont ...@@ -1288,7 +1255,7 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont
int index[8]; int index[8];
force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]); force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
} }
torsionMaps->upload(torsionMapsVec); torsionMaps.upload(torsionMapsVec);
} }
class OpenCLCalcCustomTorsionForceKernel::ForceInfo : public OpenCLForceInfo { class OpenCLCalcCustomTorsionForceKernel::ForceInfo : public OpenCLForceInfo {
...@@ -1325,8 +1292,6 @@ private: ...@@ -1325,8 +1292,6 @@ private:
OpenCLCalcCustomTorsionForceKernel::~OpenCLCalcCustomTorsionForceKernel() { OpenCLCalcCustomTorsionForceKernel::~OpenCLCalcCustomTorsionForceKernel() {
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
} }
void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) { void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
...@@ -1373,9 +1338,9 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const ...@@ -1373,9 +1338,9 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
variables[name] = "torsionParams"+params->getParameterSuffix(i); variables[name] = "torsionParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customTorsionGlobals", CL_MEM_READ_ONLY); globals.initialize<cl_float>(cl, force.getNumGlobalParameters(), "customTorsionGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues); globals.upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float"); string argName = cl.getBondedUtilities().addArgument(globals.getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]"; string value = argName+"["+cl.intToString(i)+"]";
...@@ -1404,7 +1369,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const ...@@ -1404,7 +1369,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
} }
double OpenCLCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double OpenCLCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]); cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...@@ -1413,7 +1378,7 @@ double OpenCLCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool in ...@@ -1413,7 +1378,7 @@ double OpenCLCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool in
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
return 0.0; return 0.0;
} }
...@@ -1479,27 +1444,23 @@ private: ...@@ -1479,27 +1444,23 @@ private:
class OpenCLCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO { class OpenCLCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO {
public: public:
PmeIO(OpenCLContext& cl, cl::Kernel addForcesKernel) : cl(cl), addForcesKernel(addForcesKernel), forceTemp(NULL) { PmeIO(OpenCLContext& cl, cl::Kernel addForcesKernel) : cl(cl), addForcesKernel(addForcesKernel) {
forceTemp = OpenCLArray::create<mm_float4>(cl, cl.getNumAtoms(), "PmeForce"); forceTemp.initialize<mm_float4>(cl, cl.getNumAtoms(), "PmeForce");
addForcesKernel.setArg<cl::Buffer>(0, forceTemp->getDeviceBuffer()); addForcesKernel.setArg<cl::Buffer>(0, forceTemp.getDeviceBuffer());
}
~PmeIO() {
if (forceTemp != NULL)
delete forceTemp;
} }
float* getPosq() { float* getPosq() {
cl.getPosq().download(posq); cl.getPosq().download(posq);
return (float*) &posq[0]; return (float*) &posq[0];
} }
void setForce(float* force) { void setForce(float* force) {
forceTemp->upload(force); forceTemp.upload(force);
addForcesKernel.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer()); addForcesKernel.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
cl.executeKernel(addForcesKernel, cl.getNumAtoms()); cl.executeKernel(addForcesKernel, cl.getNumAtoms());
} }
private: private:
OpenCLContext& cl; OpenCLContext& cl;
vector<mm_float4> posq; vector<mm_float4> posq;
OpenCLArray* forceTemp; OpenCLArray forceTemp;
cl::Kernel addForcesKernel; cl::Kernel addForcesKernel;
}; };
...@@ -1577,36 +1538,6 @@ private: ...@@ -1577,36 +1538,6 @@ private:
}; };
OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() { OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
if (exceptionParams != NULL)
delete exceptionParams;
if (cosSinSums != NULL)
delete cosSinSums;
if (pmeGrid != NULL)
delete pmeGrid;
if (pmeGrid2 != NULL)
delete pmeGrid2;
if (pmeBsplineModuliX != NULL)
delete pmeBsplineModuliX;
if (pmeBsplineModuliY != NULL)
delete pmeBsplineModuliY;
if (pmeBsplineModuliZ != NULL)
delete pmeBsplineModuliZ;
if (pmeDispersionBsplineModuliX != NULL)
delete pmeDispersionBsplineModuliX;
if (pmeDispersionBsplineModuliY != NULL)
delete pmeDispersionBsplineModuliY;
if (pmeDispersionBsplineModuliZ != NULL)
delete pmeDispersionBsplineModuliZ;
if (pmeBsplineTheta != NULL)
delete pmeBsplineTheta;
if (pmeAtomRange != NULL)
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
if (pmeEnergyBuffer != NULL)
delete pmeEnergyBuffer;
if (sort != NULL) if (sort != NULL)
delete sort; delete sort;
if (fft != NULL) if (fft != NULL)
...@@ -1635,7 +1566,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1635,7 +1566,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Initialize nonbonded interactions. // Initialize nonbonded interactions.
int numParticles = force.getNumParticles(); int numParticles = force.getNumParticles();
sigmaEpsilon = OpenCLArray::create<mm_float2>(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon"); sigmaEpsilon.initialize<mm_float2>(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon");
vector<mm_float4> posqf(cl.getPaddedNumAtoms(), mm_float4(0,0,0,0)); vector<mm_float4> posqf(cl.getPaddedNumAtoms(), mm_float4(0,0,0,0));
vector<mm_double4> posqd(cl.getPaddedNumAtoms(), mm_double4(0,0,0,0)); vector<mm_double4> posqd(cl.getPaddedNumAtoms(), mm_double4(0,0,0,0));
vector<mm_float2> sigmaEpsilonVector(cl.getPaddedNumAtoms(), mm_float2(0,0)); vector<mm_float2> sigmaEpsilonVector(cl.getPaddedNumAtoms(), mm_float2(0,0));
...@@ -1671,7 +1602,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1671,7 +1602,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cl.getPosq().upload(posqd); cl.getPosq().upload(posqd);
else else
cl.getPosq().upload(posqf); cl.getPosq().upload(posqf);
sigmaEpsilon->upload(sigmaEpsilonVector); sigmaEpsilon.upload(sigmaEpsilonVector);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod()); nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff); bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic); bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
...@@ -1726,7 +1657,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1726,7 +1657,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums"); ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces"); ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2)); int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums"); cosSinSums.initialize(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
} }
} }
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) { else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
...@@ -1796,26 +1727,26 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1796,26 +1727,26 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
int gridElements = gridSizeX*gridSizeY*gridSizeZ; int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME) if (doLJPME)
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ); gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
pmeGrid = new OpenCLArray(cl, gridElements, 2*elementSize, "pmeGrid"); pmeGrid.initialize(cl, gridElements, 2*elementSize, "pmeGrid");
pmeGrid2 = new OpenCLArray(cl, gridElements, 2*elementSize, "pmeGrid2"); pmeGrid2.initialize(cl, gridElements, 2*elementSize, "pmeGrid2");
if (cl.getSupports64BitGlobalAtomics()) if (cl.getSupports64BitGlobalAtomics())
cl.addAutoclearBuffer(*pmeGrid2); cl.addAutoclearBuffer(pmeGrid2);
else else
cl.addAutoclearBuffer(*pmeGrid); cl.addAutoclearBuffer(pmeGrid);
pmeBsplineModuliX = new OpenCLArray(cl, gridSizeX, elementSize, "pmeBsplineModuliX"); pmeBsplineModuliX.initialize(cl, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new OpenCLArray(cl, gridSizeY, elementSize, "pmeBsplineModuliY"); pmeBsplineModuliY.initialize(cl, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new OpenCLArray(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ"); pmeBsplineModuliZ.initialize(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) { if (doLJPME) {
pmeDispersionBsplineModuliX = new OpenCLArray(cl, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX"); pmeDispersionBsplineModuliX.initialize(cl, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY = new OpenCLArray(cl, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY"); pmeDispersionBsplineModuliY.initialize(cl, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ = new OpenCLArray(cl, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ"); pmeDispersionBsplineModuliZ.initialize(cl, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
} }
pmeBsplineTheta = new OpenCLArray(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta"); pmeBsplineTheta.initialize(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
pmeAtomRange = OpenCLArray::create<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange"); pmeAtomRange.initialize<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = OpenCLArray::create<mm_int2>(cl, numParticles, "pmeAtomGridIndex"); pmeAtomGridIndex.initialize<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
int energyElementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float)); int energyElementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer = new OpenCLArray(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer"); pmeEnergyBuffer.initialize(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cl.clearBuffer(*pmeEnergyBuffer); cl.clearBuffer(pmeEnergyBuffer);
sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms()); sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms());
fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ, true); fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ, true);
if (doLJPME) if (doLJPME)
...@@ -1832,7 +1763,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1832,7 +1763,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (recipForceGroup < 0) if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup(); recipForceGroup = force.getForceGroup();
cl.addPreComputation(new SyncQueuePreComputation(cl, pmeQueue, recipForceGroup)); cl.addPreComputation(new SyncQueuePreComputation(cl, pmeQueue, recipForceGroup));
cl.addPostComputation(syncQueue = new SyncQueuePostComputation(cl, pmeSyncEvent, *pmeEnergyBuffer, recipForceGroup)); cl.addPostComputation(syncQueue = new SyncQueuePostComputation(cl, pmeSyncEvent, pmeEnergyBuffer, recipForceGroup));
} }
// Initialize the b-spline moduli. // Initialize the b-spline moduli.
...@@ -1844,9 +1775,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1844,9 +1775,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
xsize = gridSizeX; xsize = gridSizeX;
ysize = gridSizeY; ysize = gridSizeY;
zsize = gridSizeZ; zsize = gridSizeZ;
xmoduli = pmeBsplineModuliX; xmoduli = &pmeBsplineModuliX;
ymoduli = pmeBsplineModuliY; ymoduli = &pmeBsplineModuliY;
zmoduli = pmeBsplineModuliZ; zmoduli = &pmeBsplineModuliZ;
} }
else { else {
if (!doLJPME) if (!doLJPME)
...@@ -1854,9 +1785,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1854,9 +1785,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
xsize = dispersionGridSizeX; xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY; ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ; zsize = dispersionGridSizeZ;
xmoduli = pmeDispersionBsplineModuliX; xmoduli = &pmeDispersionBsplineModuliX;
ymoduli = pmeDispersionBsplineModuliY; ymoduli = &pmeDispersionBsplineModuliY;
zmoduli = pmeDispersionBsplineModuliZ; zmoduli = &pmeDispersionBsplineModuliZ;
} }
int maxSize = max(max(xsize, ysize), zsize); int maxSize = max(max(xsize, ysize), zsize);
vector<double> data(PmeOrder); vector<double> data(PmeOrder);
...@@ -1938,7 +1869,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1938,7 +1869,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines); string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup()); cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
if (hasLJ) if (hasLJ)
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(cl_float2), sigmaEpsilon->getDeviceBuffer())); cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(cl_float2), sigmaEpsilon.getDeviceBuffer()));
// Initialize the exceptions. // Initialize the exceptions.
...@@ -1949,7 +1880,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1949,7 +1880,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (numExceptions > 0) { if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions); exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2)); vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams = OpenCLArray::create<mm_float4>(cl, numExceptions, "exceptionParams"); exceptionParams.initialize<mm_float4>(cl, numExceptions, "exceptionParams");
vector<mm_float4> exceptionParamsVector(numExceptions); vector<mm_float4> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) { for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon; double chargeProd, sigma, epsilon;
...@@ -1957,9 +1888,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1957,9 +1888,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f); exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]); exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
} }
exceptionParams->upload(exceptionParamsVector); exceptionParams.upload(exceptionParamsVector);
map<string, string> replacements; map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams->getDeviceBuffer(), "float4"); replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams.getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup()); cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
} }
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force); info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
...@@ -1970,15 +1901,15 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1970,15 +1901,15 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU); bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
if (!hasInitializedKernel) { if (!hasInitializedKernel) {
hasInitializedKernel = true; hasInitializedKernel = true;
if (cosSinSums != NULL) { if (cosSinSums.isInitialized()) {
ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer()); ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
ewaldSumsKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer()); ewaldSumsKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
ewaldSumsKernel.setArg<cl::Buffer>(2, cosSinSums->getDeviceBuffer()); ewaldSumsKernel.setArg<cl::Buffer>(2, cosSinSums.getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(0, cl.getForceBuffers().getDeviceBuffer()); ewaldForcesKernel.setArg<cl::Buffer>(0, cl.getForceBuffers().getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer()); ewaldForcesKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(2, cosSinSums->getDeviceBuffer()); ewaldForcesKernel.setArg<cl::Buffer>(2, cosSinSums.getDeviceBuffer());
} }
if (pmeGrid != NULL) { if (pmeGrid.isInitialized()) {
// Create kernels for Coulomb PME. // Create kernels for Coulomb PME.
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines); cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
...@@ -1991,39 +1922,39 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1991,39 +1922,39 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce"); pmeInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4)); int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); pmeUpdateBsplinesKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta->getDeviceBuffer()); pmeUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta.getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL); pmeUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL);
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex->getDeviceBuffer()); pmeUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex.getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer()); pmeAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex.getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange->getDeviceBuffer()); pmeAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange.getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer()); pmeAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer()); pmeZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex.getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer()); pmeZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); pmeSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex->getDeviceBuffer()); pmeSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex.getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange->getDeviceBuffer()); pmeSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange.getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) if (cl.getSupports64BitGlobalAtomics())
pmeSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2->getDeviceBuffer()); pmeSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2.getDeviceBuffer());
else else
pmeSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid->getDeviceBuffer()); pmeSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid.getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta->getDeviceBuffer()); pmeSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta.getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer()); pmeConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2.getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(1, pmeBsplineModuliX->getDeviceBuffer()); pmeConvolutionKernel.setArg<cl::Buffer>(1, pmeBsplineModuliX.getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(2, pmeBsplineModuliY->getDeviceBuffer()); pmeConvolutionKernel.setArg<cl::Buffer>(2, pmeBsplineModuliY.getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(3, pmeBsplineModuliZ->getDeviceBuffer()); pmeConvolutionKernel.setArg<cl::Buffer>(3, pmeBsplineModuliZ.getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer()); pmeEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2.getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer->getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer()); pmeEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer.getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(2, pmeBsplineModuliX->getDeviceBuffer()); pmeEvalEnergyKernel.setArg<cl::Buffer>(2, pmeBsplineModuliX.getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(3, pmeBsplineModuliY->getDeviceBuffer()); pmeEvalEnergyKernel.setArg<cl::Buffer>(3, pmeBsplineModuliY.getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(4, pmeBsplineModuliZ->getDeviceBuffer()); pmeEvalEnergyKernel.setArg<cl::Buffer>(4, pmeBsplineModuliZ.getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid.getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex->getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex.getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge"); pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer()); pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2.getDeviceBuffer());
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid->getDeviceBuffer()); pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid.getDeviceBuffer());
} }
if (usePmeQueue) if (usePmeQueue)
syncQueue->setKernel(cl::Kernel(program, "addEnergy")); syncQueue->setKernel(cl::Kernel(program, "addEnergy"));
...@@ -2048,50 +1979,50 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2048,50 +1979,50 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeDispersionInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce"); pmeDispersionInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4)); int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta->getDeviceBuffer()); pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta.getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL); pmeDispersionUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL);
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex->getDeviceBuffer()); pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex.getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(12, sigmaEpsilon->getDeviceBuffer()); pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(12, sigmaEpsilon.getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer()); pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex.getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange->getDeviceBuffer()); pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange.getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer()); pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
pmeDispersionZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer()); pmeDispersionZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex.getDeviceBuffer());
pmeDispersionZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer()); pmeDispersionZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex->getDeviceBuffer()); pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex.getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange->getDeviceBuffer()); pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange.getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) if (cl.getSupports64BitGlobalAtomics())
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2->getDeviceBuffer()); pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2.getDeviceBuffer());
else else
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid->getDeviceBuffer()); pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid.getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta->getDeviceBuffer()); pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta.getDeviceBuffer());
if (deviceIsCpu || cl.getSupports64BitGlobalAtomics()) if (deviceIsCpu || cl.getSupports64BitGlobalAtomics())
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(13, sigmaEpsilon->getDeviceBuffer()); pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(13, sigmaEpsilon.getDeviceBuffer());
else else
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(5, sigmaEpsilon->getDeviceBuffer()); pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(5, sigmaEpsilon.getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer()); pmeDispersionConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2.getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(1, pmeDispersionBsplineModuliX->getDeviceBuffer()); pmeDispersionConvolutionKernel.setArg<cl::Buffer>(1, pmeDispersionBsplineModuliX.getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliY->getDeviceBuffer()); pmeDispersionConvolutionKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliY.getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliZ->getDeviceBuffer()); pmeDispersionConvolutionKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliZ.getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer()); pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2.getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer->getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer()); pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer.getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliX->getDeviceBuffer()); pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliX.getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliY->getDeviceBuffer()); pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliY.getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(4, pmeDispersionBsplineModuliZ->getDeviceBuffer()); pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(4, pmeDispersionBsplineModuliZ.getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer()); pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer()); pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid.getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex->getDeviceBuffer()); pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex.getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(12, sigmaEpsilon->getDeviceBuffer()); pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(12, sigmaEpsilon.getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
pmeDispersionFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge"); pmeDispersionFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer()); pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2.getDeviceBuffer());
pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid->getDeviceBuffer()); pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid.getDeviceBuffer());
} }
} }
} }
} }
if (cosSinSums != NULL && includeReciprocal) { if (cosSinSums.isInitialized() && includeReciprocal) {
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble(); mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
mm_double4 recipBoxSize = mm_double4(2*M_PI/boxSize.x, 2*M_PI/boxSize.y, 2*M_PI/boxSize.z, 0.0); mm_double4 recipBoxSize = mm_double4(2*M_PI/boxSize.x, 2*M_PI/boxSize.y, 2*M_PI/boxSize.z, 0.0);
double recipCoefficient = ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z); double recipCoefficient = ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z);
...@@ -2107,10 +2038,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2107,10 +2038,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
ewaldForcesKernel.setArg<mm_float4>(3, mm_float4((float) recipBoxSize.x, (float) recipBoxSize.y, (float) recipBoxSize.z, 0)); ewaldForcesKernel.setArg<mm_float4>(3, mm_float4((float) recipBoxSize.x, (float) recipBoxSize.y, (float) recipBoxSize.z, 0));
ewaldForcesKernel.setArg<cl_float>(4, (cl_float) recipCoefficient); ewaldForcesKernel.setArg<cl_float>(4, (cl_float) recipCoefficient);
} }
cl.executeKernel(ewaldSumsKernel, cosSinSums->getSize()); cl.executeKernel(ewaldSumsKernel, cosSinSums.getSize());
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms()); cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
} }
if (pmeGrid != NULL && includeReciprocal) { if (pmeGrid.isInitialized() && includeReciprocal) {
if (usePmeQueue && !includeEnergy) if (usePmeQueue && !includeEnergy)
cl.setQueue(pmeQueue); cl.setQueue(pmeQueue);
...@@ -2157,7 +2088,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2157,7 +2088,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1); cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
} }
else { else {
sort->sort(*pmeAtomGridIndex); sort->sort(pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5); setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
...@@ -2184,7 +2115,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2184,7 +2115,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms()); cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
} }
} }
fft->execFFT(*pmeGrid, *pmeGrid2, true); fft->execFFT(pmeGrid, pmeGrid2, true);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble(); mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]); pmeConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
...@@ -2205,7 +2136,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2205,7 +2136,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (includeEnergy) if (includeEnergy)
cl.executeKernel(pmeEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ); cl.executeKernel(pmeEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ); cl.executeKernel(pmeConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ);
fft->execFFT(*pmeGrid2, *pmeGrid, false); fft->execFFT(pmeGrid2, pmeGrid, false);
setPeriodicBoxArgs(cl, pmeInterpolateForceKernel, 3); setPeriodicBoxArgs(cl, pmeInterpolateForceKernel, 3);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]); pmeInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
...@@ -2236,7 +2167,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2236,7 +2167,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
} }
cl.executeKernel(pmeDispersionUpdateBsplinesKernel, cl.getNumAtoms()); cl.executeKernel(pmeDispersionUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) { if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
cl.clearBuffer(*pmeGrid); cl.clearBuffer(pmeGrid);
setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5); setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]); pmeDispersionSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
...@@ -2251,9 +2182,9 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2251,9 +2182,9 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1); cl.executeKernel(pmeDispersionSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
} }
else { else {
sort->sort(*pmeAtomGridIndex); sort->sort(pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
cl.clearBuffer(*pmeGrid2); cl.clearBuffer(pmeGrid2);
setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5); setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]); pmeDispersionSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
...@@ -2269,7 +2200,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2269,7 +2200,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionFinishSpreadChargeKernel, gridSizeX*gridSizeY*gridSizeZ); cl.executeKernel(pmeDispersionFinishSpreadChargeKernel, gridSizeX*gridSizeY*gridSizeZ);
} }
else { else {
cl.clearBuffer(*pmeGrid); cl.clearBuffer(pmeGrid);
cl.executeKernel(pmeDispersionAtomRangeKernel, cl.getNumAtoms()); cl.executeKernel(pmeDispersionAtomRangeKernel, cl.getNumAtoms());
setPeriodicBoxSizeArg(cl, pmeDispersionZIndexKernel, 2); setPeriodicBoxSizeArg(cl, pmeDispersionZIndexKernel, 2);
if (cl.getUseDoublePrecision()) if (cl.getUseDoublePrecision())
...@@ -2280,7 +2211,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2280,7 +2211,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionSpreadChargeKernel, cl.getNumAtoms()); cl.executeKernel(pmeDispersionSpreadChargeKernel, cl.getNumAtoms());
} }
} }
dispersionFft->execFFT(*pmeGrid, *pmeGrid2, true); dispersionFft->execFFT(pmeGrid, pmeGrid2, true);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble(); mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeDispersionConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]); pmeDispersionConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
...@@ -2301,7 +2232,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2301,7 +2232,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (includeEnergy) if (includeEnergy)
cl.executeKernel(pmeDispersionEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ); cl.executeKernel(pmeDispersionEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeDispersionConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ); cl.executeKernel(pmeDispersionConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ);
fft->execFFT(*pmeGrid2, *pmeGrid, false); fft->execFFT(pmeGrid2, pmeGrid, false);
setPeriodicBoxArgs(cl, pmeDispersionInterpolateForceKernel, 3); setPeriodicBoxArgs(cl, pmeDispersionInterpolateForceKernel, 3);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeDispersionInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]); pmeDispersionInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
...@@ -2374,7 +2305,7 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex ...@@ -2374,7 +2305,7 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
sumSquaredCharges += charge*charge; sumSquaredCharges += charge*charge;
} }
cl.setCharges(chargeVector); cl.setCharges(chargeVector);
sigmaEpsilon->upload(sigmaEpsilonVector); sigmaEpsilon.upload(sigmaEpsilonVector);
// Record the exceptions. // Record the exceptions.
...@@ -2386,7 +2317,7 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex ...@@ -2386,7 +2317,7 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon); force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f); exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
} }
exceptionParams->upload(exceptionParamsVector); exceptionParams.upload(exceptionParamsVector);
} }
// Compute other values. // Compute other values.
...@@ -2473,12 +2404,6 @@ private: ...@@ -2473,12 +2404,6 @@ private:
OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() { OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
if (interactionGroupData != NULL)
delete interactionGroupData;
for (auto function : tabulatedFunctions)
delete function;
if (forceCopy != NULL) if (forceCopy != NULL)
delete forceCopy; delete forceCopy;
} }
...@@ -2494,7 +2419,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons ...@@ -2494,7 +2419,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
int numParticles = force.getNumParticles(); int numParticles = force.getNumParticles();
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters"); params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customNonbondedGlobals", CL_MEM_READ_ONLY); globals.initialize<cl_float>(cl, force.getNumGlobalParameters(), "customNonbondedGlobals", CL_MEM_READ_ONLY);
vector<vector<cl_float> > paramVector(numParticles); vector<vector<cl_float> > paramVector(numParticles);
vector<vector<int> > exclusionList(numParticles); vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
...@@ -2519,7 +2444,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons ...@@ -2519,7 +2444,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
vector<pair<string, string> > functionDefinitions; vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
vector<string> tableTypes; vector<string> tableTypes;
for (int i = 0; i < force.getNumFunctions(); i++) { tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
string arrayName = prefix+"table"+cl.intToString(i); string arrayName = prefix+"table"+cl.intToString(i);
...@@ -2527,9 +2453,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons ...@@ -2527,9 +2453,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction")); tabulatedFunctions[i].initialize<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[i].upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer())); cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[i].getDeviceBuffer()));
if (width == 1) if (width == 1)
tableTypes.push_back("float"); tableTypes.push_back("float");
else else
...@@ -2544,8 +2470,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons ...@@ -2544,8 +2470,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
globalParamNames[i] = force.getGlobalParameterName(i); globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i); globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
} }
if (globals != NULL) if (globals.isInitialized())
globals->upload(globalParamValues); globals.upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff); bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic); bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize(); Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
...@@ -2599,9 +2525,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons ...@@ -2599,9 +2525,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+cl.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory())); cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+cl.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
} }
if (globals != NULL) { if (globals.isInitialized()) {
globals->upload(globalParamValues); globals.upload(globalParamValues);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDeviceBuffer())); cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals.getDeviceBuffer()));
} }
} }
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force); info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
...@@ -2785,8 +2711,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon ...@@ -2785,8 +2711,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
for (; indexInTileSet < 32; indexInTileSet++) for (; indexInTileSet < 32; indexInTileSet++)
groupData.push_back(mm_int4(0, 0, minSize<<16, 0)); groupData.push_back(mm_int4(0, 0, minSize<<16, 0));
} }
interactionGroupData = OpenCLArray::create<mm_int4>(cl, groupData.size(), "interactionGroupData"); interactionGroupData.initialize<mm_int4>(cl, groupData.size(), "interactionGroupData");
interactionGroupData->upload(groupData); interactionGroupData.upload(groupData);
// Create the kernel. // Create the kernel.
...@@ -2812,7 +2738,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon ...@@ -2812,7 +2738,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
args<<", __global const "<<buffers[i].getType()<<"* restrict global_params"<<(i+1); args<<", __global const "<<buffers[i].getType()<<"* restrict global_params"<<(i+1);
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
args << ", __global const " << tableTypes[i]<< "* restrict table" << i; args << ", __global const " << tableTypes[i]<< "* restrict table" << i;
if (globals != NULL) if (globals.isInitialized())
args<<", __global const float* restrict globals"; args<<", __global const float* restrict globals";
if (hasParamDerivs) if (hasParamDerivs)
args << ", __global mixed* restrict energyParamDerivs"; args << ", __global mixed* restrict energyParamDerivs";
...@@ -2883,7 +2809,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon ...@@ -2883,7 +2809,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
} }
double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]); cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...@@ -2892,7 +2818,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool ...@@ -2892,7 +2818,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) { if (changed) {
globals->upload(globalParamValues); globals.upload(globalParamValues);
if (forceCopy != NULL) { if (forceCopy != NULL) {
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs); CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true; hasInitializedLongRangeCorrection = true;
...@@ -2903,7 +2829,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool ...@@ -2903,7 +2829,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs); CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true; hasInitializedLongRangeCorrection = true;
} }
if (interactionGroupData != NULL) { if (interactionGroupData.isInitialized()) {
if (!hasInitializedKernel) { if (!hasInitializedKernel) {
hasInitializedKernel = true; hasInitializedKernel = true;
int index = 0; int index = 0;
...@@ -2911,14 +2837,14 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool ...@@ -2911,14 +2837,14 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
interactionGroupKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer() : cl.getForceBuffers()).getDeviceBuffer()); interactionGroupKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer() : cl.getForceBuffers()).getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer()); interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, interactionGroupData->getDeviceBuffer()); interactionGroupKernel.setArg<cl::Buffer>(index++, interactionGroupData.getDeviceBuffer());
index += 5; index += 5;
for (auto& buffer : params->getBuffers()) for (auto& buffer : params->getBuffers())
interactionGroupKernel.setArg<cl::Memory>(index++, buffer.getMemory()); interactionGroupKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
interactionGroupKernel.setArg<cl::Memory>(index++, function->getDeviceBuffer()); interactionGroupKernel.setArg<cl::Memory>(index++, function.getDeviceBuffer());
if (globals != NULL) if (globals.isInitialized())
interactionGroupKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); interactionGroupKernel.setArg<cl::Buffer>(index++, globals.getDeviceBuffer());
if (hasParamDerivs) if (hasParamDerivs)
interactionGroupKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer()); interactionGroupKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
} }
...@@ -2978,43 +2904,26 @@ private: ...@@ -2978,43 +2904,26 @@ private:
const GBSAOBCForce& force; const GBSAOBCForce& force;
}; };
OpenCLCalcGBSAOBCForceKernel::~OpenCLCalcGBSAOBCForceKernel() {
if (params != NULL)
delete params;
if (bornSum != NULL)
delete bornSum;
if (longBornSum != NULL)
delete longBornSum;
if (bornRadii != NULL)
delete bornRadii;
if (bornForce != NULL)
delete bornForce;
if (longBornForce != NULL)
delete longBornForce;
if (obcChain != NULL)
delete obcChain;
}
void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) { void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
if (cl.getPlatformData().contexts.size() > 1) if (cl.getPlatformData().contexts.size() > 1)
throw OpenMMException("GBSAOBCForce does not support using multiple OpenCL devices"); throw OpenMMException("GBSAOBCForce does not support using multiple OpenCL devices");
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities(); OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
params = OpenCLArray::create<mm_float2>(cl, cl.getPaddedNumAtoms(), "gbsaObcParams"); params.initialize<mm_float2>(cl, cl.getPaddedNumAtoms(), "gbsaObcParams");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float)); int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
bornRadii = new OpenCLArray(cl, cl.getPaddedNumAtoms(), elementSize, "bornRadii"); bornRadii.initialize(cl, cl.getPaddedNumAtoms(), elementSize, "bornRadii");
obcChain = new OpenCLArray(cl, cl.getPaddedNumAtoms(), elementSize, "obcChain"); obcChain.initialize(cl, cl.getPaddedNumAtoms(), elementSize, "obcChain");
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
longBornSum = OpenCLArray::create<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornSum"); longBornSum.initialize<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornSum");
longBornForce = OpenCLArray::create<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornForce"); longBornForce.initialize<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornForce");
bornForce = new OpenCLArray(cl, cl.getPaddedNumAtoms(), elementSize, "bornForce"); bornForce.initialize(cl, cl.getPaddedNumAtoms(), elementSize, "bornForce");
cl.addAutoclearBuffer(*longBornSum); cl.addAutoclearBuffer(longBornSum);
cl.addAutoclearBuffer(*longBornForce); cl.addAutoclearBuffer(longBornForce);
} }
else { else {
bornSum = new OpenCLArray(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "bornSum"); bornSum.initialize(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "bornSum");
bornForce = new OpenCLArray(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "bornForce"); bornForce.initialize(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "bornForce");
cl.addAutoclearBuffer(*bornSum); cl.addAutoclearBuffer(bornSum);
cl.addAutoclearBuffer(*bornForce); cl.addAutoclearBuffer(bornForce);
} }
vector<mm_float4> posqf(cl.getPaddedNumAtoms()); vector<mm_float4> posqf(cl.getPaddedNumAtoms());
vector<mm_double4> posqd(cl.getPaddedNumAtoms()); vector<mm_double4> posqd(cl.getPaddedNumAtoms());
...@@ -3034,7 +2943,7 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB ...@@ -3034,7 +2943,7 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
cl.getPosq().upload(posqd); cl.getPosq().upload(posqd);
else else
cl.getPosq().upload(posqf); cl.getPosq().upload(posqf);
params->upload(paramsVector); params.upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric())); prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy(); surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff); bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
...@@ -3042,8 +2951,8 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB ...@@ -3042,8 +2951,8 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
cutoff = force.getCutoffDistance(); cutoff = force.getCutoffDistance();
string source = OpenCLKernelSources::gbsaObc2; string source = OpenCLKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup()); nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params->getDeviceBuffer()));; nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params.getDeviceBuffer()));;
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("bornForce", "real", 1, elementSize, bornForce->getDeviceBuffer()));; nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("bornForce", "real", 1, elementSize, bornForce.getDeviceBuffer()));;
info = new ForceInfo(nb.getNumForceBuffers(), force); info = new ForceInfo(nb.getNumForceBuffers(), force);
cl.addForce(info); cl.addForce(info);
} }
...@@ -3089,9 +2998,9 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -3089,9 +2998,9 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
bool useLong = cl.getSupports64BitGlobalAtomics(); bool useLong = cl.getSupports64BitGlobalAtomics();
int index = 0; int index = 0;
computeBornSumKernel = cl::Kernel(program, "computeBornSum"); computeBornSumKernel = cl::Kernel(program, "computeBornSum");
computeBornSumKernel.setArg<cl::Buffer>(index++, (useLong ? longBornSum->getDeviceBuffer() : bornSum->getDeviceBuffer())); computeBornSumKernel.setArg<cl::Buffer>(index++, (useLong ? longBornSum.getDeviceBuffer() : bornSum.getDeviceBuffer()));
computeBornSumKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, params->getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, params.getDeviceBuffer());
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
...@@ -3107,10 +3016,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -3107,10 +3016,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
force1Kernel = cl::Kernel(program, "computeGBSAForce1"); force1Kernel = cl::Kernel(program, "computeGBSAForce1");
index = 0; index = 0;
force1Kernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer())); force1Kernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
force1Kernel.setArg<cl::Buffer>(index++, (useLong ? longBornForce->getDeviceBuffer() : bornForce->getDeviceBuffer())); force1Kernel.setArg<cl::Buffer>(index++, (useLong ? longBornForce.getDeviceBuffer() : bornForce.getDeviceBuffer()));
force1Kernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, bornRadii->getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, bornRadii.getDeviceBuffer());
index++; // Whether to include energy. index++; // Whether to include energy.
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
...@@ -3131,21 +3040,21 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -3131,21 +3040,21 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
reduceBornSumKernel.setArg<cl_float>(2, 1.0f); reduceBornSumKernel.setArg<cl_float>(2, 1.0f);
reduceBornSumKernel.setArg<cl_float>(3, 0.8f); reduceBornSumKernel.setArg<cl_float>(3, 0.8f);
reduceBornSumKernel.setArg<cl_float>(4, 4.85f); reduceBornSumKernel.setArg<cl_float>(4, 4.85f);
reduceBornSumKernel.setArg<cl::Buffer>(5, (useLong ? longBornSum->getDeviceBuffer() : bornSum->getDeviceBuffer())); reduceBornSumKernel.setArg<cl::Buffer>(5, (useLong ? longBornSum.getDeviceBuffer() : bornSum.getDeviceBuffer()));
reduceBornSumKernel.setArg<cl::Buffer>(6, params->getDeviceBuffer()); reduceBornSumKernel.setArg<cl::Buffer>(6, params.getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(7, bornRadii->getDeviceBuffer()); reduceBornSumKernel.setArg<cl::Buffer>(7, bornRadii.getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(8, obcChain->getDeviceBuffer()); reduceBornSumKernel.setArg<cl::Buffer>(8, obcChain.getDeviceBuffer());
reduceBornForceKernel = cl::Kernel(program, "reduceBornForce"); reduceBornForceKernel = cl::Kernel(program, "reduceBornForce");
index = 0; index = 0;
reduceBornForceKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms()); reduceBornForceKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
reduceBornForceKernel.setArg<cl_int>(index++, nb.getNumForceBuffers()); reduceBornForceKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
reduceBornForceKernel.setArg<cl::Buffer>(index++, bornForce->getDeviceBuffer()); reduceBornForceKernel.setArg<cl::Buffer>(index++, bornForce.getDeviceBuffer());
if (useLong) if (useLong)
reduceBornForceKernel.setArg<cl::Buffer>(index++, longBornForce->getDeviceBuffer()); reduceBornForceKernel.setArg<cl::Buffer>(index++, longBornForce.getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer()); reduceBornForceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, params->getDeviceBuffer()); reduceBornForceKernel.setArg<cl::Buffer>(index++, params.getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, bornRadii->getDeviceBuffer()); reduceBornForceKernel.setArg<cl::Buffer>(index++, bornRadii.getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, obcChain->getDeviceBuffer()); reduceBornForceKernel.setArg<cl::Buffer>(index++, obcChain.getDeviceBuffer());
} }
force1Kernel.setArg<cl_int>(5, includeEnergy); force1Kernel.setArg<cl_int>(5, includeEnergy);
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
...@@ -3188,7 +3097,7 @@ void OpenCLCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -3188,7 +3097,7 @@ void OpenCLCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context,
paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius)); paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
} }
cl.setCharges(chargeVector); cl.setCharges(chargeVector);
params->upload(paramsVector); params.upload(paramsVector);
// Mark that the current reordering may be invalid. // Mark that the current reordering may be invalid.
...@@ -3235,18 +3144,6 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() { ...@@ -3235,18 +3144,6 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() {
delete energyDerivs; delete energyDerivs;
if (energyDerivChain != NULL) if (energyDerivChain != NULL)
delete energyDerivChain; delete energyDerivChain;
if (longEnergyDerivs != NULL)
delete longEnergyDerivs;
if (globals != NULL)
delete globals;
if (valueBuffers != NULL)
delete valueBuffers;
if (longValueBuffers != NULL)
delete longValueBuffers;
for (auto function : tabulatedFunctions)
delete function;
for (auto d : dValue0dParam)
delete d;
for (auto d : dValuedParam) for (auto d : dValuedParam)
delete d; delete d;
} }
...@@ -3284,7 +3181,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3284,7 +3181,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true); params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true);
computedValues = new OpenCLParameterSet(cl, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cl.getUseDoublePrecision()); computedValues = new OpenCLParameterSet(cl, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cl.getUseDoublePrecision());
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customGBGlobals", CL_MEM_READ_ONLY); globals.initialize<cl_float>(cl, force.getNumGlobalParameters(), "customGBGlobals", CL_MEM_READ_ONLY);
vector<vector<cl_float> > paramVector(paddedNumParticles, vector<cl_float>(numParams, 0)); vector<vector<cl_float> > paramVector(paddedNumParticles, vector<cl_float>(numParams, 0));
vector<vector<int> > exclusionList(numParticles); vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
...@@ -3308,6 +3205,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3308,6 +3205,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector<pair<string, string> > functionDefinitions; vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
stringstream tableArgs; stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) { for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
...@@ -3316,9 +3214,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3316,9 +3214,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction")); tabulatedFunctions[i].initialize<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[i].upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer())); cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[i].getDeviceBuffer()));
tableArgs << ", __global const float"; tableArgs << ", __global const float";
if (width > 1) if (width > 1)
tableArgs << width; tableArgs << width;
...@@ -3333,8 +3231,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3333,8 +3231,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
globalParamNames[i] = force.getGlobalParameterName(i); globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i); globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
} }
if (globals != NULL) if (globals.isInitialized())
globals->upload(globalParamValues); globals.upload(globalParamValues);
// Record derivatives of expressions needed for the chain rule terms. // Record derivatives of expressions needed for the chain rule terms.
...@@ -3385,7 +3283,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3385,7 +3283,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU); bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
bool useLong = cl.getSupports64BitGlobalAtomics(); bool useLong = cl.getSupports64BitGlobalAtomics();
if (useLong) { if (useLong) {
longEnergyDerivs = OpenCLArray::create<cl_long>(cl, force.getNumComputedValues()*cl.getPaddedNumAtoms(), "customGBLongEnergyDerivatives"); longEnergyDerivs.initialize<cl_long>(cl, force.getNumComputedValues()*cl.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivatives", true); energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
} }
else else
...@@ -3393,13 +3291,14 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3393,13 +3291,14 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
energyDerivChain = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true); energyDerivChain = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float)); int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0); needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0);
dValue0dParam.resize(force.getNumEnergyParameterDerivatives());
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) { for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
dValuedParam.push_back(new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "dValuedParam", true, cl.getUseDoublePrecision())); dValuedParam.push_back(new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "dValuedParam", true, cl.getUseDoublePrecision()));
if (useLong) if (useLong)
dValue0dParam.push_back(OpenCLArray::create<cl_long>(cl, cl.getPaddedNumAtoms(), "dValue0dParam")); dValue0dParam[i].initialize<cl_long>(cl, cl.getPaddedNumAtoms(), "dValue0dParam");
else else
dValue0dParam.push_back(new OpenCLArray(cl, cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), elementSize, "dValue0dParam")); dValue0dParam[i].initialize(cl, cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), elementSize, "dValue0dParam");
cl.addAutoclearBuffer(*dValue0dParam.back()); cl.addAutoclearBuffer(dValue0dParam[i]);
string name = force.getEnergyParameterDerivativeName(i); string name = force.getEnergyParameterDerivativeName(i);
cl.addEnergyParameterDerivative(name); cl.addEnergyParameterDerivative(name);
} }
...@@ -4035,9 +3934,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -4035,9 +3934,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory())); parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
} }
} }
if (globals != NULL) { if (globals.isInitialized()) {
globals->upload(globalParamValues); globals.upload(globalParamValues);
arguments.push_back(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDeviceBuffer())); arguments.push_back(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals.getDeviceBuffer()));
} }
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup()); cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
for (auto param : parameters) for (auto param : parameters)
...@@ -4048,7 +3947,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -4048,7 +3947,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force); info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
cl.addForce(info); cl.addForce(info);
if (useLong) if (useLong)
cl.addAutoclearBuffer(*longEnergyDerivs); cl.addAutoclearBuffer(longEnergyDerivs);
else { else {
for (auto& buffer : energyDerivs->getBuffers()) for (auto& buffer : energyDerivs->getBuffers())
cl.addAutoclearBuffer(buffer.getMemory(), buffer.getSize()*energyDerivs->getNumObjects()); cl.addAutoclearBuffer(buffer.getMemory(), buffer.getSize()*energyDerivs->getNumObjects());
...@@ -4099,21 +3998,21 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -4099,21 +3998,21 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0); maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0);
bool useLong = cl.getSupports64BitGlobalAtomics(); bool useLong = cl.getSupports64BitGlobalAtomics();
if (useLong) { if (useLong) {
longValueBuffers = OpenCLArray::create<cl_long>(cl, cl.getPaddedNumAtoms(), "customGBLongValueBuffers"); longValueBuffers.initialize<cl_long>(cl, cl.getPaddedNumAtoms(), "customGBLongValueBuffers");
cl.addAutoclearBuffer(*longValueBuffers); cl.addAutoclearBuffer(longValueBuffers);
cl.clearBuffer(*longValueBuffers); cl.clearBuffer(longValueBuffers);
} }
else { else {
valueBuffers = new OpenCLArray(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "customGBValueBuffers"); valueBuffers.initialize(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "customGBValueBuffers");
cl.addAutoclearBuffer(*valueBuffers); cl.addAutoclearBuffer(valueBuffers);
cl.clearBuffer(*valueBuffers); cl.clearBuffer(valueBuffers);
} }
int index = 0; int index = 0;
pairValueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*4*elementSize, NULL); pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*4*elementSize, NULL);
pairValueKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusions().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusions().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusionTiles().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusionTiles().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, useLong ? longValueBuffers->getDeviceBuffer() : valueBuffers->getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, useLong ? longValueBuffers.getDeviceBuffer() : valueBuffers.getDeviceBuffer());
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*elementSize, NULL); pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*elementSize, NULL);
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
...@@ -4126,8 +4025,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -4126,8 +4025,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
} }
else else
pairValueKernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2); pairValueKernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2);
if (globals != NULL) if (globals.isInitialized())
pairValueKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, globals.getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairValueUsesParam[i]) { if (pairValueUsesParam[i]) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
...@@ -4135,30 +4034,30 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -4135,30 +4034,30 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*buffer.getSize(), NULL); pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*buffer.getSize(), NULL);
} }
} }
for (auto d : dValue0dParam) { for (auto& d : dValue0dParam) {
pairValueKernel.setArg<cl::Buffer>(index++, d->getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, d.getDeviceBuffer());
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*d->getElementSize(), NULL); pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*d.getElementSize(), NULL);
} }
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
pairValueKernel.setArg<cl::Buffer>(index++, function->getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, function.getDeviceBuffer());
index = 0; index = 0;
perParticleValueKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms()); perParticleValueKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
perParticleValueKernel.setArg<cl_int>(index++, nb.getNumForceBuffers()); perParticleValueKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
perParticleValueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); perParticleValueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
perParticleValueKernel.setArg<cl::Buffer>(index++, useLong ? longValueBuffers->getDeviceBuffer() : valueBuffers->getDeviceBuffer()); perParticleValueKernel.setArg<cl::Buffer>(index++, useLong ? longValueBuffers.getDeviceBuffer() : valueBuffers.getDeviceBuffer());
if (globals != NULL) if (globals.isInitialized())
perParticleValueKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); perParticleValueKernel.setArg<cl::Buffer>(index++, globals.getDeviceBuffer());
for (auto& buffer : params->getBuffers()) for (auto& buffer : params->getBuffers())
perParticleValueKernel.setArg<cl::Memory>(index++, buffer.getMemory()); perParticleValueKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : computedValues->getBuffers()) for (auto& buffer : computedValues->getBuffers())
perParticleValueKernel.setArg<cl::Memory>(index++, buffer.getMemory()); perParticleValueKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (int i = 0; i < dValuedParam.size(); i++) { for (int i = 0; i < dValuedParam.size(); i++) {
perParticleValueKernel.setArg<cl::Memory>(index++, dValue0dParam[i]->getDeviceBuffer()); perParticleValueKernel.setArg<cl::Memory>(index++, dValue0dParam[i].getDeviceBuffer());
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++) for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
perParticleValueKernel.setArg<cl::Memory>(index++, dValuedParam[i]->getBuffers()[j].getMemory()); perParticleValueKernel.setArg<cl::Memory>(index++, dValuedParam[i]->getBuffers()[j].getMemory());
} }
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
perParticleValueKernel.setArg<cl::Buffer>(index++, function->getDeviceBuffer()); perParticleValueKernel.setArg<cl::Buffer>(index++, function.getDeviceBuffer());
index = 0; index = 0;
pairEnergyKernel.setArg<cl::Buffer>(index++, useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
...@@ -4179,8 +4078,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -4179,8 +4078,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
} }
else else
pairEnergyKernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2); pairEnergyKernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2);
if (globals != NULL) if (globals.isInitialized())
pairEnergyKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, globals.getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairEnergyUsesParam[i]) { if (pairEnergyUsesParam[i]) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
...@@ -4196,7 +4095,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -4196,7 +4095,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
} }
} }
if (useLong) { if (useLong) {
pairEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs->getDeviceBuffer()); pairEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs.getDeviceBuffer());
for (int i = 0; i < numComputedValues; ++i) for (int i = 0; i < numComputedValues; ++i)
pairEnergyKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*elementSize, NULL); pairEnergyKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*elementSize, NULL);
} }
...@@ -4208,16 +4107,16 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -4208,16 +4107,16 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
} }
if (needEnergyParamDerivs) if (needEnergyParamDerivs)
pairEnergyKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
pairEnergyKernel.setArg<cl::Buffer>(index++, function->getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, function.getDeviceBuffer());
index = 0; index = 0;
perParticleEnergyKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms()); perParticleEnergyKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
perParticleEnergyKernel.setArg<cl_int>(index++, nb.getNumForceBuffers()); perParticleEnergyKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer()); perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer()); perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
if (globals != NULL) if (globals.isInitialized())
perParticleEnergyKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); perParticleEnergyKernel.setArg<cl::Buffer>(index++, globals.getDeviceBuffer());
for (auto& buffer : params->getBuffers()) for (auto& buffer : params->getBuffers())
perParticleEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory()); perParticleEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : computedValues->getBuffers()) for (auto& buffer : computedValues->getBuffers())
...@@ -4227,17 +4126,17 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -4227,17 +4126,17 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
for (auto& buffer : energyDerivChain->getBuffers()) for (auto& buffer : energyDerivChain->getBuffers())
perParticleEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory()); perParticleEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
if (useLong) if (useLong)
perParticleEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs->getDeviceBuffer()); perParticleEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs.getDeviceBuffer());
if (needEnergyParamDerivs) if (needEnergyParamDerivs)
perParticleEnergyKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer()); perParticleEnergyKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, function->getDeviceBuffer()); perParticleEnergyKernel.setArg<cl::Buffer>(index++, function.getDeviceBuffer());
if (needParameterGradient || needEnergyParamDerivs) { if (needParameterGradient || needEnergyParamDerivs) {
index = 0; index = 0;
gradientChainRuleKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer()); gradientChainRuleKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
gradientChainRuleKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); gradientChainRuleKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
if (globals != NULL) if (globals.isInitialized())
gradientChainRuleKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); gradientChainRuleKernel.setArg<cl::Buffer>(index++, globals.getDeviceBuffer());
for (auto& buffer : params->getBuffers()) for (auto& buffer : params->getBuffers())
gradientChainRuleKernel.setArg<cl::Memory>(index++, buffer.getMemory()); gradientChainRuleKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : computedValues->getBuffers()) for (auto& buffer : computedValues->getBuffers())
...@@ -4252,7 +4151,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -4252,7 +4151,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
} }
} }
} }
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]); cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...@@ -4261,7 +4160,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -4261,7 +4160,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
pairEnergyKernel.setArg<cl_int>(7, includeEnergy); pairEnergyKernel.setArg<cl_int>(7, includeEnergy);
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
...@@ -4342,8 +4241,6 @@ private: ...@@ -4342,8 +4241,6 @@ private:
OpenCLCalcCustomExternalForceKernel::~OpenCLCalcCustomExternalForceKernel() { OpenCLCalcCustomExternalForceKernel::~OpenCLCalcCustomExternalForceKernel() {
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
} }
void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) { void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
...@@ -4398,9 +4295,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const ...@@ -4398,9 +4295,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
variables[name] = "particleParams"+params->getParameterSuffix(i); variables[name] = "particleParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customExternalGlobals", CL_MEM_READ_ONLY); globals.initialize<cl_float>(cl, force.getNumGlobalParameters(), "customExternalGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues); globals.upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float"); string argName = cl.getBondedUtilities().addArgument(globals.getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]"; string value = argName+"["+cl.intToString(i)+"]";
...@@ -4422,7 +4319,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const ...@@ -4422,7 +4319,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
} }
double OpenCLCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double OpenCLCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]); cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...@@ -4431,7 +4328,7 @@ double OpenCLCalcCustomExternalForceKernel::execute(ContextImpl& context, bool i ...@@ -4431,7 +4328,7 @@ double OpenCLCalcCustomExternalForceKernel::execute(ContextImpl& context, bool i
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
return 0.0; return 0.0;
} }
...@@ -4544,22 +4441,6 @@ OpenCLCalcCustomHbondForceKernel::~OpenCLCalcCustomHbondForceKernel() { ...@@ -4544,22 +4441,6 @@ OpenCLCalcCustomHbondForceKernel::~OpenCLCalcCustomHbondForceKernel() {
delete donorParams; delete donorParams;
if (acceptorParams != NULL) if (acceptorParams != NULL)
delete acceptorParams; delete acceptorParams;
if (donors != NULL)
delete donors;
if (acceptors != NULL)
delete acceptors;
if (donorBufferIndices != NULL)
delete donorBufferIndices;
if (acceptorBufferIndices != NULL)
delete acceptorBufferIndices;
if (globals != NULL)
delete globals;
if (donorExclusions != NULL)
delete donorExclusions;
if (acceptorExclusions != NULL)
delete acceptorExclusions;
for (auto function : tabulatedFunctions)
delete function;
} }
static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) { static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
...@@ -4586,12 +4467,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4586,12 +4467,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
if (numDonors == 0 || numAcceptors == 0) if (numDonors == 0 || numAcceptors == 0)
return; return;
int numParticles = system.getNumParticles(); int numParticles = system.getNumParticles();
donors = OpenCLArray::create<mm_int4>(cl, numDonors, "customHbondDonors"); donors.initialize<mm_int4>(cl, numDonors, "customHbondDonors");
acceptors = OpenCLArray::create<mm_int4>(cl, numAcceptors, "customHbondAcceptors"); acceptors.initialize<mm_int4>(cl, numAcceptors, "customHbondAcceptors");
donorParams = new OpenCLParameterSet(cl, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters"); donorParams = new OpenCLParameterSet(cl, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
acceptorParams = new OpenCLParameterSet(cl, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters"); acceptorParams = new OpenCLParameterSet(cl, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customHbondGlobals", CL_MEM_READ_ONLY); globals.initialize<cl_float>(cl, force.getNumGlobalParameters(), "customHbondGlobals", CL_MEM_READ_ONLY);
vector<vector<cl_float> > donorParamVector(numDonors); vector<vector<cl_float> > donorParamVector(numDonors);
vector<mm_int4> donorVector(numDonors); vector<mm_int4> donorVector(numDonors);
for (int i = 0; i < numDonors; i++) { for (int i = 0; i < numDonors; i++) {
...@@ -4601,7 +4482,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4601,7 +4482,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for (int j = 0; j < (int) parameters.size(); j++) for (int j = 0; j < (int) parameters.size(); j++)
donorParamVector[i][j] = (cl_float) parameters[j]; donorParamVector[i][j] = (cl_float) parameters[j];
} }
donors->upload(donorVector); donors.upload(donorVector);
donorParams->setParameterValues(donorParamVector); donorParams->setParameterValues(donorParamVector);
vector<vector<cl_float> > acceptorParamVector(numAcceptors); vector<vector<cl_float> > acceptorParamVector(numAcceptors);
vector<mm_int4> acceptorVector(numAcceptors); vector<mm_int4> acceptorVector(numAcceptors);
...@@ -4612,13 +4493,13 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4612,13 +4493,13 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for (int j = 0; j < (int) parameters.size(); j++) for (int j = 0; j < (int) parameters.size(); j++)
acceptorParamVector[i][j] = (cl_float) parameters[j]; acceptorParamVector[i][j] = (cl_float) parameters[j];
} }
acceptors->upload(acceptorVector); acceptors.upload(acceptorVector);
acceptorParams->setParameterValues(acceptorParamVector); acceptorParams->setParameterValues(acceptorParamVector);
// Select an output buffer index for each donor and acceptor. // Select an output buffer index for each donor and acceptor.
donorBufferIndices = OpenCLArray::create<mm_int4>(cl, numDonors, "customHbondDonorBuffers"); donorBufferIndices.initialize<mm_int4>(cl, numDonors, "customHbondDonorBuffers");
acceptorBufferIndices = OpenCLArray::create<mm_int4>(cl, numAcceptors, "customHbondAcceptorBuffers"); acceptorBufferIndices.initialize<mm_int4>(cl, numAcceptors, "customHbondAcceptorBuffers");
vector<mm_int4> donorBufferVector(numDonors); vector<mm_int4> donorBufferVector(numDonors);
vector<mm_int4> acceptorBufferVector(numAcceptors); vector<mm_int4> acceptorBufferVector(numAcceptors);
vector<int> donorBufferCounter(numParticles, 0); vector<int> donorBufferCounter(numParticles, 0);
...@@ -4631,8 +4512,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4631,8 +4512,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
acceptorBufferVector[i] = mm_int4(acceptorVector[i].x > -1 ? acceptorBufferCounter[acceptorVector[i].x]++ : 0, acceptorBufferVector[i] = mm_int4(acceptorVector[i].x > -1 ? acceptorBufferCounter[acceptorVector[i].x]++ : 0,
acceptorVector[i].y > -1 ? acceptorBufferCounter[acceptorVector[i].y]++ : 0, acceptorVector[i].y > -1 ? acceptorBufferCounter[acceptorVector[i].y]++ : 0,
acceptorVector[i].z > -1 ? acceptorBufferCounter[acceptorVector[i].z]++ : 0, 0); acceptorVector[i].z > -1 ? acceptorBufferCounter[acceptorVector[i].z]++ : 0, 0);
donorBufferIndices->upload(donorBufferVector); donorBufferIndices.upload(donorBufferVector);
acceptorBufferIndices->upload(acceptorBufferVector); acceptorBufferIndices.upload(acceptorBufferVector);
int maxBuffers = 1; int maxBuffers = 1;
for (int i : donorBufferCounter) for (int i : donorBufferCounter)
maxBuffers = max(maxBuffers, i); maxBuffers = max(maxBuffers, i);
...@@ -4672,10 +4553,10 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4672,10 +4553,10 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
else else
throw OpenMMException("CustomHbondForce: OpenCLPlatform does not support more than four exclusions per acceptor"); throw OpenMMException("CustomHbondForce: OpenCLPlatform does not support more than four exclusions per acceptor");
} }
donorExclusions = OpenCLArray::create<mm_int4>(cl, numDonors, "customHbondDonorExclusions"); donorExclusions.initialize<mm_int4>(cl, numDonors, "customHbondDonorExclusions");
acceptorExclusions = OpenCLArray::create<mm_int4>(cl, numAcceptors, "customHbondAcceptorExclusions"); acceptorExclusions.initialize<mm_int4>(cl, numAcceptors, "customHbondAcceptorExclusions");
donorExclusions->upload(donorExclusionVector); donorExclusions.upload(donorExclusionVector);
acceptorExclusions->upload(acceptorExclusionVector); acceptorExclusions.upload(acceptorExclusionVector);
// Record the tabulated functions. // Record the tabulated functions.
...@@ -4683,6 +4564,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4683,6 +4564,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
vector<pair<string, string> > functionDefinitions; vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
stringstream tableArgs; stringstream tableArgs;
tabulatedFunctions.resize(force.getNumFunctions());
for (int i = 0; i < force.getNumFunctions(); i++) { for (int i = 0; i < force.getNumFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
...@@ -4691,8 +4573,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4691,8 +4573,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction")); tabulatedFunctions[i].initialize<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[i].upload(f);
tableArgs << ", __global const float"; tableArgs << ", __global const float";
if (width > 1) if (width > 1)
tableArgs << width; tableArgs << width;
...@@ -4707,8 +4589,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4707,8 +4589,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
globalParamNames[i] = force.getGlobalParameterName(i); globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i); globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
} }
if (globals != NULL) if (globals.isInitialized())
globals->upload(globalParamValues); globals.upload(globalParamValues);
map<string, string> variables; map<string, string> variables;
for (int i = 0; i < force.getNumPerDonorParameters(); i++) { for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
const string& name = force.getPerDonorParameterName(i); const string& name = force.getPerDonorParameterName(i);
...@@ -4900,7 +4782,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4900,7 +4782,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numDonors == 0 || numAcceptors == 0) if (numDonors == 0 || numAcceptors == 0)
return 0.0; return 0.0;
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]); cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...@@ -4909,7 +4791,7 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl ...@@ -4909,7 +4791,7 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
if (!hasInitializedKernel) { if (!hasInitializedKernel) {
hasInitializedKernel = true; hasInitializedKernel = true;
...@@ -4917,38 +4799,38 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl ...@@ -4917,38 +4799,38 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
donorKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donorExclusions->getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, donorExclusions.getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donors->getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, donors.getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, acceptors->getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, acceptors.getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donorBufferIndices->getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, donorBufferIndices.getDeviceBuffer());
donorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL); donorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
index += 5; // Periodic box size arguments are set when the kernel is executed. index += 5; // Periodic box size arguments are set when the kernel is executed.
if (globals != NULL) if (globals.isInitialized())
donorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, globals.getDeviceBuffer());
for (auto& buffer : donorParams->getBuffers()) for (auto& buffer : donorParams->getBuffers())
donorKernel.setArg<cl::Memory>(index++, buffer.getMemory()); donorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers()) for (auto& buffer : acceptorParams->getBuffers())
donorKernel.setArg<cl::Memory>(index++, buffer.getMemory()); donorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
donorKernel.setArg<cl::Buffer>(index++, function->getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, function.getDeviceBuffer());
index = 0; index = 0;
acceptorKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptorExclusions->getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, acceptorExclusions.getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, donors->getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, donors.getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptors->getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, acceptors.getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptorBufferIndices->getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, acceptorBufferIndices.getDeviceBuffer());
acceptorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL); acceptorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
index += 5; // Periodic box size arguments are set when the kernel is executed. index += 5; // Periodic box size arguments are set when the kernel is executed.
if (globals != NULL) if (globals.isInitialized())
acceptorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, globals.getDeviceBuffer());
for (auto& buffer : donorParams->getBuffers()) for (auto& buffer : donorParams->getBuffers())
acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory()); acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers()) for (auto& buffer : acceptorParams->getBuffers())
acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory()); acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
acceptorKernel.setArg<cl::Buffer>(index++, function->getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, function.getDeviceBuffer());
} }
setPeriodicBoxArgs(cl, donorKernel, 8); setPeriodicBoxArgs(cl, donorKernel, 8);
cl.executeKernel(donorKernel, max(numDonors, numAcceptors)); cl.executeKernel(donorKernel, max(numDonors, numAcceptors));
...@@ -5047,22 +4929,6 @@ private: ...@@ -5047,22 +4929,6 @@ private:
OpenCLCalcCustomCentroidBondForceKernel::~OpenCLCalcCustomCentroidBondForceKernel() { OpenCLCalcCustomCentroidBondForceKernel::~OpenCLCalcCustomCentroidBondForceKernel() {
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
if (groupParticles != NULL)
delete groupParticles;
if (groupWeights != NULL)
delete groupWeights;
if (groupOffsets != NULL)
delete groupOffsets;
if (groupForces != NULL)
delete groupForces;
if (bondGroups != NULL)
delete bondGroups;
if (centerPositions != NULL)
delete centerPositions;
for (auto function : tabulatedFunctions)
delete function;
} }
void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) { void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
...@@ -5100,22 +4966,22 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c ...@@ -5100,22 +4966,22 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
for (int j = 0; j < normalizedWeights[i].size(); j++) for (int j = 0; j < normalizedWeights[i].size(); j++)
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]); groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
} }
groupParticles = OpenCLArray::create<int>(cl, groupParticleVec.size(), "groupParticles"); groupParticles.initialize<int>(cl, groupParticleVec.size(), "groupParticles");
groupParticles->upload(groupParticleVec); groupParticles.upload(groupParticleVec);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
groupWeights = OpenCLArray::create<double>(cl, groupParticleVec.size(), "groupWeights"); groupWeights.initialize<double>(cl, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecDouble); groupWeights.upload(groupWeightVecDouble);
centerPositions = OpenCLArray::create<mm_double4>(cl, numGroups, "centerPositions"); centerPositions.initialize<mm_double4>(cl, numGroups, "centerPositions");
} }
else { else {
groupWeights = OpenCLArray::create<float>(cl, groupParticleVec.size(), "groupWeights"); groupWeights.initialize<float>(cl, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecFloat); groupWeights.upload(groupWeightVecFloat);
centerPositions = OpenCLArray::create<mm_float4>(cl, numGroups, "centerPositions"); centerPositions.initialize<mm_float4>(cl, numGroups, "centerPositions");
} }
groupOffsets = OpenCLArray::create<int>(cl, groupOffsetVec.size(), "groupOffsets"); groupOffsets.initialize<int>(cl, groupOffsetVec.size(), "groupOffsets");
groupOffsets->upload(groupOffsetVec); groupOffsets.upload(groupOffsetVec);
groupForces = OpenCLArray::create<long long>(cl, numGroups*3, "groupForces"); groupForces.initialize<long long>(cl, numGroups*3, "groupForces");
cl.addAutoclearBuffer(*groupForces); cl.addAutoclearBuffer(groupForces);
// Record the bonds. // Record the bonds.
...@@ -5134,8 +5000,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c ...@@ -5134,8 +5000,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
paramVector[i][j] = (float) parameters[j]; paramVector[i][j] = (float) parameters[j];
} }
params->setParameterValues(paramVector); params->setParameterValues(paramVector);
bondGroups = OpenCLArray::create<int>(cl, bondGroupVec.size(), "bondGroups"); bondGroups.initialize<int>(cl, bondGroupVec.size(), "bondGroups");
bondGroups->upload(bondGroupVec); bondGroups.upload(bondGroupVec);
// Record the tabulated functions. // Record the tabulated functions.
...@@ -5143,6 +5009,7 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c ...@@ -5143,6 +5009,7 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
vector<pair<string, string> > functionDefinitions; vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
stringstream extraArgs; stringstream extraArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) { for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
...@@ -5151,8 +5018,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c ...@@ -5151,8 +5018,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction")); tabulatedFunctions[i].initialize<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions.back()->upload(f); tabulatedFunctions[i].upload(f);
extraArgs << ", __global const float"; extraArgs << ", __global const float";
if (width > 1) if (width > 1)
extraArgs << width; extraArgs << width;
...@@ -5182,8 +5049,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c ...@@ -5182,8 +5049,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
if (needEnergyParamDerivs) if (needEnergyParamDerivs)
extraArgs << ", __global mixed* restrict energyParamDerivs"; extraArgs << ", __global mixed* restrict energyParamDerivs";
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = OpenCLArray::create<float>(cl, force.getNumGlobalParameters(), "customCentroidBondGlobals"); globals.initialize<float>(cl, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals->upload(globalParamValues); globals.upload(globalParamValues);
extraArgs << ", __global const float* restrict globals"; extraArgs << ", __global const float* restrict globals";
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
...@@ -5394,37 +5261,37 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c ...@@ -5394,37 +5261,37 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
index = 0; index = 0;
computeCentersKernel = cl::Kernel(program, "computeGroupCenters"); computeCentersKernel = cl::Kernel(program, "computeGroupCenters");
computeCentersKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); computeCentersKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupParticles->getDeviceBuffer()); computeCentersKernel.setArg<cl::Buffer>(index++, groupParticles.getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupWeights->getDeviceBuffer()); computeCentersKernel.setArg<cl::Buffer>(index++, groupWeights.getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupOffsets->getDeviceBuffer()); computeCentersKernel.setArg<cl::Buffer>(index++, groupOffsets.getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, centerPositions->getDeviceBuffer()); computeCentersKernel.setArg<cl::Buffer>(index++, centerPositions.getDeviceBuffer());
index = 0; index = 0;
groupForcesKernel = cl::Kernel(program, "computeGroupForces"); groupForcesKernel = cl::Kernel(program, "computeGroupForces");
groupForcesKernel.setArg<cl::Buffer>(index++, groupForces->getDeviceBuffer()); groupForcesKernel.setArg<cl::Buffer>(index++, groupForces.getDeviceBuffer());
index++; // Energy buffer hasn't been created yet index++; // Energy buffer hasn't been created yet
groupForcesKernel.setArg<cl::Buffer>(index++, centerPositions->getDeviceBuffer()); groupForcesKernel.setArg<cl::Buffer>(index++, centerPositions.getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, bondGroups->getDeviceBuffer()); groupForcesKernel.setArg<cl::Buffer>(index++, bondGroups.getDeviceBuffer());
index += 5; // Periodic box information index += 5; // Periodic box information
if (needEnergyParamDerivs) if (needEnergyParamDerivs)
index++; // Deriv buffer hasn't been created yet. index++; // Deriv buffer hasn't been created yet.
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
groupForcesKernel.setArg<cl::Buffer>(index++, function->getDeviceBuffer()); groupForcesKernel.setArg<cl::Buffer>(index++, function.getDeviceBuffer());
if (globals != NULL) if (globals.isInitialized())
groupForcesKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); groupForcesKernel.setArg<cl::Buffer>(index++, globals.getDeviceBuffer());
for (auto& buffer : params->getBuffers()) for (auto& buffer : params->getBuffers())
groupForcesKernel.setArg<cl::Memory>(index++, buffer.getMemory()); groupForcesKernel.setArg<cl::Memory>(index++, buffer.getMemory());
index = 0; index = 0;
applyForcesKernel = cl::Kernel(program, "applyForcesToAtoms"); applyForcesKernel = cl::Kernel(program, "applyForcesToAtoms");
applyForcesKernel.setArg<cl::Buffer>(index++, groupParticles->getDeviceBuffer()); applyForcesKernel.setArg<cl::Buffer>(index++, groupParticles.getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupWeights->getDeviceBuffer()); applyForcesKernel.setArg<cl::Buffer>(index++, groupWeights.getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupOffsets->getDeviceBuffer()); applyForcesKernel.setArg<cl::Buffer>(index++, groupOffsets.getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupForces->getDeviceBuffer()); applyForcesKernel.setArg<cl::Buffer>(index++, groupForces.getDeviceBuffer());
} }
double OpenCLCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double OpenCLCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0) if (numBonds == 0)
return 0.0; return 0.0;
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]); float value = (float) context.getParameter(globalParamNames[i]);
...@@ -5433,7 +5300,7 @@ double OpenCLCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bo ...@@ -5433,7 +5300,7 @@ double OpenCLCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bo
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
cl.executeKernel(computeCentersKernel, OpenCLContext::TileSize*numGroups); cl.executeKernel(computeCentersKernel, OpenCLContext::TileSize*numGroups);
groupForcesKernel.setArg<cl::Buffer>(1, cl.getEnergyBuffer().getDeviceBuffer()); groupForcesKernel.setArg<cl::Buffer>(1, cl.getEnergyBuffer().getDeviceBuffer());
...@@ -5498,10 +5365,6 @@ private: ...@@ -5498,10 +5365,6 @@ private:
OpenCLCalcCustomCompoundBondForceKernel::~OpenCLCalcCustomCompoundBondForceKernel() { OpenCLCalcCustomCompoundBondForceKernel::~OpenCLCalcCustomCompoundBondForceKernel() {
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
for (auto function : tabulatedFunctions)
delete function;
} }
void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) { void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
...@@ -5532,16 +5395,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c ...@@ -5532,16 +5395,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
vector<pair<string, string> > functionDefinitions; vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
stringstream tableArgs; stringstream tableArgs;
for (int i = 0; i < force.getNumFunctions(); i++) { tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
OpenCLArray* array = OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction"); tabulatedFunctions[i].initialize<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions.push_back(array); tabulatedFunctions[i].upload(f);
array->upload(f); string arrayName = cl.getBondedUtilities().addArgument(tabulatedFunctions[i].getDeviceBuffer(), width == 1 ? "float" : "float"+cl.intToString(width));
string arrayName = cl.getBondedUtilities().addArgument(array->getDeviceBuffer(), width == 1 ? "float" : "float"+cl.intToString(width));
functionDefinitions.push_back(make_pair(name, arrayName)); functionDefinitions.push_back(make_pair(name, arrayName));
} }
...@@ -5565,9 +5428,9 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c ...@@ -5565,9 +5428,9 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
variables[name] = "bondParams"+params->getParameterSuffix(i); variables[name] = "bondParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customCompoundBondGlobals", CL_MEM_READ_ONLY); globals.initialize<cl_float>(cl, force.getNumGlobalParameters(), "customCompoundBondGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues); globals.upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float"); string argName = cl.getBondedUtilities().addArgument(globals.getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]"; string value = argName+"["+cl.intToString(i)+"]";
...@@ -5750,7 +5613,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c ...@@ -5750,7 +5613,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
} }
double OpenCLCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double OpenCLCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]); cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...@@ -5759,7 +5622,7 @@ double OpenCLCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bo ...@@ -5759,7 +5622,7 @@ double OpenCLCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bo
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
return 0.0; return 0.0;
} }
...@@ -5827,34 +5690,6 @@ private: ...@@ -5827,34 +5690,6 @@ private:
OpenCLCalcCustomManyParticleForceKernel::~OpenCLCalcCustomManyParticleForceKernel() { OpenCLCalcCustomManyParticleForceKernel::~OpenCLCalcCustomManyParticleForceKernel() {
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL)
delete globals;
if (orderIndex != NULL)
delete orderIndex;
if (particleOrder != NULL)
delete particleOrder;
if (particleTypes != NULL)
delete particleTypes;
if (exclusions != NULL)
delete exclusions;
if (exclusionStartIndex != NULL)
delete exclusionStartIndex;
if (blockCenter != NULL)
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (neighborPairs != NULL)
delete neighborPairs;
if (numNeighborPairs != NULL)
delete numNeighborPairs;
if (neighborStartIndex != NULL)
delete neighborStartIndex;
if (neighbors != NULL)
delete neighbors;
if (numNeighborsForAtom != NULL)
delete numNeighborsForAtom;
for (auto function : tabulatedFunctions)
delete function;
} }
void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, const CustomManyParticleForce& force) { void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, const CustomManyParticleForce& force) {
...@@ -5889,6 +5724,7 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c ...@@ -5889,6 +5724,7 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
vector<pair<string, string> > functionDefinitions; vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList; vector<const TabulatedFunction*> functionList;
stringstream tableArgs; stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) { for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i)); functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i); string name = force.getTabulatedFunctionName(i);
...@@ -5897,8 +5733,8 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c ...@@ -5897,8 +5733,8 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i)); functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width; int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width); vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction")); tabulatedFunctions[i].initialize<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[i].upload(f);
tableArgs << ", __global const float"; tableArgs << ", __global const float";
if (width > 1) if (width > 1)
tableArgs << width; tableArgs << width;
...@@ -5928,8 +5764,8 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c ...@@ -5928,8 +5764,8 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
} }
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customManyParticleGlobals", CL_MEM_READ_ONLY); globals.initialize<cl_float>(cl, force.getNumGlobalParameters(), "customManyParticleGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues); globals.upload(globalParamValues);
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i); const string& name = force.getGlobalParameterName(i);
string value = "globals["+cl.intToString(i)+"]"; string value = "globals["+cl.intToString(i)+"]";
...@@ -5946,16 +5782,16 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c ...@@ -5946,16 +5782,16 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypesVec, orderIndexVec, particleOrderVec); CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypesVec, orderIndexVec, particleOrderVec);
bool hasTypeFilters = (particleOrderVec.size() > 1); bool hasTypeFilters = (particleOrderVec.size() > 1);
if (hasTypeFilters) { if (hasTypeFilters) {
particleTypes = OpenCLArray::create<int>(cl, particleTypesVec.size(), "customManyParticleTypes"); particleTypes.initialize<int>(cl, particleTypesVec.size(), "customManyParticleTypes");
orderIndex = OpenCLArray::create<int>(cl, orderIndexVec.size(), "customManyParticleOrderIndex"); orderIndex.initialize<int>(cl, orderIndexVec.size(), "customManyParticleOrderIndex");
particleOrder = OpenCLArray::create<int>(cl, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder"); particleOrder.initialize<int>(cl, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
particleTypes->upload(particleTypesVec); particleTypes.upload(particleTypesVec);
orderIndex->upload(orderIndexVec); orderIndex.upload(orderIndexVec);
vector<int> flattenedOrder(particleOrder->getSize()); vector<int> flattenedOrder(particleOrder.getSize());
for (int i = 0; i < (int) particleOrderVec.size(); i++) for (int i = 0; i < (int) particleOrderVec.size(); i++)
for (int j = 0; j < particlesPerSet; j++) for (int j = 0; j < particlesPerSet; j++)
flattenedOrder[i*particlesPerSet+j] = particleOrderVec[i][j]; flattenedOrder[i*particlesPerSet+j] = particleOrderVec[i][j];
particleOrder->upload(flattenedOrder); particleOrder.upload(flattenedOrder);
} }
// Build data structures for exclusions. // Build data structures for exclusions.
...@@ -5976,10 +5812,10 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c ...@@ -5976,10 +5812,10 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
exclusionsVec.insert(exclusionsVec.end(), particleExclusions[i].begin(), particleExclusions[i].end()); exclusionsVec.insert(exclusionsVec.end(), particleExclusions[i].begin(), particleExclusions[i].end());
exclusionStartIndexVec[i+1] = exclusionsVec.size(); exclusionStartIndexVec[i+1] = exclusionsVec.size();
} }
exclusions = OpenCLArray::create<int>(cl, exclusionsVec.size(), "customManyParticleExclusions"); exclusions.initialize<int>(cl, exclusionsVec.size(), "customManyParticleExclusions");
exclusionStartIndex = OpenCLArray::create<int>(cl, exclusionStartIndexVec.size(), "customManyParticleExclusionStart"); exclusionStartIndex.initialize<int>(cl, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
exclusions->upload(exclusionsVec); exclusions.upload(exclusionsVec);
exclusionStartIndex->upload(exclusionStartIndexVec); exclusionStartIndex.upload(exclusionStartIndexVec);
} }
// Build data structures for the neighbor list. // Build data structures for the neighbor list.
...@@ -5987,18 +5823,18 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c ...@@ -5987,18 +5823,18 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
int numAtomBlocks = cl.getNumAtomBlocks(); int numAtomBlocks = cl.getNumAtomBlocks();
int elementSize = (cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
blockCenter = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockCenter"); blockCenter.initialize(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox"); blockBoundingBox.initialize(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
numNeighborPairs = OpenCLArray::create<int>(cl, 1, "customManyParticleNumNeighborPairs"); numNeighborPairs.initialize<int>(cl, 1, "customManyParticleNumNeighborPairs");
neighborStartIndex = OpenCLArray::create<int>(cl, numParticles+1, "customManyParticleNeighborStartIndex"); neighborStartIndex.initialize<int>(cl, numParticles+1, "customManyParticleNeighborStartIndex");
numNeighborsForAtom = OpenCLArray::create<int>(cl, numParticles, "customManyParticleNumNeighborsForAtom"); numNeighborsForAtom.initialize<int>(cl, numParticles, "customManyParticleNumNeighborsForAtom");
// Select a size for the array that holds the neighbor list. We have to make a fairly // Select a size for the array that holds the neighbor list. We have to make a fairly
// arbitrary guess, but if this turns out to be too small we'll increase it later. // arbitrary guess, but if this turns out to be too small we'll increase it later.
maxNeighborPairs = 150*numParticles; maxNeighborPairs = 150*numParticles;
neighborPairs = OpenCLArray::create<mm_int2>(cl, maxNeighborPairs, "customManyParticleNeighborPairs"); neighborPairs.initialize<mm_int2>(cl, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors = OpenCLArray::create<int>(cl, maxNeighborPairs, "customManyParticleNeighbors"); neighbors.initialize<int>(cl, maxNeighborPairs, "customManyParticleNeighbors");
} }
// Now to generate the kernel. First, it needs to calculate all distances, angles, // Now to generate the kernel. First, it needs to calculate all distances, angles,
...@@ -6317,24 +6153,24 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo ...@@ -6317,24 +6153,24 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
setPeriodicBoxArgs(cl, forceKernel, index); setPeriodicBoxArgs(cl, forceKernel, index);
index += 5; index += 5;
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
forceKernel.setArg<cl::Buffer>(index++, neighbors->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, neighbors.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborStartIndex->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, neighborStartIndex.getDeviceBuffer());
} }
if (particleTypes != NULL) { if (particleTypes.isInitialized()) {
forceKernel.setArg<cl::Buffer>(index++, particleTypes->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, particleTypes.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, orderIndex->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, orderIndex.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, particleOrder->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, particleOrder.getDeviceBuffer());
} }
if (exclusions != NULL) { if (exclusions.isInitialized()) {
forceKernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, exclusions.getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex.getDeviceBuffer());
} }
if (globals != NULL) if (globals.isInitialized())
forceKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, globals.getDeviceBuffer());
for (auto& buffer : params->getBuffers()) for (auto& buffer : params->getBuffers())
forceKernel.setArg<cl::Memory>(index++, buffer.getMemory()); forceKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto function : tabulatedFunctions) for (auto& function : tabulatedFunctions)
forceKernel.setArg<cl::Buffer>(index++, function->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, function.getDeviceBuffer());
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
// Set arguments for the block bounds kernel. // Set arguments for the block bounds kernel.
...@@ -6343,9 +6179,9 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo ...@@ -6343,9 +6179,9 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
setPeriodicBoxArgs(cl, blockBoundsKernel, index); setPeriodicBoxArgs(cl, blockBoundsKernel, index);
index += 5; index += 5;
blockBoundsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(index++, blockCenter.getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(index++, blockBoundingBox.getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, numNeighborPairs->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(index++, numNeighborPairs.getDeviceBuffer());
// Set arguments for the neighbor list kernel. // Set arguments for the neighbor list kernel.
...@@ -6353,36 +6189,36 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo ...@@ -6353,36 +6189,36 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
setPeriodicBoxArgs(cl, neighborsKernel, index); setPeriodicBoxArgs(cl, neighborsKernel, index);
index += 5; index += 5;
neighborsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(index++, blockCenter.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(index++, blockBoundingBox.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, neighborPairs->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(index++, neighborPairs.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, numNeighborPairs->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(index++, numNeighborPairs.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom.getDeviceBuffer());
index++; index++;
if (exclusions != NULL) { if (exclusions.isInitialized()) {
neighborsKernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(index++, exclusions.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(index++, exclusionStartIndex.getDeviceBuffer());
} }
// Set arguments for the kernel to find neighbor list start indices. // Set arguments for the kernel to find neighbor list start indices.
index = 0; index = 0;
startIndicesKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom->getDeviceBuffer()); startIndicesKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom.getDeviceBuffer());
startIndicesKernel.setArg<cl::Buffer>(index++, neighborStartIndex->getDeviceBuffer()); startIndicesKernel.setArg<cl::Buffer>(index++, neighborStartIndex.getDeviceBuffer());
startIndicesKernel.setArg<cl::Buffer>(index++, numNeighborPairs->getDeviceBuffer()); startIndicesKernel.setArg<cl::Buffer>(index++, numNeighborPairs.getDeviceBuffer());
// Set arguments for the kernel to assemble the final neighbor list. // Set arguments for the kernel to assemble the final neighbor list.
index = 0; index = 0;
copyPairsKernel.setArg<cl::Buffer>(index++, neighborPairs->getDeviceBuffer()); copyPairsKernel.setArg<cl::Buffer>(index++, neighborPairs.getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, neighbors->getDeviceBuffer()); copyPairsKernel.setArg<cl::Buffer>(index++, neighbors.getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, numNeighborPairs->getDeviceBuffer()); copyPairsKernel.setArg<cl::Buffer>(index++, numNeighborPairs.getDeviceBuffer());
index++; index++;
copyPairsKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom->getDeviceBuffer()); copyPairsKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom.getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, neighborStartIndex->getDeviceBuffer()); copyPairsKernel.setArg<cl::Buffer>(index++, neighborStartIndex.getDeviceBuffer());
} }
} }
if (globals != NULL) { if (globals.isInitialized()) {
bool changed = false; bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) { for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]); cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...@@ -6391,7 +6227,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo ...@@ -6391,7 +6227,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
globalParamValues[i] = value; globalParamValues[i] = value;
} }
if (changed) if (changed)
globals->upload(globalParamValues); globals.upload(globalParamValues);
} }
while (true) { while (true) {
int* numPairs = (int*) cl.getPinnedBuffer(); int* numPairs = (int*) cl.getPinnedBuffer();
...@@ -6406,7 +6242,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo ...@@ -6406,7 +6242,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
// We need to make sure there was enough memory for the neighbor list. Download the // We need to make sure there was enough memory for the neighbor list. Download the
// information asynchronously so kernels can be running at the same time. // information asynchronously so kernels can be running at the same time.
numNeighborPairs->download(numPairs, false); numNeighborPairs.download(numPairs, false);
cl.getQueue().enqueueMarker(&event); cl.getQueue().enqueueMarker(&event);
cl.executeKernel(startIndicesKernel, 256, 256); cl.executeKernel(startIndicesKernel, 256, 256);
cl.executeKernel(copyPairsKernel, maxNeighborPairs); cl.executeKernel(copyPairsKernel, maxNeighborPairs);
...@@ -6420,17 +6256,13 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo ...@@ -6420,17 +6256,13 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
if (*numPairs > maxNeighborPairs) { if (*numPairs > maxNeighborPairs) {
// Resize the arrays and run the calculation again. // Resize the arrays and run the calculation again.
delete neighborPairs;
neighborPairs = NULL;
delete neighbors;
neighbors = NULL;
maxNeighborPairs = (int) (1.1*(*numPairs)); maxNeighborPairs = (int) (1.1*(*numPairs));
neighborPairs = OpenCLArray::create<mm_int2>(cl, maxNeighborPairs, "customManyParticleNeighborPairs"); neighborPairs.resize(maxNeighborPairs);
neighbors = OpenCLArray::create<int>(cl, maxNeighborPairs, "customManyParticleNeighbors"); neighbors.resize(maxNeighborPairs);
forceKernel.setArg<cl::Buffer>(8, neighbors->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(8, neighbors.getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, neighborPairs->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(8, neighborPairs.getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(0, neighborPairs->getDeviceBuffer()); copyPairsKernel.setArg<cl::Buffer>(0, neighborPairs.getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(1, neighbors->getDeviceBuffer()); copyPairsKernel.setArg<cl::Buffer>(1, neighbors.getDeviceBuffer());
continue; continue;
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment