"...ssh:/git@developer.sourcefind.cn:2222/tsoc/openmm.git" did not exist on "f55abcaa5f70fbafe25f290a008c7482e3dfce1a"
Commit ecbbf442 authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to implement new CUDA platform: constraints, LangevinIntegrator,...

Continuing to implement new CUDA platform: constraints, LangevinIntegrator, BrownianIntegrator, VariableLangevinIntegrator, VariableVerletIntegrator
parent 03cc3523
...@@ -32,8 +32,8 @@ ...@@ -32,8 +32,8 @@
using namespace OpenMM; using namespace OpenMM;
CudaArray::CudaArray(int size, int elementSize, const std::string& name) : CudaArray::CudaArray(CudaContext& context, int size, int elementSize, const std::string& name) :
size(size), elementSize(elementSize), name(name), ownsMemory(true) { context(context), size(size), elementSize(elementSize), name(name), ownsMemory(true) {
CUresult result = cuMemAlloc(&pointer, size*elementSize); CUresult result = cuMemAlloc(&pointer, size*elementSize);
if (result != CUDA_SUCCESS) { if (result != CUDA_SUCCESS) {
std::stringstream str; std::stringstream str;
...@@ -43,7 +43,7 @@ CudaArray::CudaArray(int size, int elementSize, const std::string& name) : ...@@ -43,7 +43,7 @@ CudaArray::CudaArray(int size, int elementSize, const std::string& name) :
} }
CudaArray::~CudaArray() { CudaArray::~CudaArray() {
if (ownsMemory) { if (ownsMemory && context.getContextIsValid()) {
CUresult result = cuMemFree(pointer); CUresult result = cuMemFree(pointer);
if (result != CUDA_SUCCESS) { if (result != CUDA_SUCCESS) {
std::stringstream str; std::stringstream str;
......
...@@ -35,6 +35,8 @@ ...@@ -35,6 +35,8 @@
namespace OpenMM { namespace OpenMM {
class CudaContext;
/** /**
* This class encapsulates a block of CUDA device memory. It provides a simplified API * This class encapsulates a block of CUDA device memory. It provides a simplified API
* for working with it and for copying data to and from device memory. * for working with it and for copying data to and from device memory.
...@@ -46,21 +48,23 @@ public: ...@@ -46,21 +48,23 @@ public:
* Create a CudaArray object. The object is allocated on the heap with the "new" operator. * Create a CudaArray object. The object is allocated on the heap with the "new" operator.
* The template argument is the data type of each array element. * The template argument is the data type of each array element.
* *
* @param context the context for which to create the array
* @param size the number of elements in the array * @param size the number of elements in the array
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
static CudaArray* create(int size, const std::string& name) { static CudaArray* create(CudaContext& context, int size, const std::string& name) {
return new CudaArray(size, sizeof(T), name); return new CudaArray(context, size, sizeof(T), name);
} }
/** /**
* Create a CudaArray object. * Create a CudaArray object.
* *
* @param context the context for which to create the array
* @param size the number of elements in the array * @param size the number of elements in the array
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
CudaArray(int size, int elementSize, const std::string& name); CudaArray(CudaContext& context, int size, int elementSize, const std::string& name);
~CudaArray(); ~CudaArray();
/** /**
* Get the number of elements in the array. * Get the number of elements in the array.
...@@ -123,6 +127,7 @@ public: ...@@ -123,6 +127,7 @@ public:
*/ */
void download(void* data, bool blocking = true) const; void download(void* data, bool blocking = true) const;
private: private:
CudaContext& context;
CUdeviceptr pointer; CUdeviceptr pointer;
int size, elementSize; int size, elementSize;
bool ownsMemory; bool ownsMemory;
......
...@@ -86,7 +86,7 @@ void CudaBondedUtilities::initialize(const System& system) { ...@@ -86,7 +86,7 @@ void CudaBondedUtilities::initialize(const System& system) {
for (int atom = 0; atom < width; atom++) for (int atom = 0; atom < width; atom++)
indexVec[bond*width+atom] = forceAtoms[i][bond][startAtom+atom]; indexVec[bond*width+atom] = forceAtoms[i][bond][startAtom+atom];
} }
CudaArray* indices = new CudaArray(numBonds, 4*width, "bondedIndices"); CudaArray* indices = new CudaArray(context, numBonds, 4*width, "bondedIndices");
indices->upload(&indexVec[0]); indices->upload(&indexVec[0]);
atomIndices[i].push_back(indices); atomIndices[i].push_back(indices);
startAtom += width; startAtom += width;
......
...@@ -141,23 +141,23 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -141,23 +141,23 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
nonbonded = new CudaNonbondedUtilities(*this); nonbonded = new CudaNonbondedUtilities(*this);
int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()); int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) { if (useDoublePrecision) {
posq = CudaArray::create<double4>(paddedNumAtoms, "posq"); posq = CudaArray::create<double4>(*this, paddedNumAtoms, "posq");
velm = CudaArray::create<double4>(paddedNumAtoms, "velm"); velm = CudaArray::create<double4>(*this, paddedNumAtoms, "velm");
compilationDefines["USE_DOUBLE_PRECISION"] = "1"; compilationDefines["USE_DOUBLE_PRECISION"] = "1";
compilationDefines["make_real2"] = "make_double2"; compilationDefines["make_real2"] = "make_double2";
compilationDefines["make_real3"] = "make_double3"; compilationDefines["make_real3"] = "make_double3";
compilationDefines["make_real4"] = "make_double4"; compilationDefines["make_real4"] = "make_double4";
energyBuffer = CudaArray::create<double>(numEnergyBuffers, "energyBuffer"); energyBuffer = CudaArray::create<double>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0)); CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
} }
else { else {
posq = CudaArray::create<float4>(paddedNumAtoms, "posq"); posq = CudaArray::create<float4>(*this, paddedNumAtoms, "posq");
velm = CudaArray::create<float4>(paddedNumAtoms, "velm"); velm = CudaArray::create<float4>(*this, paddedNumAtoms, "velm");
compilationDefines["make_real2"] = "make_float2"; compilationDefines["make_real2"] = "make_float2";
compilationDefines["make_real3"] = "make_float3"; compilationDefines["make_real3"] = "make_float3";
compilationDefines["make_real4"] = "make_float4"; compilationDefines["make_real4"] = "make_float4";
energyBuffer = CudaArray::create<float>(numEnergyBuffers, "energyBuffer"); energyBuffer = CudaArray::create<float>(*this, numEnergyBuffers, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), 0)); CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), 0));
} }
...@@ -198,7 +198,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -198,7 +198,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
} }
CudaContext::~CudaContext() { CudaContext::~CudaContext() {
cuCtxSetCurrent(context); setAsCurrent();
for (int i = 0; i < (int) forces.size(); i++) for (int i = 0; i < (int) forces.size(); i++)
delete forces[i]; delete forces[i];
for (int i = 0; i < (int) reorderListeners.size(); i++) for (int i = 0; i < (int) reorderListeners.size(); i++)
...@@ -226,6 +226,7 @@ CudaContext::~CudaContext() { ...@@ -226,6 +226,7 @@ CudaContext::~CudaContext() {
string errorMessage = "Error deleting Context"; string errorMessage = "Error deleting Context";
if (contextIsValid) if (contextIsValid)
CHECK_RESULT(cuCtxDestroy(context)); CHECK_RESULT(cuCtxDestroy(context));
contextIsValid = false;
} }
void CudaContext::initialize() { void CudaContext::initialize() {
...@@ -240,10 +241,10 @@ void CudaContext::initialize() { ...@@ -240,10 +241,10 @@ void CudaContext::initialize() {
} }
velm->upload(pinnedBuffer); velm->upload(pinnedBuffer);
bonded->initialize(system); bonded->initialize(system);
force = CudaArray::create<long long>(paddedNumAtoms*3, "force"); force = CudaArray::create<long long>(*this, paddedNumAtoms*3, "force");
addAutoclearBuffer(force->getDevicePointer(), force->getSize()*force->getElementSize()); addAutoclearBuffer(force->getDevicePointer(), force->getSize()*force->getElementSize());
addAutoclearBuffer(energyBuffer->getDevicePointer(), energyBuffer->getSize()*energyBuffer->getElementSize()); addAutoclearBuffer(energyBuffer->getDevicePointer(), energyBuffer->getSize()*energyBuffer->getElementSize());
atomIndexDevice = CudaArray::create<int>(paddedNumAtoms, "atomIndex"); atomIndexDevice = CudaArray::create<int>(*this, paddedNumAtoms, "atomIndex");
atomIndex.resize(paddedNumAtoms); atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i) for (int i = 0; i < paddedNumAtoms; ++i)
atomIndex[i] = i; atomIndex[i] = i;
...@@ -257,6 +258,11 @@ void CudaContext::addForce(CudaForceInfo* force) { ...@@ -257,6 +258,11 @@ void CudaContext::addForce(CudaForceInfo* force) {
forces.push_back(force); forces.push_back(force);
} }
void CudaContext::setAsCurrent() {
if (contextIsValid)
cuCtxSetCurrent(context);
}
string CudaContext::replaceStrings(const string& input, const std::map<std::string, std::string>& replacements) const { string CudaContext::replaceStrings(const string& input, const std::map<std::string, std::string>& replacements) const {
string result = input; string result = input;
for (map<string, string>::const_iterator iter = replacements.begin(); iter != replacements.end(); iter++) { for (map<string, string>::const_iterator iter = replacements.begin(); iter != replacements.end(); iter++) {
......
...@@ -88,6 +88,17 @@ public: ...@@ -88,6 +88,17 @@ public:
CUcontext getContext() { CUcontext getContext() {
return context; return context;
} }
/**
* Get whether the CUcontext associated with this object is currently a valid contex.
*/
bool getContextIsValid() const {
return contextIsValid;
}
/**
* Set the CUcontext associated with this object to be the current context. If the context is not
* valid, this returns without doing anything.
*/
void setAsCurrent();
/** /**
* Get the CUdevice associated with this object. * Get the CUdevice associated with this object.
*/ */
......
...@@ -101,22 +101,22 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -101,22 +101,22 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL), ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL),
ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL), ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL),
ccmaConvergedMemory(NULL), vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL), ccmaConvergedMemory(NULL), vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL),
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false) { vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL) {
// Create workspace arrays. // Create workspace arrays.
if (context.getUseDoublePrecision()) { if (context.getUseDoublePrecision()) {
posDelta = CudaArray::create<double4>(context.getPaddedNumAtoms(), "posDelta"); posDelta = CudaArray::create<double4>(context, context.getPaddedNumAtoms(), "posDelta");
vector<double4> deltas(posDelta->getSize(), make_double4(0.0, 0.0, 0.0, 0.0)); vector<double4> deltas(posDelta->getSize(), make_double4(0.0, 0.0, 0.0, 0.0));
posDelta->upload(deltas); posDelta->upload(deltas);
stepSize = CudaArray::create<double2>(1, "stepSize"); stepSize = CudaArray::create<double2>(context, 1, "stepSize");
vector<double2> step(1, make_double2(0.0f, 0.0f)); vector<double2> step(1, make_double2(0.0f, 0.0f));
stepSize->upload(step); stepSize->upload(step);
} }
else { else {
posDelta = CudaArray::create<float4>(context.getPaddedNumAtoms(), "posDelta"); posDelta = CudaArray::create<float4>(context, context.getPaddedNumAtoms(), "posDelta");
vector<float4> deltas(posDelta->getSize(), make_float4(0.0, 0.0, 0.0, 0.0)); vector<float4> deltas(posDelta->getSize(), make_float4(0.0, 0.0, 0.0, 0.0));
posDelta->upload(deltas); posDelta->upload(deltas);
stepSize = CudaArray::create<float2>(1, "stepSize"); stepSize = CudaArray::create<float2>(context, 1, "stepSize");
vector<float2> step(1, make_float2(0.0f, 0.0f)); vector<float2> step(1, make_float2(0.0f, 0.0f));
stepSize->upload(step); stepSize->upload(step);
} }
...@@ -125,13 +125,13 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -125,13 +125,13 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
map<string, string> velocityDefines; map<string, string> velocityDefines;
velocityDefines["CONSTRAIN_VELOCITIES"] = "1"; velocityDefines["CONSTRAIN_VELOCITIES"] = "1";
// CUmodule settleModule = context.createModule(CudaKernelSources::settle); CUmodule settleModule = context.createModule(CudaKernelSources::vectorOps+CudaKernelSources::settle);
// settlePosKernel = context.getKernel(settleModule, "applySettle"); settlePosKernel = context.getKernel(settleModule, "applySettle");
// settleVelKernel = context.getKernel(settleModule, "constrainVelocities"); settleVelKernel = context.getKernel(settleModule, "constrainVelocities");
// CUmodule shakeModule = context.createModule(CudaKernelSources::shakeHydrogens); CUmodule shakeModule = context.createModule(CudaKernelSources::vectorOps+CudaKernelSources::shakeHydrogens);
// shakePosKernel = context.getKernel(shakeModule, "applyShakeToHydrogens"); shakePosKernel = context.getKernel(shakeModule, "applyShakeToHydrogens");
// shakeModule = context.createModule(CudaKernelSources::shakeHydrogens, velocityDefines); shakeModule = context.createModule(CudaKernelSources::vectorOps+CudaKernelSources::shakeHydrogens, velocityDefines);
// shakeVelKernel = context.getKernel(shakeModule, "applyShakeToHydrogens"); shakeVelKernel = context.getKernel(shakeModule, "applyShakeToHydrogens");
// Record the set of constraints and how many constraints each atom is involved in. // Record the set of constraints and how many constraints each atom is involved in.
...@@ -210,8 +210,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -210,8 +210,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
isShakeAtom[atom2] = true; isShakeAtom[atom2] = true;
isShakeAtom[atom3] = true; isShakeAtom[atom3] = true;
} }
settleAtoms = CudaArray::create<int4>(atoms.size(), "settleAtoms"); settleAtoms = CudaArray::create<int4>(context, atoms.size(), "settleAtoms");
settleParams = CudaArray::create<float2>(params.size(), "settleParams"); settleParams = CudaArray::create<float2>(context, params.size(), "settleParams");
settleAtoms->upload(atoms); settleAtoms->upload(atoms);
settleParams->upload(params); settleParams->upload(params);
} }
...@@ -292,8 +292,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -292,8 +292,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
isShakeAtom[cluster.peripheralID[2]] = true; isShakeAtom[cluster.peripheralID[2]] = true;
++index; ++index;
} }
shakeAtoms = CudaArray::create<int4>(atoms.size(), "shakeAtoms"); shakeAtoms = CudaArray::create<int4>(context, atoms.size(), "shakeAtoms");
shakeParams = CudaArray::create<float4>(params.size(), "shakeParams"); shakeParams = CudaArray::create<float4>(context, params.size(), "shakeParams");
shakeAtoms->upload(atoms); shakeAtoms->upload(atoms);
shakeParams->upload(params); shakeParams->upload(params);
} }
...@@ -475,36 +475,67 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -475,36 +475,67 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
// Record the CCMA data structures. // Record the CCMA data structures.
ccmaAtoms = CudaArray::create<int2>(numCCMA, "CcmaAtoms"); ccmaAtoms = CudaArray::create<int2>(context, numCCMA, "CcmaAtoms");
ccmaDistance = CudaArray::create<float4>(numCCMA, "CcmaDistance"); ccmaAtomConstraints = CudaArray::create<int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
ccmaAtomConstraints = CudaArray::create<int>(numAtoms*maxAtomConstraints, "CcmaAtomConstraints"); ccmaNumAtomConstraints = CudaArray::create<int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaNumAtomConstraints = CudaArray::create<int>(numAtoms, "CcmaAtomConstraintsIndex"); ccmaConverged = CudaArray::create<int>(context, 2, "CcmaConverged");
ccmaDelta1 = CudaArray::create<float>(numCCMA, "CcmaDelta1");
ccmaDelta2 = CudaArray::create<float>(numCCMA, "CcmaDelta2");
ccmaConverged = CudaArray::create<int>(2, "CcmaConverged");
CHECK_RESULT2(cuMemHostAlloc((void**) &ccmaConvergedMemory, 2*sizeof(int), 0), "Error allocating pinned memory"); CHECK_RESULT2(cuMemHostAlloc((void**) &ccmaConvergedMemory, 2*sizeof(int), 0), "Error allocating pinned memory");
ccmaReducedMass = CudaArray::create<float>(numCCMA, "CcmaReducedMass"); ccmaConstraintMatrixColumn = CudaArray::create<int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConstraintMatrixColumn = CudaArray::create<int>(numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConstraintMatrixValue = CudaArray::create<float>(numCCMA*maxRowElements, "ConstraintMatrixValue");
vector<int2> atomsVec(ccmaAtoms->getSize()); vector<int2> atomsVec(ccmaAtoms->getSize());
vector<float4> distanceVec(ccmaDistance->getSize());
vector<int> atomConstraintsVec(ccmaAtomConstraints->getSize()); vector<int> atomConstraintsVec(ccmaAtomConstraints->getSize());
vector<int> numAtomConstraintsVec(ccmaNumAtomConstraints->getSize()); vector<int> numAtomConstraintsVec(ccmaNumAtomConstraints->getSize());
vector<float> reducedMassVec(ccmaReducedMass->getSize());
vector<int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn->getSize()); vector<int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn->getSize());
vector<float> constraintMatrixValueVec(ccmaConstraintMatrixValue->getSize()); if (context.getUseDoublePrecision()) {
for (int i = 0; i < numCCMA; i++) { ccmaDistance = CudaArray::create<double4>(context, numCCMA, "CcmaDistance");
int index = constraintOrder[i]; ccmaDelta1 = CudaArray::create<double>(context, numCCMA, "CcmaDelta1");
int c = ccmaConstraints[index]; ccmaDelta2 = CudaArray::create<double>(context, numCCMA, "CcmaDelta2");
atomsVec[i].x = atom1[c]; ccmaReducedMass = CudaArray::create<double>(context, numCCMA, "CcmaReducedMass");
atomsVec[i].y = atom2[c]; ccmaConstraintMatrixValue = CudaArray::create<double>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
distanceVec[i].w = (float) distance[c]; vector<double4> distanceVec(ccmaDistance->getSize());
reducedMassVec[i] = (float) (0.5/(1.0/system.getParticleMass(atom1[c])+1.0/system.getParticleMass(atom2[c]))); vector<double> reducedMassVec(ccmaReducedMass->getSize());
for (unsigned int j = 0; j < matrix[index].size(); j++) { vector<double> constraintMatrixValueVec(ccmaConstraintMatrixValue->getSize());
constraintMatrixColumnVec[i+j*numCCMA] = matrix[index][j].first; for (int i = 0; i < numCCMA; i++) {
constraintMatrixValueVec[i+j*numCCMA] = (float) matrix[index][j].second; int index = constraintOrder[i];
int c = ccmaConstraints[index];
atomsVec[i].x = atom1[c];
atomsVec[i].y = atom2[c];
distanceVec[i].w = distance[c];
reducedMassVec[i] = (0.5/(1.0/system.getParticleMass(atom1[c])+1.0/system.getParticleMass(atom2[c])));
for (unsigned int j = 0; j < matrix[index].size(); j++) {
constraintMatrixColumnVec[i+j*numCCMA] = matrix[index][j].first;
constraintMatrixValueVec[i+j*numCCMA] = matrix[index][j].second;
}
constraintMatrixColumnVec[i+matrix[index].size()*numCCMA] = numCCMA;
}
ccmaDistance->upload(distanceVec);
ccmaReducedMass->upload(reducedMassVec);
ccmaConstraintMatrixValue->upload(constraintMatrixValueVec);
}
else {
ccmaDistance = CudaArray::create<float4>(context, numCCMA, "CcmaDistance");
ccmaDelta1 = CudaArray::create<float>(context, numCCMA, "CcmaDelta1");
ccmaDelta2 = CudaArray::create<float>(context, numCCMA, "CcmaDelta2");
ccmaReducedMass = CudaArray::create<float>(context, numCCMA, "CcmaReducedMass");
ccmaConstraintMatrixValue = CudaArray::create<float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
vector<float4> distanceVec(ccmaDistance->getSize());
vector<float> reducedMassVec(ccmaReducedMass->getSize());
vector<float> constraintMatrixValueVec(ccmaConstraintMatrixValue->getSize());
for (int i = 0; i < numCCMA; i++) {
int index = constraintOrder[i];
int c = ccmaConstraints[index];
atomsVec[i].x = atom1[c];
atomsVec[i].y = atom2[c];
distanceVec[i].w = (float) distance[c];
reducedMassVec[i] = (float) (0.5/(1.0/system.getParticleMass(atom1[c])+1.0/system.getParticleMass(atom2[c])));
for (unsigned int j = 0; j < matrix[index].size(); j++) {
constraintMatrixColumnVec[i+j*numCCMA] = matrix[index][j].first;
constraintMatrixValueVec[i+j*numCCMA] = (float) matrix[index][j].second;
}
constraintMatrixColumnVec[i+matrix[index].size()*numCCMA] = numCCMA;
} }
constraintMatrixColumnVec[i+matrix[index].size()*numCCMA] = numCCMA; ccmaDistance->upload(distanceVec);
ccmaReducedMass->upload(reducedMassVec);
ccmaConstraintMatrixValue->upload(constraintMatrixValueVec);
} }
for (unsigned int i = 0; i < atomConstraints.size(); i++) { for (unsigned int i = 0; i < atomConstraints.size(); i++) {
numAtomConstraintsVec[i] = atomConstraints[i].size(); numAtomConstraintsVec[i] = atomConstraints[i].size();
...@@ -514,27 +545,25 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -514,27 +545,25 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
} }
} }
ccmaAtoms->upload(atomsVec); ccmaAtoms->upload(atomsVec);
ccmaDistance->upload(distanceVec);
ccmaAtomConstraints->upload(atomConstraintsVec); ccmaAtomConstraints->upload(atomConstraintsVec);
ccmaNumAtomConstraints->upload(numAtomConstraintsVec); ccmaNumAtomConstraints->upload(numAtomConstraintsVec);
ccmaReducedMass->upload(reducedMassVec);
ccmaConstraintMatrixColumn->upload(constraintMatrixColumnVec); ccmaConstraintMatrixColumn->upload(constraintMatrixColumnVec);
ccmaConstraintMatrixValue->upload(constraintMatrixValueVec);
// Create the CCMA kernels. // Create the CCMA kernels.
map<string, string> defines; map<string, string> defines;
defines["NUM_CONSTRAINTS"] = context.intToString(numCCMA); defines["NUM_CONSTRAINTS"] = context.intToString(numCCMA);
defines["NUM_ATOMS"] = context.intToString(numAtoms); defines["NUM_ATOMS"] = context.intToString(numAtoms);
// CUmodule ccmaModule = context.createModule(CudaKernelSources::ccma, defines); CUmodule ccmaModule = context.createModule(CudaKernelSources::vectorOps+CudaKernelSources::ccma, defines);
// ccmaDirectionsKernel = context.getKernel(ccmaModule, "computeConstraintDirections"); ccmaDirectionsKernel = context.getKernel(ccmaModule, "computeConstraintDirections");
// ccmaPosForceKernel = context.getKernel(ccmaModule, "computeConstraintForce"); ccmaPosForceKernel = context.getKernel(ccmaModule, "computeConstraintForce");
// ccmaMultiplyKernel = context.getKernel(ccmaModule, "multiplyByConstraintMatrix"); ccmaMultiplyKernel = context.getKernel(ccmaModule, "multiplyByConstraintMatrix");
// ccmaPosUpdateKernel = context.getKernel(ccmaModule, "updateAtomPositions"); ccmaPosUpdateKernel = context.getKernel(ccmaModule, "updateAtomPositions");
// defines["CONSTRAIN_VELOCITIES"] = "1"; defines["CONSTRAIN_VELOCITIES"] = "1";
// ccmaModule = context.createModule(CudaKernelSources::ccma, defines); ccmaModule = context.createModule(CudaKernelSources::vectorOps+CudaKernelSources::ccma, defines);
// ccmaVelForceKernel = context.getKernel(ccmaModule, "computeConstraintForce"); ccmaVelForceKernel = context.getKernel(ccmaModule, "computeConstraintForce");
// ccmaVelUpdateKernel = context.getKernel(ccmaModule, "updateAtomPositions"); ccmaVelUpdateKernel = context.getKernel(ccmaModule, "updateAtomPositions");
CHECK_RESULT2(cuEventCreate(&ccmaEvent, CU_EVENT_DISABLE_TIMING), "Error creating event for CCMA");
} }
// Build the list of virtual sites. // Build the list of virtual sites.
...@@ -573,12 +602,12 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -573,12 +602,12 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
int num2Avg = vsite2AvgAtomVec.size(); int num2Avg = vsite2AvgAtomVec.size();
int num3Avg = vsite3AvgAtomVec.size(); int num3Avg = vsite3AvgAtomVec.size();
int numOutOfPlane = vsiteOutOfPlaneAtomVec.size(); int numOutOfPlane = vsiteOutOfPlaneAtomVec.size();
vsite2AvgAtoms = CudaArray::create<int4>(max(1, num2Avg), "vsite2AvgAtoms"); vsite2AvgAtoms = CudaArray::create<int4>(context, max(1, num2Avg), "vsite2AvgAtoms");
vsite2AvgWeights = CudaArray::create<float2>(max(1, num2Avg), "vsite2AvgWeights"); vsite2AvgWeights = CudaArray::create<float2>(context, max(1, num2Avg), "vsite2AvgWeights");
vsite3AvgAtoms = CudaArray::create<int4>(max(1, num3Avg), "vsite3AvgAtoms"); vsite3AvgAtoms = CudaArray::create<int4>(context, max(1, num3Avg), "vsite3AvgAtoms");
vsite3AvgWeights = CudaArray::create<float4>(max(1, num3Avg), "vsite3AvgWeights"); vsite3AvgWeights = CudaArray::create<float4>(context, max(1, num3Avg), "vsite3AvgWeights");
vsiteOutOfPlaneAtoms = CudaArray::create<int4>(max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms"); vsiteOutOfPlaneAtoms = CudaArray::create<int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms");
vsiteOutOfPlaneWeights = CudaArray::create<float4>(max(1, numOutOfPlane), "vsiteOutOfPlaneWeights"); vsiteOutOfPlaneWeights = CudaArray::create<float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
if (num2Avg > 0) { if (num2Avg > 0) {
vsite2AvgAtoms->upload(vsite2AvgAtomVec); vsite2AvgAtoms->upload(vsite2AvgAtomVec);
vsite2AvgWeights->upload(vsite2AvgWeightVec); vsite2AvgWeights->upload(vsite2AvgWeightVec);
...@@ -620,6 +649,7 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -620,6 +649,7 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
} }
CudaIntegrationUtilities::~CudaIntegrationUtilities() { CudaIntegrationUtilities::~CudaIntegrationUtilities() {
context.setAsCurrent();
if (posDelta != NULL) if (posDelta != NULL)
delete posDelta; delete posDelta;
if (settleAtoms != NULL) if (settleAtoms != NULL)
...@@ -681,97 +711,69 @@ void CudaIntegrationUtilities::applyVelocityConstraints(double tol) { ...@@ -681,97 +711,69 @@ void CudaIntegrationUtilities::applyVelocityConstraints(double tol) {
} }
void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double tol) { void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double tol) {
// bool hasInitialized; CUfunction settleKernel, shakeKernel, ccmaForceKernel, ccmaUpdateKernel;
// CUfunction settleKernel, shakeKernel, ccmaForceKernel, ccmaUpdateKernel; if (constrainVelocities) {
// if (constrainVelocities) { settleKernel = settleVelKernel;
// hasInitialized = hasInitializedVelConstraintKernels; shakeKernel = shakeVelKernel;
// settleKernel = settleVelKernel; ccmaForceKernel = ccmaVelForceKernel;
// shakeKernel = shakeVelKernel; ccmaUpdateKernel = ccmaVelUpdateKernel;
// ccmaForceKernel = ccmaVelForceKernel; }
// ccmaUpdateKernel = ccmaVelUpdateKernel; else {
// hasInitializedVelConstraintKernels = true; settleKernel = settlePosKernel;
// } shakeKernel = shakePosKernel;
// else { ccmaForceKernel = ccmaPosForceKernel;
// hasInitialized = hasInitializedPosConstraintKernels; ccmaUpdateKernel = ccmaPosUpdateKernel;
// settleKernel = settlePosKernel; }
// shakeKernel = shakePosKernel; float floatTol = (float) tol;
// ccmaForceKernel = ccmaPosForceKernel; if (settleAtoms != NULL) {
// ccmaUpdateKernel = ccmaPosUpdateKernel; int numClusters = settleAtoms->getSize();
// hasInitializedPosConstraintKernels = true; void* args[] = {&numClusters, &floatTol, &context.getPosq().getDevicePointer(),
// } &posDelta->getDevicePointer(), &context.getVelm().getDevicePointer(),
// if (settleAtoms != NULL) { &settleAtoms->getDevicePointer(), &settleParams->getDevicePointer()};
// if (!hasInitialized) { context.executeKernel(settleKernel, args, settleAtoms->getSize());
// settleKernel.setArg<int>(0, settleAtoms->getSize()); }
// settleKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer()); if (shakeAtoms != NULL) {
// settleKernel.setArg<cl::Buffer>(3, posDelta->getDeviceBuffer()); int numClusters = shakeAtoms->getSize();
// settleKernel.setArg<cl::Buffer>(4, context.getVelm().getDeviceBuffer()); void* args[] = {&numClusters, &floatTol, &context.getPosq().getDevicePointer(),
// settleKernel.setArg<cl::Buffer>(5, settleAtoms->getDeviceBuffer()); constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(),
// settleKernel.setArg<cl::Buffer>(6, settleParams->getDeviceBuffer()); &shakeAtoms->getDevicePointer(), &shakeParams->getDevicePointer()};
// } context.executeKernel(shakeKernel, args, shakeAtoms->getSize());
// settleKernel.setArg<float>(1, (float) tol); }
// context.executeKernel(settleKernel, settleAtoms->getSize()); if (ccmaAtoms != NULL) {
// } void* directionsArgs[] = {&ccmaAtoms->getDevicePointer(), &ccmaDistance->getDevicePointer(), &context.getPosq().getDevicePointer()};
// if (shakeAtoms != NULL) { context.executeKernel(ccmaDirectionsKernel, directionsArgs, ccmaAtoms->getSize());
// if (!hasInitialized) { int i;
// shakeKernel.setArg<int>(0, shakeAtoms->getSize()); void* forceArgs[] = {&ccmaAtoms->getDevicePointer(), &ccmaDistance->getDevicePointer(),
// shakeKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer()); constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(),
// shakeKernel.setArg<cl::Buffer>(3, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta->getDeviceBuffer()); &ccmaReducedMass->getDevicePointer(), &ccmaDelta1->getDevicePointer(), &ccmaConverged->getDevicePointer(),
// shakeKernel.setArg<cl::Buffer>(4, shakeAtoms->getDeviceBuffer()); &floatTol, &i};
// shakeKernel.setArg<cl::Buffer>(5, shakeParams->getDeviceBuffer()); void* multiplyArgs[] = {&ccmaDelta1->getDevicePointer(), &ccmaDelta2->getDevicePointer(),
// } &ccmaConstraintMatrixColumn->getDevicePointer(), &ccmaConstraintMatrixValue->getDevicePointer(), &ccmaConverged->getDevicePointer(), &i};
// shakeKernel.setArg<float>(1, (float) tol); void* updateArgs[] = {&ccmaNumAtomConstraints->getDevicePointer(), &ccmaAtomConstraints->getDevicePointer(), &ccmaDistance->getDevicePointer(),
// context.executeKernel(shakeKernel, shakeAtoms->getSize()); constrainVelocities ? &context.getVelm().getDevicePointer() : &posDelta->getDevicePointer(),
// } &context.getVelm().getDevicePointer(), &ccmaDelta1->getDevicePointer(), &ccmaDelta2->getDevicePointer(),
// if (ccmaAtoms != NULL) { &ccmaConverged->getDevicePointer(), &i};
// if (!hasInitialized) { const int checkInterval = 4;
// ccmaDirectionsKernel.setArg<cl::Buffer>(0, ccmaAtoms->getDeviceBuffer()); for (i = 0; i < 150; i++) {
// ccmaDirectionsKernel.setArg<cl::Buffer>(1, ccmaDistance->getDeviceBuffer()); if (i == 0) {
// ccmaDirectionsKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer()); ccmaConvergedMemory[0] = 1;
// ccmaForceKernel.setArg<cl::Buffer>(0, ccmaAtoms->getDeviceBuffer()); ccmaConvergedMemory[1] = 0;
// ccmaForceKernel.setArg<cl::Buffer>(1, ccmaDistance->getDeviceBuffer()); cuMemcpyHtoD(ccmaConverged->getDevicePointer(), ccmaConvergedMemory, 2*sizeof(int));
// ccmaForceKernel.setArg<cl::Buffer>(2, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta->getDeviceBuffer()); }
// ccmaForceKernel.setArg<cl::Buffer>(3, ccmaReducedMass->getDeviceBuffer()); context.executeKernel(ccmaForceKernel, forceArgs, ccmaAtoms->getSize());
// ccmaForceKernel.setArg<cl::Buffer>(4, ccmaDelta1->getDeviceBuffer()); if ((i+1)%checkInterval == 0) {
// ccmaForceKernel.setArg<cl::Buffer>(5, ccmaConverged->getDeviceBuffer()); cuMemcpyDtoH(ccmaConvergedMemory, ccmaConverged->getDevicePointer(), 2*sizeof(int));
// ccmaMultiplyKernel.setArg<cl::Buffer>(0, ccmaDelta1->getDeviceBuffer()); CHECK_RESULT2(cuEventRecord(ccmaEvent, 0), "Error recording event for CCMA");
// ccmaMultiplyKernel.setArg<cl::Buffer>(1, ccmaDelta2->getDeviceBuffer()); }
// ccmaMultiplyKernel.setArg<cl::Buffer>(2, ccmaConstraintMatrixColumn->getDeviceBuffer()); context.executeKernel(ccmaMultiplyKernel, multiplyArgs, ccmaAtoms->getSize());
// ccmaMultiplyKernel.setArg<cl::Buffer>(3, ccmaConstraintMatrixValue->getDeviceBuffer()); context.executeKernel(ccmaUpdateKernel, updateArgs, context.getNumAtoms());
// ccmaMultiplyKernel.setArg<cl::Buffer>(4, ccmaConverged->getDeviceBuffer()); if ((i+1)%checkInterval == 0) {
// ccmaUpdateKernel.setArg<cl::Buffer>(0, ccmaNumAtomConstraints->getDeviceBuffer()); CHECK_RESULT2(cuEventSynchronize(ccmaEvent), "Error synchronizing on event for CCMA");
// ccmaUpdateKernel.setArg<cl::Buffer>(1, ccmaAtomConstraints->getDeviceBuffer()); if (ccmaConvergedMemory[i%2])
// ccmaUpdateKernel.setArg<cl::Buffer>(2, ccmaDistance->getDeviceBuffer()); break;
// ccmaUpdateKernel.setArg<cl::Buffer>(3, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta->getDeviceBuffer()); }
// ccmaUpdateKernel.setArg<cl::Buffer>(4, context.getVelm().getDeviceBuffer()); }
// ccmaUpdateKernel.setArg<cl::Buffer>(5, ccmaDelta1->getDeviceBuffer()); }
// ccmaUpdateKernel.setArg<cl::Buffer>(6, ccmaDelta2->getDeviceBuffer());
// ccmaUpdateKernel.setArg<cl::Buffer>(7, ccmaConverged->getDeviceBuffer());
// }
// ccmaForceKernel.setArg<float>(6, (float) tol);
// context.executeKernel(ccmaDirectionsKernel, ccmaAtoms->getSize());
// const int checkInterval = 4;
// cl::Event event;
// for (int i = 0; i < 150; i++) {
// ccmaForceKernel.setArg<int>(7, i);
// if (i == 0) {
// ccmaConvergedMemory[0] = 1;
// ccmaConvergedMemory[1] = 0;
// context.getQueue().enqueueWriteBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(int), ccmaConvergedMemory);
// }
// context.executeKernel(ccmaForceKernel, ccmaAtoms->getSize());
// if ((i+1)%checkInterval == 0)
// context.getQueue().enqueueReadBuffer(ccmaConverged->getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(int), ccmaConvergedMemory, NULL, &event);
// ccmaMultiplyKernel.setArg<int>(5, i);
// context.executeKernel(ccmaMultiplyKernel, ccmaAtoms->getSize());
// ccmaUpdateKernel.setArg<int>(8, i);
// context.executeKernel(ccmaUpdateKernel, context.getNumAtoms());
// if ((i+1)%checkInterval == 0) {
// event.wait();
// if (ccmaConvergedMemory[i%2])
// break;
// }
// }
// }
} }
void CudaIntegrationUtilities::computeVirtualSites() { void CudaIntegrationUtilities::computeVirtualSites() {
...@@ -796,8 +798,8 @@ void CudaIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumb ...@@ -796,8 +798,8 @@ void CudaIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumb
// Create the random number arrays. // Create the random number arrays.
lastSeed = randomNumberSeed; lastSeed = randomNumberSeed;
random = CudaArray::create<float4>(32*context.getPaddedNumAtoms(), "random"); random = CudaArray::create<float4>(context, 32*context.getPaddedNumAtoms(), "random");
randomSeed = CudaArray::create<int4>(context.getNumThreadBlocks()*CudaContext::ThreadBlockSize, "randomSeed"); randomSeed = CudaArray::create<int4>(context, context.getNumThreadBlocks()*CudaContext::ThreadBlockSize, "randomSeed");
randomPos = random->getSize(); randomPos = random->getSize();
// Use a quick and dirty RNG to pick seeds for the real random number generator. // Use a quick and dirty RNG to pick seeds for the real random number generator.
...@@ -826,7 +828,7 @@ int CudaIntegrationUtilities::prepareRandomNumbers(int numValues) { ...@@ -826,7 +828,7 @@ int CudaIntegrationUtilities::prepareRandomNumbers(int numValues) {
} }
if (numValues > random->getSize()) { if (numValues > random->getSize()) {
delete random; delete random;
random = CudaArray::create<float4>(numValues, "random"); random = CudaArray::create<float4>(context, numValues, "random");
} }
int size = random->getSize(); int size = random->getSize();
void* args[] = {&size, &random->getDevicePointer(), &randomSeed->getDevicePointer()}; void* args[] = {&size, &random->getDevicePointer(), &randomSeed->getDevicePointer()};
......
...@@ -135,6 +135,7 @@ private: ...@@ -135,6 +135,7 @@ private:
CudaArray* ccmaDelta2; CudaArray* ccmaDelta2;
CudaArray* ccmaConverged; CudaArray* ccmaConverged;
int* ccmaConvergedMemory; int* ccmaConvergedMemory;
CUevent ccmaEvent;
CudaArray* vsite2AvgAtoms; CudaArray* vsite2AvgAtoms;
CudaArray* vsite2AvgWeights; CudaArray* vsite2AvgWeights;
CudaArray* vsite3AvgAtoms; CudaArray* vsite3AvgAtoms;
...@@ -143,7 +144,6 @@ private: ...@@ -143,7 +144,6 @@ private:
CudaArray* vsiteOutOfPlaneWeights; CudaArray* vsiteOutOfPlaneWeights;
int randomPos; int randomPos;
int lastSeed, numVsites; int lastSeed, numVsites;
bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels;
struct ShakeCluster; struct ShakeCluster;
struct ConstraintOrderer; struct ConstraintOrderer;
}; };
......
...@@ -108,14 +108,14 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform ...@@ -108,14 +108,14 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return new CudaCalcCustomCompoundBondForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcCustomCompoundBondForceKernel(name, platform, cu, context.getSystem());
if (name == IntegrateVerletStepKernel::Name()) if (name == IntegrateVerletStepKernel::Name())
return new CudaIntegrateVerletStepKernel(name, platform, cu); return new CudaIntegrateVerletStepKernel(name, platform, cu);
// if (name == IntegrateLangevinStepKernel::Name()) if (name == IntegrateLangevinStepKernel::Name())
// return new CudaIntegrateLangevinStepKernel(name, platform, cu); return new CudaIntegrateLangevinStepKernel(name, platform, cu);
// if (name == IntegrateBrownianStepKernel::Name()) if (name == IntegrateBrownianStepKernel::Name())
// return new CudaIntegrateBrownianStepKernel(name, platform, cu); return new CudaIntegrateBrownianStepKernel(name, platform, cu);
// if (name == IntegrateVariableVerletStepKernel::Name()) if (name == IntegrateVariableVerletStepKernel::Name())
// return new CudaIntegrateVariableVerletStepKernel(name, platform, cu); return new CudaIntegrateVariableVerletStepKernel(name, platform, cu);
// if (name == IntegrateVariableLangevinStepKernel::Name()) if (name == IntegrateVariableLangevinStepKernel::Name())
// return new CudaIntegrateVariableLangevinStepKernel(name, platform, cu); return new CudaIntegrateVariableLangevinStepKernel(name, platform, cu);
// if (name == IntegrateCustomStepKernel::Name()) // if (name == IntegrateCustomStepKernel::Name())
// return new CudaIntegrateCustomStepKernel(name, platform, cu); // return new CudaIntegrateCustomStepKernel(name, platform, cu);
// if (name == ApplyAndersenThermostatKernel::Name()) // if (name == ApplyAndersenThermostatKernel::Name())
......
...@@ -82,7 +82,7 @@ void CudaCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -82,7 +82,7 @@ void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
} }
void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) { void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities(); CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0); bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0);
cu.setAtomsWereReordered(false); cu.setAtomsWereReordered(false);
...@@ -134,7 +134,7 @@ void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) { ...@@ -134,7 +134,7 @@ void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
} }
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) { void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles); positions.resize(numParticles);
...@@ -160,7 +160,7 @@ void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& ...@@ -160,7 +160,7 @@ void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>&
} }
void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<Vec3>& positions) { void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<Vec3>& positions) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision()) { if (cu.getUseDoublePrecision()) {
...@@ -196,7 +196,7 @@ void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector< ...@@ -196,7 +196,7 @@ void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<
} }
void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) { void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles); velocities.resize(numParticles);
...@@ -221,7 +221,7 @@ void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3> ...@@ -221,7 +221,7 @@ void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>
} }
void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector<Vec3>& velocities) { void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector<Vec3>& velocities) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision()) { if (cu.getUseDoublePrecision()) {
...@@ -255,7 +255,7 @@ void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector ...@@ -255,7 +255,7 @@ void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector
} }
void CudaUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& forces) { void CudaUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& forces) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
long long* force = (long long*) cu.getPinnedBuffer(); long long* force = (long long*) cu.getPinnedBuffer();
cu.getForce().download(force); cu.getForce().download(force);
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
...@@ -281,7 +281,7 @@ void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, cons ...@@ -281,7 +281,7 @@ void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, cons
} }
void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) { void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
// int version = 1; // int version = 1;
// stream.write((char*) &version, sizeof(int)); // stream.write((char*) &version, sizeof(int));
// double time = cu.getTime(); // double time = cu.getTime();
...@@ -299,7 +299,7 @@ void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& ...@@ -299,7 +299,7 @@ void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream&
} }
void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) { void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
// int version; // int version;
// stream.read((char*) &version, sizeof(int)); // stream.read((char*) &version, sizeof(int));
// if (version != 1) // if (version != 1)
...@@ -330,20 +330,19 @@ void CudaApplyConstraintsKernel::initialize(const System& system) { ...@@ -330,20 +330,19 @@ void CudaApplyConstraintsKernel::initialize(const System& system) {
} }
void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) { void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
// if (!hasInitializedKernel) { if (!hasInitializedKernel) {
// hasInitializedKernel = true; hasInitializedKernel = true;
// map<string, string> defines; map<string, string> defines;
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// CUmodule module = cu.createModule(CudaKernelSources::constraints, defines); CUmodule module = cu.createModule(CudaKernelSources::constraints, defines);
// applyDeltasKernel = cu.getKernel(module, "applyPositionDeltas"); applyDeltasKernel = cu.getKernel(module, "applyPositionDeltas");
// applyDeltasKernel.setArg<cu::Buffer>(0, cu.getPosq().getDevicePointer()); }
// applyDeltasKernel.setArg<cu::Buffer>(1, cu.getIntegrationUtilities().getPosDelta().getDevicePointer()); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
// } cu.clearBuffer(integration.getPosDelta());
// CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); integration.applyConstraints(tol);
// cu.clearBuffer(integration.getPosDelta()); void* args[] = {&cu.getPosq().getDevicePointer(), &cu.getIntegrationUtilities().getPosDelta().getDevicePointer()};
// integration.applyConstraints(tol); cu.executeKernel(applyDeltasKernel, args, cu.getNumAtoms());
// cu.executeKernel(applyDeltasKernel, cu.getNumAtoms()); integration.computeVirtualSites();
// integration.computeVirtualSites();
} }
void CudaVirtualSitesKernel::initialize(const System& system) { void CudaVirtualSitesKernel::initialize(const System& system) {
...@@ -380,13 +379,13 @@ private: ...@@ -380,13 +379,13 @@ private:
}; };
CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() { CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
} }
void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) { void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts; int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
...@@ -394,7 +393,7 @@ void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const Har ...@@ -394,7 +393,7 @@ void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const Har
if (numBonds == 0) if (numBonds == 0)
return; return;
vector<vector<int> > atoms(numBonds, vector<int>(2)); vector<vector<int> > atoms(numBonds, vector<int>(2));
params = CudaArray::create<float2>(numBonds, "bondParams"); params = CudaArray::create<float2>(cu, numBonds, "bondParams");
vector<float2> paramVector(numBonds); vector<float2> paramVector(numBonds);
for (int i = 0; i < numBonds; i++) { for (int i = 0; i < numBonds; i++) {
double length, k; double length, k;
...@@ -414,7 +413,7 @@ double CudaCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool inclu ...@@ -414,7 +413,7 @@ double CudaCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool inclu
} }
void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force) { void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts; int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
...@@ -467,7 +466,7 @@ private: ...@@ -467,7 +466,7 @@ private:
}; };
CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() { CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL) if (globals != NULL)
...@@ -475,7 +474,7 @@ CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() { ...@@ -475,7 +474,7 @@ CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
} }
void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) { void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts; int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
...@@ -518,7 +517,7 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo ...@@ -518,7 +517,7 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
variables[name] = "bondParams"+params->getParameterSuffix(i); variables[name] = "bondParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(force.getNumGlobalParameters(), "customBondGlobals"); globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customBondGlobals");
globals->upload(globalParamValues); globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float"); string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
...@@ -556,7 +555,7 @@ double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool include ...@@ -556,7 +555,7 @@ double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool include
} }
void CudaCalcCustomBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomBondForce& force) { void CudaCalcCustomBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomBondForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts; int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
...@@ -609,13 +608,13 @@ private: ...@@ -609,13 +608,13 @@ private:
}; };
CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() { CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
} }
void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) { void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts; int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
...@@ -623,7 +622,7 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha ...@@ -623,7 +622,7 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
if (numAngles == 0) if (numAngles == 0)
return; return;
vector<vector<int> > atoms(numAngles, vector<int>(3)); vector<vector<int> > atoms(numAngles, vector<int>(3));
params = CudaArray::create<float2>(numAngles, "angleParams"); params = CudaArray::create<float2>(cu, numAngles, "angleParams");
vector<float2> paramVector(numAngles); vector<float2> paramVector(numAngles);
for (int i = 0; i < numAngles; i++) { for (int i = 0; i < numAngles; i++) {
double angle, k; double angle, k;
...@@ -644,7 +643,7 @@ double CudaCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool incl ...@@ -644,7 +643,7 @@ double CudaCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool incl
} }
void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force) { void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts; int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
...@@ -698,7 +697,7 @@ private: ...@@ -698,7 +697,7 @@ private:
}; };
CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() { CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL) if (globals != NULL)
...@@ -706,7 +705,7 @@ CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() { ...@@ -706,7 +705,7 @@ CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
} }
void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) { void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts; int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
...@@ -749,7 +748,7 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust ...@@ -749,7 +748,7 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
variables[name] = "angleParams"+params->getParameterSuffix(i); variables[name] = "angleParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(force.getNumGlobalParameters(), "customAngleGlobals"); globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customAngleGlobals");
globals->upload(globalParamValues); globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float"); string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
...@@ -787,7 +786,7 @@ double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includ ...@@ -787,7 +786,7 @@ double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includ
} }
void CudaCalcCustomAngleForceKernel::copyParametersToContext(ContextImpl& context, const CustomAngleForce& force) { void CudaCalcCustomAngleForceKernel::copyParametersToContext(ContextImpl& context, const CustomAngleForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts; int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
...@@ -846,7 +845,7 @@ CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() { ...@@ -846,7 +845,7 @@ CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
} }
void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) { void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts; int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
...@@ -854,7 +853,7 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const ...@@ -854,7 +853,7 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
if (numTorsions == 0) if (numTorsions == 0)
return; return;
vector<vector<int> > atoms(numTorsions, vector<int>(4)); vector<vector<int> > atoms(numTorsions, vector<int>(4));
params = CudaArray::create<float4>(numTorsions, "periodicTorsionParams"); params = CudaArray::create<float4>(cu, numTorsions, "periodicTorsionParams");
vector<float4> paramVector(numTorsions); vector<float4> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) { for (int i = 0; i < numTorsions; i++) {
int periodicity; int periodicity;
...@@ -875,7 +874,7 @@ double CudaCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool in ...@@ -875,7 +874,7 @@ double CudaCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool in
} }
void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force) { void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts; int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
...@@ -934,7 +933,7 @@ CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() { ...@@ -934,7 +933,7 @@ CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
} }
void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) { void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts; int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
...@@ -942,8 +941,8 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors ...@@ -942,8 +941,8 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
if (numTorsions == 0) if (numTorsions == 0)
return; return;
vector<vector<int> > atoms(numTorsions, vector<int>(4)); vector<vector<int> > atoms(numTorsions, vector<int>(4));
params1 = CudaArray::create<float4>(numTorsions, "rbTorsionParams1"); params1 = CudaArray::create<float4>(cu, numTorsions, "rbTorsionParams1");
params2 = CudaArray::create<float2>(numTorsions, "rbTorsionParams2"); params2 = CudaArray::create<float2>(cu, numTorsions, "rbTorsionParams2");
vector<float4> paramVector1(numTorsions); vector<float4> paramVector1(numTorsions);
vector<float2> paramVector2(numTorsions); vector<float2> paramVector2(numTorsions);
for (int i = 0; i < numTorsions; i++) { for (int i = 0; i < numTorsions; i++) {
...@@ -968,7 +967,7 @@ double CudaCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeF ...@@ -968,7 +967,7 @@ double CudaCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeF
} }
void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context, const RBTorsionForce& force) { void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context, const RBTorsionForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts; int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
...@@ -1034,7 +1033,7 @@ CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() { ...@@ -1034,7 +1033,7 @@ CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
} }
void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) { void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts; int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
...@@ -1064,9 +1063,9 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP ...@@ -1064,9 +1063,9 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP
vector<int> torsionMapsVec(numTorsions); vector<int> torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++) for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]); force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients = CudaArray::create<float4>(coeffVec.size(), "cmapTorsionCoefficients"); coefficients = CudaArray::create<float4>(cu, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions = CudaArray::create<int2>(numMaps, "cmapTorsionMapPositions"); mapPositions = CudaArray::create<int2>(cu, numMaps, "cmapTorsionMapPositions");
torsionMaps = CudaArray::create<int>(numTorsions, "cmapTorsionMaps"); torsionMaps = CudaArray::create<int>(cu, numTorsions, "cmapTorsionMaps");
coefficients->upload(coeffVec); coefficients->upload(coeffVec);
mapPositions->upload(mapPositionsVec); mapPositions->upload(mapPositionsVec);
torsionMaps->upload(torsionMapsVec); torsionMaps->upload(torsionMapsVec);
...@@ -1121,7 +1120,7 @@ CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() { ...@@ -1121,7 +1120,7 @@ CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
} }
void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) { void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts; int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
...@@ -1164,7 +1163,7 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu ...@@ -1164,7 +1163,7 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
variables[name] = "torsionParams"+params->getParameterSuffix(i); variables[name] = "torsionParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(force.getNumGlobalParameters(), "customTorsionGlobals"); globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customTorsionGlobals");
globals->upload(globalParamValues); globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float"); string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
...@@ -1202,7 +1201,7 @@ double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool incl ...@@ -1202,7 +1201,7 @@ double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool incl
} }
void CudaCalcCustomTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force) { void CudaCalcCustomTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts; int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
...@@ -1260,7 +1259,7 @@ private: ...@@ -1260,7 +1259,7 @@ private:
}; };
CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() { CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
if (sigmaEpsilon != NULL) if (sigmaEpsilon != NULL)
delete sigmaEpsilon; delete sigmaEpsilon;
if (exceptionParams != NULL) if (exceptionParams != NULL)
...@@ -1310,7 +1309,7 @@ static int findFFTDimension(int minimum) { ...@@ -1310,7 +1309,7 @@ static int findFFTDimension(int minimum) {
} }
void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) { void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
// Identify which exceptions are 1-4 interactions. // Identify which exceptions are 1-4 interactions.
...@@ -1328,7 +1327,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1328,7 +1327,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions. // Initialize nonbonded interactions.
int numParticles = force.getNumParticles(); int numParticles = force.getNumParticles();
sigmaEpsilon = CudaArray::create<float2>(numParticles, "sigmaEpsilon"); sigmaEpsilon = CudaArray::create<float2>(cu, numParticles, "sigmaEpsilon");
CudaArray& posq = cu.getPosq(); CudaArray& posq = cu.getPosq();
float4* posqf = (float4*) cu.getPinnedBuffer(); float4* posqf = (float4*) cu.getPinnedBuffer();
double4* posqd = (double4*) cu.getPinnedBuffer(); double4* posqd = (double4*) cu.getPinnedBuffer();
...@@ -1400,7 +1399,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1400,7 +1399,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums"); ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces"); ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
cosSinSums = new CudaArray((2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums"); cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
} }
else if (force.getNonbondedMethod() == NonbondedForce::PME) { else if (force.getNonbondedMethod() == NonbondedForce::PME) {
// Compute the PME parameters. // Compute the PME parameters.
...@@ -1433,14 +1432,14 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1433,14 +1432,14 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Create required data structures. // Create required data structures.
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
pmeGrid = new CudaArray(gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid"); pmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid");
cu.addAutoclearBuffer(pmeGrid->getDevicePointer(), pmeGrid->getSize()*sizeof(float2)); cu.addAutoclearBuffer(pmeGrid->getDevicePointer(), pmeGrid->getSize()*sizeof(float2));
pmeBsplineModuliX = new CudaArray(gridSizeX, elementSize, "pmeBsplineModuliX"); pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new CudaArray(gridSizeY, elementSize, "pmeBsplineModuliY"); pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new CudaArray(gridSizeZ, elementSize, "pmeBsplineModuliZ"); pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeBsplineTheta = new CudaArray(PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta"); pmeBsplineTheta = new CudaArray(cu, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
pmeAtomRange = CudaArray::create<int>(gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange"); pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create<int2>(numParticles, "pmeAtomGridIndex"); pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms()); sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
cufftResult result = cufftPlan3d(&fft, gridSizeX, gridSizeY, gridSizeZ, CUFFT_C2C); cufftResult result = cufftPlan3d(&fft, gridSizeX, gridSizeY, gridSizeZ, CUFFT_C2C);
if (result != CUFFT_SUCCESS) if (result != CUFFT_SUCCESS)
...@@ -1537,7 +1536,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1537,7 +1536,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (numExceptions > 0) { if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions); exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2)); vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams = CudaArray::create<float4>(numExceptions, "exceptionParams"); exceptionParams = CudaArray::create<float4>(cu, numExceptions, "exceptionParams");
vector<float4> exceptionParamsVector(numExceptions); vector<float4> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) { for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon; double chargeProd, sigma, epsilon;
...@@ -1596,7 +1595,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1596,7 +1595,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) { void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) {
// Make sure the new parameters are acceptable. // Make sure the new parameters are acceptable.
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
if (force.getNumParticles() != cu.getNumAtoms()) if (force.getNumParticles() != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed"); throw OpenMMException("updateParametersInContext: The number of particles has changed");
if (!hasCoulomb || !hasLJ) { if (!hasCoulomb || !hasLJ) {
...@@ -1702,7 +1701,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1702,7 +1701,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
//}; //};
// //
//CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() { //CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// if (params != NULL) // if (params != NULL)
// delete params; // delete params;
// if (globals != NULL) // if (globals != NULL)
...@@ -1714,7 +1713,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1714,7 +1713,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
//} //}
// //
//void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) { //void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// int forceIndex; // int forceIndex;
// for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex) // for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
// ; // ;
...@@ -1838,7 +1837,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1838,7 +1837,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
//} //}
// //
//void CudaCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) { //void CudaCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// int numParticles = force.getNumParticles(); // int numParticles = force.getNumParticles();
// if (numParticles != cu.getNumAtoms()) // if (numParticles != cu.getNumAtoms())
// throw OpenMMException("updateParametersInContext: The number of particles has changed"); // throw OpenMMException("updateParametersInContext: The number of particles has changed");
...@@ -1875,7 +1874,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1875,7 +1874,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
//}; //};
// //
//CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() { //CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// if (params != NULL) // if (params != NULL)
// delete params; // delete params;
// if (bornSum != NULL) // if (bornSum != NULL)
...@@ -1893,7 +1892,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1893,7 +1892,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
//} //}
// //
//void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) { //void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// if (cu.getPlatformData().contexts.size() > 1) // if (cu.getPlatformData().contexts.size() > 1)
// throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices"); // throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities(); // CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
...@@ -2059,7 +2058,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -2059,7 +2058,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
//void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force) { //void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force) {
// // Make sure the new parameters are acceptable. // // Make sure the new parameters are acceptable.
// //
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// int numParticles = force.getNumParticles(); // int numParticles = force.getNumParticles();
// if (numParticles != cu.getNumAtoms()) // if (numParticles != cu.getNumAtoms())
// throw OpenMMException("updateParametersInContext: The number of particles has changed"); // throw OpenMMException("updateParametersInContext: The number of particles has changed");
...@@ -2117,7 +2116,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -2117,7 +2116,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
//}; //};
// //
//CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() { //CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// if (params != NULL) // if (params != NULL)
// delete params; // delete params;
// if (computedValues != NULL) // if (computedValues != NULL)
...@@ -2139,7 +2138,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -2139,7 +2138,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
//} //}
// //
//void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) { //void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// if (cu.getPlatformData().contexts.size() > 1) // if (cu.getPlatformData().contexts.size() > 1)
// throw OpenMMException("CustomGBForce does not support using multiple CUDA devices"); // throw OpenMMException("CustomGBForce does not support using multiple CUDA devices");
// bool useExclusionsForValue = false; // bool useExclusionsForValue = false;
...@@ -2998,7 +2997,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -2998,7 +2997,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
//} //}
// //
//void CudaCalcCustomGBForceKernel::copyParametersToContext(ContextImpl& context, const CustomGBForce& force) { //void CudaCalcCustomGBForceKernel::copyParametersToContext(ContextImpl& context, const CustomGBForce& force) {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// int numParticles = force.getNumParticles(); // int numParticles = force.getNumParticles();
// if (numParticles != cu.getNumAtoms()) // if (numParticles != cu.getNumAtoms())
// throw OpenMMException("updateParametersInContext: The number of particles has changed"); // throw OpenMMException("updateParametersInContext: The number of particles has changed");
...@@ -3053,7 +3052,7 @@ private: ...@@ -3053,7 +3052,7 @@ private:
}; };
CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() { CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL) if (globals != NULL)
...@@ -3061,7 +3060,7 @@ CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() { ...@@ -3061,7 +3060,7 @@ CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
} }
void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) { void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumParticles()/numContexts; int startIndex = cu.getContextIndex()*force.getNumParticles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumParticles()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumParticles()/numContexts;
...@@ -3110,7 +3109,7 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C ...@@ -3110,7 +3109,7 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
variables[name] = "particleParams"+params->getParameterSuffix(i); variables[name] = "particleParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(force.getNumGlobalParameters(), "customExternalGlobals"); globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customExternalGlobals");
globals->upload(globalParamValues); globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float"); string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
...@@ -3148,7 +3147,7 @@ double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool inc ...@@ -3148,7 +3147,7 @@ double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool inc
} }
void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& context, const CustomExternalForce& force) { void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& context, const CustomExternalForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumParticles()/numContexts; int startIndex = cu.getContextIndex()*force.getNumParticles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumParticles()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumParticles()/numContexts;
...@@ -3250,7 +3249,7 @@ void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& con ...@@ -3250,7 +3249,7 @@ void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& con
//}; //};
// //
//CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() { //CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// if (donorParams != NULL) // if (donorParams != NULL)
// delete donorParams; // delete donorParams;
// if (acceptorParams != NULL) // if (acceptorParams != NULL)
...@@ -3291,7 +3290,7 @@ void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& con ...@@ -3291,7 +3290,7 @@ void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& con
//void CudaCalcCustomHbondForceKernel::initialize(const System& system, const CustomHbondForce& force) { //void CudaCalcCustomHbondForceKernel::initialize(const System& system, const CustomHbondForce& force) {
// // Record the lists of donors and acceptors, and the parameters for each one. // // Record the lists of donors and acceptors, and the parameters for each one.
// //
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// int numContexts = cu.getPlatformData().contexts.size(); // int numContexts = cu.getPlatformData().contexts.size();
// int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts; // int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts;
// int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts; // int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts;
...@@ -3686,7 +3685,7 @@ void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& con ...@@ -3686,7 +3685,7 @@ void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& con
//} //}
// //
//void CudaCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& context, const CustomHbondForce& force) { //void CudaCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& context, const CustomHbondForce& force) {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// int numContexts = cu.getPlatformData().contexts.size(); // int numContexts = cu.getPlatformData().contexts.size();
// int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts; // int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts;
// int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts; // int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts;
...@@ -3751,7 +3750,7 @@ private: ...@@ -3751,7 +3750,7 @@ private:
}; };
CudaCalcCustomCompoundBondForceKernel::~CudaCalcCustomCompoundBondForceKernel() { CudaCalcCustomCompoundBondForceKernel::~CudaCalcCustomCompoundBondForceKernel() {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
if (params != NULL) if (params != NULL)
delete params; delete params;
if (globals != NULL) if (globals != NULL)
...@@ -3763,7 +3762,7 @@ CudaCalcCustomCompoundBondForceKernel::~CudaCalcCustomCompoundBondForceKernel() ...@@ -3763,7 +3762,7 @@ CudaCalcCustomCompoundBondForceKernel::~CudaCalcCustomCompoundBondForceKernel()
} }
void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) { void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts; int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
...@@ -3799,7 +3798,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con ...@@ -3799,7 +3798,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
functions[name] = &fp; functions[name] = &fp;
tabulatedFunctionParamsVec[i] = make_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2); tabulatedFunctionParamsVec[i] = make_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector<float4> f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max); vector<float4> f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
CudaArray* array = CudaArray::create<float4>(values.size()-1, "TabulatedFunction"); CudaArray* array = CudaArray::create<float4>(cu, values.size()-1, "TabulatedFunction");
tabulatedFunctions.push_back(array); tabulatedFunctions.push_back(array);
array->upload(f); array->upload(f);
string arrayName = cu.getBondedUtilities().addArgument(array->getDevicePointer(), "float4"); string arrayName = cu.getBondedUtilities().addArgument(array->getDevicePointer(), "float4");
...@@ -3807,7 +3806,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con ...@@ -3807,7 +3806,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
} }
string functionParamsName; string functionParamsName;
if (force.getNumFunctions() > 0) { if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = CudaArray::create<float4>(tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters"); tabulatedFunctionParams = CudaArray::create<float4>(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters");
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec); tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
functionParamsName = cu.getBondedUtilities().addArgument(tabulatedFunctionParams->getDevicePointer(), "float4"); functionParamsName = cu.getBondedUtilities().addArgument(tabulatedFunctionParams->getDevicePointer(), "float4");
} }
...@@ -3832,7 +3831,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con ...@@ -3832,7 +3831,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
variables[name] = "bondParams"+params->getParameterSuffix(i); variables[name] = "bondParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(force.getNumGlobalParameters(), "customCompoundBondGlobals"); globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customCompoundBondGlobals");
globals->upload(globalParamValues); globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float"); string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
...@@ -4020,7 +4019,7 @@ double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool ...@@ -4020,7 +4019,7 @@ double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool
} }
void CudaCalcCustomCompoundBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force) { void CudaCalcCustomCompoundBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts; int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts; int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
...@@ -4049,7 +4048,7 @@ CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() { ...@@ -4049,7 +4048,7 @@ CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {
} }
void CudaIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) { void CudaIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
cu.getPlatformData().initializeContexts(system); cu.getPlatformData().initializeContexts(system);
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
...@@ -4101,295 +4100,303 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn ...@@ -4101,295 +4100,303 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
cu.setStepCount(cu.getStepCount()+1); cu.setStepCount(cu.getStepCount()+1);
} }
//CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() { CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() {
// cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
// if (params != NULL) if (params != NULL)
// delete params; delete params;
//} }
//
//void CudaIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) { void CudaIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
// cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
// cu.getPlatformData().initializeContexts(system); cu.getPlatformData().initializeContexts(system);
// cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
// map<string, string> defines; map<string, string> defines;
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// CUmodule module = cu.createModule(CudaKernelSources::langevin, defines, ""); CUmodule module = cu.createModule(CudaKernelSources::langevin, defines, "");
// kernel1 = cu.getKernel(module, "integrateLangevinPart1"); kernel1 = cu.getKernel(module, "integrateLangevinPart1");
// kernel2 = cu.getKernel(module, "integrateLangevinPart2"); kernel2 = cu.getKernel(module, "integrateLangevinPart2");
// params = new CudaArray<cl_float>(cu, 3, "langevinParams"); params = new CudaArray(cu, 3, cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "langevinParams");
// prevStepSize = -1.0; prevStepSize = -1.0;
//} }
//
//void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) { void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) {
// CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
// int numAtoms = cu.getNumAtoms(); int numAtoms = cu.getNumAtoms();
// if (!hasInitializedKernels) { double temperature = integrator.getTemperature();
// hasInitializedKernels = true; double friction = integrator.getFriction();
// kernel1.setArg<cu::Buffer>(0, cu.getVelm().getDevicePointer()); double stepSize = integrator.getStepSize();
// kernel1.setArg<cu::Buffer>(1, cu.getForce().getDevicePointer()); if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
// kernel1.setArg<cu::Buffer>(2, integration.getPosDelta().getDevicePointer()); // Calculate the integration parameters.
// kernel1.setArg<cu::Buffer>(3, params->getDevicePointer());
// kernel1.setArg<cu::Buffer>(4, integration.getStepSize().getDevicePointer()); double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
// kernel1.setArg<cu::Buffer>(5, integration.getRandom().getDevicePointer()); double kT = BOLTZ*temperature;
// kernel2.setArg<cu::Buffer>(0, cu.getPosq().getDevicePointer()); double vscale = exp(-stepSize/tau);
// kernel2.setArg<cu::Buffer>(1, integration.getPosDelta().getDevicePointer()); double fscale = (1-vscale)*tau;
// kernel2.setArg<cu::Buffer>(2, cu.getVelm().getDevicePointer()); double noisescale = sqrt(2*kT/tau)*sqrt(0.5*(1-vscale*vscale)*tau);
// kernel2.setArg<cu::Buffer>(3, integration.getStepSize().getDevicePointer()); if (cu.getUseDoublePrecision()) {
// } vector<double> p(params->getSize());
// double temperature = integrator.getTemperature(); p[0] = vscale;
// double friction = integrator.getFriction(); p[1] = fscale;
// double stepSize = integrator.getStepSize(); p[2] = noisescale;
// if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) { params->upload(p);
// // Calculate the integration parameters. double2 ss = make_double2(0, stepSize);
// integration.getStepSize().upload(&ss);
// double tau = (friction == 0.0 ? 0.0 : 1.0/friction); }
// double kT = BOLTZ*temperature; else {
// double vscale = exp(-stepSize/tau); vector<float> p(params->getSize());
// double fscale = (1-vscale)*tau; p[0] = (float) vscale;
// double noisescale = sqrt(2*kT/tau)*sqrt(0.5*(1-vscale*vscale)*tau); p[1] = (float) fscale;
// vector<cl_float> p(params->getSize()); p[2] = (float) noisescale;
// p[0] = (cl_float) vscale; params->upload(p);
// p[1] = (cl_float) fscale; float2 ss = make_float2(0, (float) stepSize);
// p[2] = (cl_float) noisescale; integration.getStepSize().upload(&ss);
// params->upload(p); }
// integration.getStepSize()[0].y = (cl_float) stepSize; prevTemp = temperature;
// integration.getStepSize().upload(); prevFriction = friction;
// prevTemp = temperature; prevStepSize = stepSize;
// prevFriction = friction; }
// prevStepSize = stepSize;
// } // Call the first integration kernel.
//
// // Call the first integration kernel. int randomIndex = integration.prepareRandomNumbers(cu.getPaddedNumAtoms());
// void* args1[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
// kernel1.setArg<cl_uint>(6, integration.prepareRandomNumbers(cu.getPaddedNumAtoms())); &params->getDevicePointer(), &integration.getStepSize().getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex};
// cu.executeKernel(kernel1, numAtoms); cu.executeKernel(kernel1, args1, numAtoms);
//
// // Apply constraints. // Apply constraints.
//
// integration.applyConstraints(integrator.getConstraintTolerance()); integration.applyConstraints(integrator.getConstraintTolerance());
//
// // Call the second integration kernel. // Call the second integration kernel.
//
// cu.executeKernel(kernel2, numAtoms); void* args2[] = {&cu.getPosq().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
// integration.computeVirtualSites(); &cu.getVelm().getDevicePointer(), &integration.getStepSize().getDevicePointer()};
// cu.executeKernel(kernel2, args2, numAtoms);
// // Update the time and step count. integration.computeVirtualSites();
//
// cu.setTime(cu.getTime()+stepSize); // Update the time and step count.
// cu.setStepCount(cu.getStepCount()+1);
//} cu.setTime(cu.getTime()+stepSize);
// cu.setStepCount(cu.getStepCount()+1);
//CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() { }
//}
// CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
//void CudaIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) { }
// cuCtxSetCurrent(cu.getContext());
// cu.getPlatformData().initializeContexts(system); void CudaIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) {
// cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cu.setAsCurrent();
// map<string, string> defines; cu.getPlatformData().initializeContexts(system);
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
// CUmodule module = cu.createModule(CudaKernelSources::brownian, defines, ""); map<string, string> defines;
// kernel1 = cu.getKernel(module, "integrateBrownianPart1"); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// kernel2 = cu.getKernel(module, "integrateBrownianPart2"); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// prevStepSize = -1.0; CUmodule module = cu.createModule(CudaKernelSources::brownian, defines, "");
//} kernel1 = cu.getKernel(module, "integrateBrownianPart1");
// kernel2 = cu.getKernel(module, "integrateBrownianPart2");
//void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) { prevStepSize = -1.0;
// CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); }
// int numAtoms = cu.getNumAtoms();
// if (!hasInitializedKernels) { void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) {
// hasInitializedKernels = true; CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
// kernel1.setArg<cu::Buffer>(2, cu.getForce().getDevicePointer()); int numAtoms = cu.getNumAtoms();
// kernel1.setArg<cu::Buffer>(3, integration.getPosDelta().getDevicePointer()); double temperature = integrator.getTemperature();
// kernel1.setArg<cu::Buffer>(4, cu.getVelm().getDevicePointer()); double friction = integrator.getFriction();
// kernel1.setArg<cu::Buffer>(5, integration.getRandom().getDevicePointer()); double stepSize = integrator.getStepSize();
// kernel2.setArg<cu::Buffer>(1, cu.getPosq().getDevicePointer()); double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
// kernel2.setArg<cu::Buffer>(2, cu.getVelm().getDevicePointer()); double tauDt = tau*stepSize;
// kernel2.setArg<cu::Buffer>(3, integration.getPosDelta().getDevicePointer()); double noise = sqrt(2.0f*BOLTZ*temperature*stepSize*tau);
// } float stepSizeFloat = (float) stepSize;
// double temperature = integrator.getTemperature(); float tauDtFloat = (float) tauDt;
// double friction = integrator.getFriction(); float noiseFloat = (float) noise;
// double stepSize = integrator.getStepSize();
// if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) { // Call the first integration kernel.
// double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
// kernel1.setArg<cl_float>(0, (cl_float) (tau*stepSize)); int randomIndex = integration.prepareRandomNumbers(cu.getPaddedNumAtoms());
// kernel1.setArg<cl_float>(1, (cl_float) (sqrt(2.0f*BOLTZ*temperature*stepSize*tau))); void* args1[] = {cu.getUseDoublePrecision() ? (void*) &tauDt : (void*) &tauDtFloat,
// kernel2.setArg<cl_float>(0, (cl_float) (1.0/stepSize)); cu.getUseDoublePrecision() ? (void*) &noise : (void*) &noiseFloat,
// prevTemp = temperature; &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
// prevFriction = friction; &cu.getVelm().getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex};
// prevStepSize = stepSize; cu.executeKernel(kernel1, args1, numAtoms);
// }
// // Apply constraints.
// // Call the first integration kernel.
// integration.applyConstraints(integrator.getConstraintTolerance());
// kernel1.setArg<cl_uint>(6, integration.prepareRandomNumbers(cu.getPaddedNumAtoms()));
// cu.executeKernel(kernel1, numAtoms); // Call the second integration kernel.
//
// // Apply constraints. void* args2[] = {cu.getUseDoublePrecision() ? (void*) &stepSize : (void*) &stepSizeFloat,
// &cu.getPosq().getDevicePointer(), &cu.getVelm().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
// integration.applyConstraints(integrator.getConstraintTolerance()); cu.executeKernel(kernel2, args2, numAtoms);
// integration.computeVirtualSites();
// // Call the second integration kernel.
// // Update the time and step count.
// cu.executeKernel(kernel2, numAtoms);
// integration.computeVirtualSites(); cu.setTime(cu.getTime()+stepSize);
// cu.setStepCount(cu.getStepCount()+1);
// // Update the time and step count. }
//
// cu.setTime(cu.getTime()+stepSize); CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel() {
// cu.setStepCount(cu.getStepCount()+1); }
//}
// void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
//CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel() { cu.setAsCurrent();
//} cu.getPlatformData().initializeContexts(system);
// map<string, string> defines;
//void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) { defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// cuCtxSetCurrent(cu.getContext()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// cu.getPlatformData().initializeContexts(system); CUmodule module = cu.createModule(CudaKernelSources::verlet, defines, "");
// CUmodule module = cu.createModule(CudaKernelSources::verlet, ""); kernel1 = cu.getKernel(module, "integrateVerletPart1");
// kernel1 = cu.getKernel(module, "integrateVerletPart1"); kernel2 = cu.getKernel(module, "integrateVerletPart2");
// kernel2 = cu.getKernel(module, "integrateVerletPart2"); selectSizeKernel = cu.getKernel(module, "selectVerletStepSize");
// selectSizeKernel = cu.getKernel(module, "selectVerletStepSize"); blockSize = min(256, system.getNumParticles());
// blockSize = min(min(256, system.getNumParticles()), (int) cu.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>()); }
//}
// double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
//double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) { CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
// CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); int numAtoms = cu.getNumAtoms();
// int numAtoms = cu.getNumAtoms();
// if (!hasInitializedKernels) { // Select the step size to use.
// hasInitializedKernels = true;
// kernel1.setArg<cl_int>(0, numAtoms); double maxStepSize = maxTime-cu.getTime();
// kernel1.setArg<cu::Buffer>(1, cu.getIntegrationUtilities().getStepSize().getDevicePointer()); float maxStepSizeFloat = (float) maxStepSize;
// kernel1.setArg<cu::Buffer>(2, cu.getPosq().getDevicePointer()); double tol = integrator.getErrorTolerance();
// kernel1.setArg<cu::Buffer>(3, cu.getVelm().getDevicePointer()); float tolFloat = (float) tol;
// kernel1.setArg<cu::Buffer>(4, cu.getForce().getDevicePointer()); void* argsSelect[] = {cu.getUseDoublePrecision() ? (void*) &maxStepSize : (void*) &maxStepSizeFloat,
// kernel1.setArg<cu::Buffer>(5, integration.getPosDelta().getDevicePointer()); cu.getUseDoublePrecision() ? (void*) &tol : (void*) &tolFloat,
// kernel2.setArg<cl_int>(0, numAtoms); &cu.getIntegrationUtilities().getStepSize().getDevicePointer(),
// kernel2.setArg<cu::Buffer>(1, cu.getIntegrationUtilities().getStepSize().getDevicePointer()); &cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer()};
// kernel2.setArg<cu::Buffer>(2, cu.getPosq().getDevicePointer()); int sharedSize = blockSize*(cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
// kernel2.setArg<cu::Buffer>(3, cu.getVelm().getDevicePointer()); cu.executeKernel(selectSizeKernel, argsSelect, blockSize, blockSize, sharedSize);
// kernel2.setArg<cu::Buffer>(4, integration.getPosDelta().getDevicePointer());
// selectSizeKernel.setArg<cl_int>(0, numAtoms); // Call the first integration kernel.
// selectSizeKernel.setArg<cu::Buffer>(3, cu.getIntegrationUtilities().getStepSize().getDevicePointer());
// selectSizeKernel.setArg<cu::Buffer>(4, cu.getVelm().getDevicePointer()); void* args1[] = {&cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(),
// selectSizeKernel.setArg<cu::Buffer>(5, cu.getForce().getDevicePointer()); &cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
// selectSizeKernel.setArg(6, blockSize*sizeof(cl_float), NULL); cu.executeKernel(kernel1, args1, numAtoms);
// }
// // Apply constraints.
// // Select the step size to use.
// integration.applyConstraints(integrator.getConstraintTolerance());
// float maxStepSize = (float)(maxTime-cu.getTime());
// selectSizeKernel.setArg<cl_float>(1, maxStepSize); // Call the second integration kernel.
// selectSizeKernel.setArg<cl_float>(2, (cl_float) integrator.getErrorTolerance());
// cu.executeKernel(selectSizeKernel, blockSize, blockSize); void* args2[] = {&cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(),
// &cu.getVelm().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
// // Call the first integration kernel. cu.executeKernel(kernel2, args2, numAtoms);
// integration.computeVirtualSites();
// cu.executeKernel(kernel1, numAtoms);
// // Update the time and step count.
// // Apply constraints.
// double dt, time;
// integration.applyConstraints(integrator.getConstraintTolerance()); if (cu.getUseDoublePrecision()) {
// double2 stepSize;
// // Call the second integration kernel. cu.getIntegrationUtilities().getStepSize().download(&stepSize);
// dt = stepSize.y;
// cu.executeKernel(kernel2, numAtoms); time = cu.getTime()+dt;
// integration.computeVirtualSites(); if (dt == maxStepSize)
// time = maxTime; // Avoid round-off error
// // Update the time and step count. }
// else {
// cu.getIntegrationUtilities().getStepSize().download(); float2 stepSize;
// double dt = cu.getIntegrationUtilities().getStepSize()[0].y; cu.getIntegrationUtilities().getStepSize().download(&stepSize);
// double time = cu.getTime()+dt; dt = stepSize.y;
// if (dt == maxStepSize) time = cu.getTime()+dt;
// time = maxTime; // Avoid round-off error if (dt == maxStepSizeFloat)
// cu.setTime(time); time = maxTime; // Avoid round-off error
// cu.setStepCount(cu.getStepCount()+1); }
// return dt; cu.setTime(time);
//} cu.setStepCount(cu.getStepCount()+1);
// return dt;
//CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKernel() { }
// cuCtxSetCurrent(cu.getContext());
// if (params != NULL) CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKernel() {
// delete params; cu.setAsCurrent();
//} if (params != NULL)
// delete params;
//void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) { }
// cuCtxSetCurrent(cu.getContext());
// cu.getPlatformData().initializeContexts(system); void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
// cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cu.setAsCurrent();
// map<string, string> defines; cu.getPlatformData().initializeContexts(system);
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
// defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); map<string, string> defines;
// CUmodule module = cu.createModule(CudaKernelSources::langevin, defines, ""); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// kernel1 = cu.getKernel(module, "integrateLangevinPart1"); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// kernel2 = cu.getKernel(module, "integrateLangevinPart2"); CUmodule module = cu.createModule(CudaKernelSources::langevin, defines, "");
// selectSizeKernel = cu.getKernel(module, "selectLangevinStepSize"); kernel1 = cu.getKernel(module, "integrateLangevinPart1");
// params = new CudaArray<cl_float>(cu, 3, "langevinParams"); kernel2 = cu.getKernel(module, "integrateLangevinPart2");
// blockSize = min(256, system.getNumParticles()); selectSizeKernel = cu.getKernel(module, "selectLangevinStepSize");
// blockSize = max(blockSize, params->getSize()); params = CudaArray::create<float>(cu, 3, "langevinParams");
// blockSize = min(blockSize, (int) cu.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>()); blockSize = min(256, system.getNumParticles());
//} blockSize = max(blockSize, params->getSize());
// }
//double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
// CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
// int numAtoms = cu.getNumAtoms(); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
// if (!hasInitializedKernels) { int numAtoms = cu.getNumAtoms();
// hasInitializedKernels = true;
// kernel1.setArg<cu::Buffer>(0, cu.getVelm().getDevicePointer()); // Select the step size to use.
// kernel1.setArg<cu::Buffer>(1, cu.getForce().getDevicePointer());
// kernel1.setArg<cu::Buffer>(2, integration.getPosDelta().getDevicePointer()); double maxStepSize = maxTime-cu.getTime();
// kernel1.setArg<cu::Buffer>(3, params->getDevicePointer()); float maxStepSizeFloat = (float) maxStepSize;
// kernel1.setArg<cu::Buffer>(4, integration.getStepSize().getDevicePointer()); double tol = integrator.getErrorTolerance();
// kernel1.setArg<cu::Buffer>(5, integration.getRandom().getDevicePointer()); float tolFloat = (float) tol;
// kernel2.setArg<cu::Buffer>(0, cu.getPosq().getDevicePointer()); double tau = integrator.getFriction() == 0.0 ? 0.0 : 1.0/integrator.getFriction();
// kernel2.setArg<cu::Buffer>(1, integration.getPosDelta().getDevicePointer()); float tauFloat = (float) tau;
// kernel2.setArg<cu::Buffer>(2, cu.getVelm().getDevicePointer()); double kT = BOLTZ*integrator.getTemperature();
// kernel2.setArg<cu::Buffer>(3, integration.getStepSize().getDevicePointer()); float kTFloat = (float) kT;
// selectSizeKernel.setArg<cu::Buffer>(4, integration.getStepSize().getDevicePointer()); void* argsSelect[] = {cu.getUseDoublePrecision() ? (void*) &maxStepSize : (void*) &maxStepSizeFloat,
// selectSizeKernel.setArg<cu::Buffer>(5, cu.getVelm().getDevicePointer()); cu.getUseDoublePrecision() ? (void*) &tol : (void*) &tolFloat,
// selectSizeKernel.setArg<cu::Buffer>(6, cu.getForce().getDevicePointer()); cu.getUseDoublePrecision() ? (void*) &tau : (void*) &tauFloat,
// selectSizeKernel.setArg<cu::Buffer>(7, params->getDevicePointer()); cu.getUseDoublePrecision() ? (void*) &kT : (void*) &kTFloat,
// selectSizeKernel.setArg(8, params->getSize()*sizeof(cl_float), NULL); &cu.getIntegrationUtilities().getStepSize().getDevicePointer(),
// selectSizeKernel.setArg(9, blockSize*sizeof(cl_float), NULL); &cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &params->getDevicePointer()};
// } int sharedSize = blockSize*(cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
// cu.executeKernel(selectSizeKernel, argsSelect, blockSize, blockSize, sharedSize);
// // Select the step size to use.
// // Call the first integration kernel.
// float maxStepSize = (float)(maxTime-cu.getTime());
// selectSizeKernel.setArg<cl_float>(0, maxStepSize); int randomIndex = integration.prepareRandomNumbers(cu.getPaddedNumAtoms());
// selectSizeKernel.setArg<cl_float>(1, (cl_float) integrator.getErrorTolerance()); void* args1[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
// selectSizeKernel.setArg<cl_float>(2, (cl_float) (integrator.getFriction() == 0.0 ? 0.0 : 1.0/integrator.getFriction())); &params->getDevicePointer(), &integration.getStepSize().getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex};
// selectSizeKernel.setArg<cl_float>(3, (cl_float) (BOLTZ*integrator.getTemperature())); cu.executeKernel(kernel1, args1, numAtoms);
// cu.executeKernel(selectSizeKernel, blockSize, blockSize);
// // Apply constraints.
// // Call the first integration kernel.
// integration.applyConstraints(integrator.getConstraintTolerance());
// kernel1.setArg<cl_uint>(6, integration.prepareRandomNumbers(cu.getPaddedNumAtoms()));
// cu.executeKernel(kernel1, numAtoms); // Call the second integration kernel.
//
// // Apply constraints. void* args2[] = {&cu.getPosq().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
// &cu.getVelm().getDevicePointer(), &integration.getStepSize().getDevicePointer()};
// integration.applyConstraints(integrator.getConstraintTolerance()); cu.executeKernel(kernel2, args2, numAtoms);
// integration.computeVirtualSites();
// // Call the second integration kernel.
// // Update the time and step count.
// cu.executeKernel(kernel2, numAtoms);
// integration.computeVirtualSites(); double dt, time;
// if (cu.getUseDoublePrecision()) {
// // Update the time and step count. double2 stepSize;
// cu.getIntegrationUtilities().getStepSize().download(&stepSize);
// cu.getIntegrationUtilities().getStepSize().download(); dt = stepSize.y;
// double dt = cu.getIntegrationUtilities().getStepSize()[0].y; time = cu.getTime()+dt;
// double time = cu.getTime()+dt; if (dt == maxStepSize)
// if (dt == maxStepSize) time = maxTime; // Avoid round-off error
// time = maxTime; // Avoid round-off error }
// cu.setTime(time); else {
// cu.setStepCount(cu.getStepCount()+1); float2 stepSize;
// return dt; cu.getIntegrationUtilities().getStepSize().download(&stepSize);
//} dt = stepSize.y;
// time = cu.getTime()+dt;
if (dt == maxStepSizeFloat)
time = maxTime; // Avoid round-off error
}
cu.setTime(time);
cu.setStepCount(cu.getStepCount()+1);
return dt;
}
//class CudaIntegrateCustomStepKernel::ReorderListener : public CudaContext::ReorderListener { //class CudaIntegrateCustomStepKernel::ReorderListener : public CudaContext::ReorderListener {
//public: //public:
// ReorderListener(CudaContext& cu, CudaParameterSet& perDofValues, vector<vector<cl_float> >& localPerDofValues, bool& deviceValuesAreCurrent) : // ReorderListener(CudaContext& cu, CudaParameterSet& perDofValues, vector<vector<cl_float> >& localPerDofValues, bool& deviceValuesAreCurrent) :
...@@ -4433,7 +4440,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn ...@@ -4433,7 +4440,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
//}; //};
// //
//CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() { //CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// if (globalValues != NULL) // if (globalValues != NULL)
// delete globalValues; // delete globalValues;
// if (contextParameterValues != NULL) // if (contextParameterValues != NULL)
...@@ -4451,7 +4458,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn ...@@ -4451,7 +4458,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
//} //}
// //
//void CudaIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) { //void CudaIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// cu.getPlatformData().initializeContexts(system); // cu.getPlatformData().initializeContexts(system);
// cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); // cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
// numGlobalVariables = integrator.getNumGlobalVariables(); // numGlobalVariables = integrator.getNumGlobalVariables();
...@@ -4956,13 +4963,13 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn ...@@ -4956,13 +4963,13 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
//} //}
// //
//CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() { //CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// if (atomGroups != NULL) // if (atomGroups != NULL)
// delete atomGroups; // delete atomGroups;
//} //}
// //
//void CudaApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) { //void CudaApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// randomSeed = thermostat.getRandomNumberSeed(); // randomSeed = thermostat.getRandomNumberSeed();
// map<string, string> defines; // map<string, string> defines;
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); // defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
...@@ -4997,7 +5004,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn ...@@ -4997,7 +5004,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
//} //}
// //
//CudaApplyMonteCarloBarostatKernel::~CudaApplyMonteCarloBarostatKernel() { //CudaApplyMonteCarloBarostatKernel::~CudaApplyMonteCarloBarostatKernel() {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// if (savedPositions != NULL) // if (savedPositions != NULL)
// delete savedPositions; // delete savedPositions;
// if (moleculeAtoms != NULL) // if (moleculeAtoms != NULL)
...@@ -5007,7 +5014,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn ...@@ -5007,7 +5014,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
//} //}
// //
//void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const MonteCarloBarostat& thermostat) { //void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const MonteCarloBarostat& thermostat) {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// savedPositions = new CudaArray<mm_float4>(cu, cu.getPaddedNumAtoms(), "savedPositions"); // savedPositions = new CudaArray<mm_float4>(cu, cu.getPaddedNumAtoms(), "savedPositions");
// CUmodule module = cu.createModule(CudaKernelSources::monteCarloBarostat); // CUmodule module = cu.createModule(CudaKernelSources::monteCarloBarostat);
// kernel = cu.getKernel(module, "scalePositions"); // kernel = cu.getKernel(module, "scalePositions");
...@@ -5056,7 +5063,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn ...@@ -5056,7 +5063,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
//} //}
void CudaCalcKineticEnergyKernel::initialize(const System& system) { void CudaCalcKineticEnergyKernel::initialize(const System& system) {
cuCtxSetCurrent(cu.getContext()); cu.setAsCurrent();
int numParticles = system.getNumParticles(); int numParticles = system.getNumParticles();
masses.resize(numParticles); masses.resize(numParticles);
for (int i = 0; i < numParticles; ++i) for (int i = 0; i < numParticles; ++i)
...@@ -5089,13 +5096,13 @@ double CudaCalcKineticEnergyKernel::execute(ContextImpl& context) { ...@@ -5089,13 +5096,13 @@ double CudaCalcKineticEnergyKernel::execute(ContextImpl& context) {
} }
//CudaRemoveCMMotionKernel::~CudaRemoveCMMotionKernel() { //CudaRemoveCMMotionKernel::~CudaRemoveCMMotionKernel() {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// if (cmMomentum != NULL) // if (cmMomentum != NULL)
// delete cmMomentum; // delete cmMomentum;
//} //}
// //
//void CudaRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) { //void CudaRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) {
// cuCtxSetCurrent(cu.getContext()); // cu.setAsCurrent();
// frequency = force.getFrequency(); // frequency = force.getFrequency();
// int numAtoms = cu.getNumAtoms(); // int numAtoms = cu.getNumAtoms();
// cmMomentum = new CudaArray<mm_float4>(cu, (numAtoms+CudaContext::ThreadBlockSize-1)/CudaContext::ThreadBlockSize, "cmMomentum"); // cmMomentum = new CudaArray<mm_float4>(cu, (numAtoms+CudaContext::ThreadBlockSize-1)/CudaContext::ThreadBlockSize, "cmMomentum");
......
...@@ -942,133 +942,126 @@ private: ...@@ -942,133 +942,126 @@ private:
CUfunction kernel1, kernel2; CUfunction kernel1, kernel2;
}; };
///** /**
// * This kernel is invoked by LangevinIntegrator to take one time step. * This kernel is invoked by LangevinIntegrator to take one time step.
// */ */
//class CudaIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel { class CudaIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
//public: public:
// CudaIntegrateLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateLangevinStepKernel(name, platform), cu(cu), CudaIntegrateLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateLangevinStepKernel(name, platform), cu(cu), params(NULL) {
// hasInitializedKernels(false), params(NULL) { }
// } ~CudaIntegrateLangevinStepKernel();
// ~CudaIntegrateLangevinStepKernel(); /**
// /** * Initialize the kernel, setting up the particle masses.
// * Initialize the kernel, setting up the particle masses. *
// * * @param system the System this kernel will be applied to
// * @param system the System this kernel will be applied to * @param integrator the LangevinIntegrator this kernel will be used for
// * @param integrator the LangevinIntegrator this kernel will be used for */
// */ void initialize(const System& system, const LangevinIntegrator& integrator);
// void initialize(const System& system, const LangevinIntegrator& integrator); /**
// /** * Execute the kernel.
// * Execute the kernel. *
// * * @param context the context in which to execute this kernel
// * @param context the context in which to execute this kernel * @param integrator the LangevinIntegrator this kernel is being used for
// * @param integrator the LangevinIntegrator this kernel is being used for */
// */ void execute(ContextImpl& context, const LangevinIntegrator& integrator);
// void execute(ContextImpl& context, const LangevinIntegrator& integrator); private:
//private: CudaContext& cu;
// CudaContext& cu; double prevTemp, prevFriction, prevStepSize;
// double prevTemp, prevFriction, prevStepSize; CudaArray* params;
// bool hasInitializedKernels; CUfunction kernel1, kernel2;
// CudaArray<cl_float>* params; };
// CUfunction kernel1, kernel2;
//}; /**
// * This kernel is invoked by BrownianIntegrator to take one time step.
///** */
// * This kernel is invoked by BrownianIntegrator to take one time step. class CudaIntegrateBrownianStepKernel : public IntegrateBrownianStepKernel {
// */ public:
//class CudaIntegrateBrownianStepKernel : public IntegrateBrownianStepKernel { CudaIntegrateBrownianStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateBrownianStepKernel(name, platform), cu(cu) {
//public: }
// CudaIntegrateBrownianStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateBrownianStepKernel(name, platform), cu(cu), ~CudaIntegrateBrownianStepKernel();
// hasInitializedKernels(false), prevTemp(-1), prevFriction(-1), prevStepSize(-1) { /**
// } * Initialize the kernel.
// ~CudaIntegrateBrownianStepKernel(); *
// /** * @param system the System this kernel will be applied to
// * Initialize the kernel. * @param integrator the BrownianIntegrator this kernel will be used for
// * */
// * @param system the System this kernel will be applied to void initialize(const System& system, const BrownianIntegrator& integrator);
// * @param integrator the BrownianIntegrator this kernel will be used for /**
// */ * Execute the kernel.
// void initialize(const System& system, const BrownianIntegrator& integrator); *
// /** * @param context the context in which to execute this kernel
// * Execute the kernel. * @param integrator the BrownianIntegrator this kernel is being used for
// * */
// * @param context the context in which to execute this kernel void execute(ContextImpl& context, const BrownianIntegrator& integrator);
// * @param integrator the BrownianIntegrator this kernel is being used for private:
// */ CudaContext& cu;
// void execute(ContextImpl& context, const BrownianIntegrator& integrator); double prevTemp, prevFriction, prevStepSize;
//private: CUfunction kernel1, kernel2;
// CudaContext& cu; };
// double prevTemp, prevFriction, prevStepSize;
// bool hasInitializedKernels; /**
// CUfunction kernel1, kernel2; * This kernel is invoked by VariableVerletIntegrator to take one time step.
//}; */
// class CudaIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStepKernel {
///** public:
// * This kernel is invoked by VariableVerletIntegrator to take one time step. CudaIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableVerletStepKernel(name, platform), cu(cu) {
// */ }
//class CudaIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStepKernel { ~CudaIntegrateVariableVerletStepKernel();
//public: /**
// CudaIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableVerletStepKernel(name, platform), cu(cu), * Initialize the kernel.
// hasInitializedKernels(false) { *
// } * @param system the System this kernel will be applied to
// ~CudaIntegrateVariableVerletStepKernel(); * @param integrator the VerletIntegrator this kernel will be used for
// /** */
// * Initialize the kernel. void initialize(const System& system, const VariableVerletIntegrator& integrator);
// * /**
// * @param system the System this kernel will be applied to * Execute the kernel.
// * @param integrator the VerletIntegrator this kernel will be used for *
// */ * @param context the context in which to execute this kernel
// void initialize(const System& system, const VariableVerletIntegrator& integrator); * @param integrator the VerletIntegrator this kernel is being used for
// /** * @param maxTime the maximum time beyond which the simulation should not be advanced
// * Execute the kernel. * @return the size of the step that was taken
// * */
// * @param context the context in which to execute this kernel double execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime);
// * @param integrator the VerletIntegrator this kernel is being used for private:
// * @param maxTime the maximum time beyond which the simulation should not be advanced CudaContext& cu;
// * @return the size of the step that was taken int blockSize;
// */ CUfunction kernel1, kernel2, selectSizeKernel;
// double execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime); };
//private:
// CudaContext& cu; /**
// bool hasInitializedKernels; * This kernel is invoked by VariableLangevinIntegrator to take one time step.
// int blockSize; */
// CUfunction kernel1, kernel2, selectSizeKernel; class CudaIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
//}; public:
// CudaIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableLangevinStepKernel(name, platform),
///** cu(cu), params(NULL) {
// * This kernel is invoked by VariableLangevinIntegrator to take one time step. }
// */ ~CudaIntegrateVariableLangevinStepKernel();
//class CudaIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel { /**
//public: * Initialize the kernel, setting up the particle masses.
// CudaIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableLangevinStepKernel(name, platform), cu(cu), *
// hasInitializedKernels(false), params(NULL) { * @param system the System this kernel will be applied to
// } * @param integrator the VariableLangevinIntegrator this kernel will be used for
// ~CudaIntegrateVariableLangevinStepKernel(); */
// /** void initialize(const System& system, const VariableLangevinIntegrator& integrator);
// * Initialize the kernel, setting up the particle masses. /**
// * * Execute the kernel.
// * @param system the System this kernel will be applied to *
// * @param integrator the VariableLangevinIntegrator this kernel will be used for * @param context the context in which to execute this kernel
// */ * @param integrator the VariableLangevinIntegrator this kernel is being used for
// void initialize(const System& system, const VariableLangevinIntegrator& integrator); * @param maxTime the maximum time beyond which the simulation should not be advanced
// /** * @return the size of the step that was taken
// * Execute the kernel. */
// * double execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime);
// * @param context the context in which to execute this kernel private:
// * @param integrator the VariableLangevinIntegrator this kernel is being used for CudaContext& cu;
// * @param maxTime the maximum time beyond which the simulation should not be advanced int blockSize;
// * @return the size of the step that was taken CudaArray* params;
// */ CUfunction kernel1, kernel2, selectSizeKernel;
// double execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime); double prevTemp, prevFriction, prevErrorTol;
//private: };
// CudaContext& cu;
// bool hasInitializedKernels;
// int blockSize;
// CudaArray<cl_float>* params;
// CUfunction kernel1, kernel2, selectSizeKernel;
// double prevTemp, prevFriction, prevErrorTol;
//};
//
///** ///**
// * This kernel is invoked by CustomIntegrator to take one time step. // * This kernel is invoked by CustomIntegrator to take one time step.
// */ // */
......
...@@ -169,14 +169,14 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -169,14 +169,14 @@ void CudaNonbondedUtilities::initialize(const System& system) {
exclusionIndicesVec.push_back(iter->second); exclusionIndicesVec.push_back(iter->second);
} }
exclusionRowIndicesVec[++currentRow] = exclusionIndicesVec.size(); exclusionRowIndicesVec[++currentRow] = exclusionIndicesVec.size();
exclusionIndices = CudaArray::create<unsigned int>(exclusionIndicesVec.size(), "exclusionIndices"); exclusionIndices = CudaArray::create<unsigned int>(context, exclusionIndicesVec.size(), "exclusionIndices");
exclusionRowIndices = CudaArray::create<unsigned int>(exclusionRowIndicesVec.size(), "exclusionRowIndices"); exclusionRowIndices = CudaArray::create<unsigned int>(context, exclusionRowIndicesVec.size(), "exclusionRowIndices");
exclusionIndices->upload(exclusionIndicesVec); exclusionIndices->upload(exclusionIndicesVec);
exclusionRowIndices->upload(exclusionRowIndicesVec); exclusionRowIndices->upload(exclusionRowIndicesVec);
// Record the exclusion data. // Record the exclusion data.
exclusions = CudaArray::create<unsigned int>(tilesWithExclusions.size()*CudaContext::TileSize, "exclusions"); exclusions = CudaArray::create<unsigned int>(context, tilesWithExclusions.size()*CudaContext::TileSize, "exclusions");
vector<unsigned int> exclusionVec(exclusions->getSize()); vector<unsigned int> exclusionVec(exclusions->getSize());
for (int i = 0; i < exclusions->getSize(); ++i) for (int i = 0; i < exclusions->getSize(); ++i)
exclusionVec[i] = 0xFFFFFFFF; exclusionVec[i] = 0xFFFFFFFF;
...@@ -231,11 +231,11 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -231,11 +231,11 @@ void CudaNonbondedUtilities::initialize(const System& system) {
maxTiles = numTiles; maxTiles = numTiles;
if (maxTiles < 1) if (maxTiles < 1)
maxTiles = 1; maxTiles = 1;
interactingTiles = CudaArray::create<ushort2>(maxTiles, "interactingTiles"); interactingTiles = CudaArray::create<ushort2>(context, maxTiles, "interactingTiles");
interactionFlags = CudaArray::create<unsigned int>(maxTiles, "interactionFlags"); interactionFlags = CudaArray::create<unsigned int>(context, maxTiles, "interactionFlags");
interactionCount = CudaArray::create<unsigned int>(1, "interactionCount"); interactionCount = CudaArray::create<unsigned int>(context, 1, "interactionCount");
blockCenter = CudaArray::create<float4>(numAtomBlocks, "blockCenter"); blockCenter = CudaArray::create<float4>(context, numAtomBlocks, "blockCenter");
blockBoundingBox = CudaArray::create<float4>(numAtomBlocks, "blockBoundingBox"); blockBoundingBox = CudaArray::create<float4>(context, numAtomBlocks, "blockBoundingBox");
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedInteractionCount, sizeof(unsigned int), 0)); CHECK_RESULT(cuMemHostAlloc((void**) &pinnedInteractionCount, sizeof(unsigned int), 0));
pinnedInteractionCount[0] = 0; pinnedInteractionCount[0] = 0;
interactionCount->upload(pinnedInteractionCount); interactionCount->upload(pinnedInteractionCount);
...@@ -330,11 +330,11 @@ void CudaNonbondedUtilities::updateNeighborListSize() { ...@@ -330,11 +330,11 @@ void CudaNonbondedUtilities::updateNeighborListSize() {
if (maxTiles > numTiles) if (maxTiles > numTiles)
maxTiles = numTiles; maxTiles = numTiles;
delete interactingTiles; delete interactingTiles;
interactingTiles = CudaArray::create<ushort2>(maxTiles, "interactingTiles"); interactingTiles = CudaArray::create<ushort2>(context, maxTiles, "interactingTiles");
forceArgs[8] = &interactingTiles->getDevicePointer(); forceArgs[8] = &interactingTiles->getDevicePointer();
findInteractingBlocksArgs[5] = &interactingTiles->getDevicePointer(); findInteractingBlocksArgs[5] = &interactingTiles->getDevicePointer();
delete interactionFlags; delete interactionFlags;
interactionFlags = CudaArray::create<unsigned int>(maxTiles, "interactionFlags"); interactionFlags = CudaArray::create<unsigned int>(context, maxTiles, "interactionFlags");
forceArgs[13] = &interactionFlags->getDevicePointer(); forceArgs[13] = &interactionFlags->getDevicePointer();
findInteractingBlocksArgs[6] = &interactionFlags->getDevicePointer(); findInteractingBlocksArgs[6] = &interactionFlags->getDevicePointer();
findInteractionsWithinBlocksArgs[3] = &interactingTiles->getDevicePointer(); findInteractionsWithinBlocksArgs[3] = &interactingTiles->getDevicePointer();
......
...@@ -73,11 +73,11 @@ CudaSort::CudaSort(CudaContext& context, SortTrait* trait, unsigned int length) ...@@ -73,11 +73,11 @@ CudaSort::CudaSort(CudaContext& context, SortTrait* trait, unsigned int length)
// Create workspace arrays. // Create workspace arrays.
dataRange = new CudaArray(2, trait->getKeySize(), "sortDataRange"); dataRange = new CudaArray(context, 2, trait->getKeySize(), "sortDataRange");
bucketOffset = CudaArray::create<uint1>(numBuckets, "bucketOffset"); bucketOffset = CudaArray::create<uint1>(context, numBuckets, "bucketOffset");
bucketOfElement = CudaArray::create<uint1>(length, "bucketOfElement"); bucketOfElement = CudaArray::create<uint1>(context, length, "bucketOfElement");
offsetInBucket = CudaArray::create<uint1>(length, "offsetInBucket"); offsetInBucket = CudaArray::create<uint1>(context, length, "offsetInBucket");
buckets = new CudaArray(length, trait->getDataSize(), "buckets"); buckets = new CudaArray(context, length, trait->getDataSize(), "buckets");
} }
CudaSort::~CudaSort() { CudaSort::~CudaSort() {
......
/**
* Perform the first step of Brownian integration.
*/
extern "C" __global__ void integrateBrownianPart1(real tauDeltaT, real noiseAmplitude, const long long* __restrict__ force,
real4* __restrict__ posDelta, const real4* __restrict__ velm, const float4* __restrict__ random, unsigned int randomIndex) {
randomIndex += blockIdx.x*blockDim.x+threadIdx.x;
const real fscale = tauDeltaT/(real) 0xFFFFFFFF;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
real invMass = velm[index].w;
if (invMass != 0) {
posDelta[index].x = fscale*invMass*force[index] + noiseAmplitude*SQRT(invMass)*random[randomIndex].x;
posDelta[index].y = fscale*invMass*force[index+PADDED_NUM_ATOMS] + noiseAmplitude*SQRT(invMass)*random[randomIndex].y;
posDelta[index].z = fscale*invMass*force[index+PADDED_NUM_ATOMS*2] + noiseAmplitude*SQRT(invMass)*random[randomIndex].z;
}
randomIndex += blockDim.x*gridDim.x;
}
}
/**
* Perform the second step of Brownian integration.
*/
extern "C" __global__ void integrateBrownianPart2(real deltaT, real4* posq, real4* velm, const real4* __restrict__ posDelta) {
const real oneOverDeltaT = RECIP(deltaT);
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
if (velm[index].w != 0) {
real4 delta = posDelta[index];
velm[index].x = oneOverDeltaT*delta.x;
velm[index].y = oneOverDeltaT*delta.y;
velm[index].z = oneOverDeltaT*delta.z;
posq[index].x = posq[index].x + delta.x;
posq[index].y = posq[index].y + delta.y;
posq[index].z = posq[index].z + delta.z;
}
}
}
/**
* Compute the direction each constraint is pointing in. This is called once at the beginning of constraint evaluation.
*/
extern "C" __global__ void computeConstraintDirections(const int2* __restrict__ constraintAtoms, real4* __restrict__ constraintDistance, const real4* __restrict__ atomPositions) {
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_CONSTRAINTS; index += blockDim.x*gridDim.x) {
// Compute the direction for this constraint.
int2 atoms = constraintAtoms[index];
real4 dir = constraintDistance[index];
real4 oldPos1 = atomPositions[atoms.x];
real4 oldPos2 = atomPositions[atoms.y];
dir.x = oldPos1.x-oldPos2.x;
dir.y = oldPos1.y-oldPos2.y;
dir.z = oldPos1.z-oldPos2.z;
constraintDistance[index] = dir;
}
}
/**
* Compute the force applied by each constraint.
*/
extern "C" __global__ void computeConstraintForce(const int2* __restrict__ constraintAtoms, const real4* __restrict__ constraintDistance, const real4* __restrict__ atomPositions,
const real* __restrict__ reducedMass, real* __restrict__ delta1, int* __restrict__ converged, float tol, int iteration) {
__shared__ int groupConverged;
if (converged[1-iteration%2]) {
if (blockIdx.x == 0 && threadIdx.x == 0)
converged[iteration%2] = 1;
return; // The constraint iteration has already converged.
}
if (threadIdx.x == 0)
groupConverged = 1;
__syncthreads();
real lowerTol = 1.0f-2.0f*tol+tol*tol;
real upperTol = 1.0f+2.0f*tol+tol*tol;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_CONSTRAINTS; index += blockDim.x*gridDim.x) {
// Compute the force due to this constraint.
int2 atoms = constraintAtoms[index];
real4 dir = constraintDistance[index];
real4 rp_ij = atomPositions[atoms.x]-atomPositions[atoms.y];
#ifndef CONSTRAIN_VELOCITIES
rp_ij.x += dir.x;
rp_ij.y += dir.y;
rp_ij.z += dir.z;
#endif
real rrpr = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z;
real d_ij2 = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z;
#ifdef CONSTRAIN_VELOCITIES
delta1[index] = -2.0f*reducedMass[index]*rrpr/d_ij2;
// See whether it has converged.
if (groupConverged && fabs(delta1[index]) > tol) {
groupConverged = 0;
converged[iteration%2] = 0;
}
#else
real rp2 = rp_ij.x*rp_ij.x + rp_ij.y*rp_ij.y + rp_ij.z*rp_ij.z;
real dist2 = dir.w*dir.w;
real diff = dist2 - rp2;
delta1[index] = (rrpr > d_ij2*1e-6f ? reducedMass[index]*diff/rrpr : 0.0f);
// See whether it has converged.
if (groupConverged && (rp2 < lowerTol*dist2 || rp2 > upperTol*dist2)) {
groupConverged = 0;
converged[iteration%2] = 0;
}
#endif
}
}
/**
* Multiply the vector of constraint forces by the constraint matrix.
*/
extern "C" __global__ void multiplyByConstraintMatrix(const real* __restrict__ delta1, real* __restrict__ delta2, const int* __restrict__ constraintMatrixColumn,
const real* __restrict__ constraintMatrixValue, const int* __restrict__ converged, int iteration) {
if (converged[iteration%2])
return; // The constraint iteration has already converged.
// Multiply by the inverse constraint matrix.
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_CONSTRAINTS; index += blockDim.x*gridDim.x) {
real sum = 0.0f;
for (int i = 0; ; i++) {
int element = index+i*NUM_CONSTRAINTS;
int column = constraintMatrixColumn[element];
if (column >= NUM_CONSTRAINTS)
break;
sum += delta1[column]*constraintMatrixValue[element];
}
delta2[index] = sum;
}
}
/**
* Update the atom positions based on constraint forces.
*/
extern "C" __global__ void updateAtomPositions(const int* __restrict__ numAtomConstraints, const int* __restrict__ atomConstraints, const real4* __restrict__ constraintDistance,
real4* __restrict__ atomPositions, const real4* __restrict__ velm, const real* __restrict__ delta1, const real* __restrict__ delta2, int* __restrict__ converged, int iteration) {
if (blockIdx.x == 0 && threadIdx.x == 0)
converged[1-iteration%2] = 1;
if (converged[iteration%2])
return; // The constraint iteration has already converged.
real damping = (iteration < 2 ? 0.5f : 1.0f);
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Compute the new position of this atom.
real4 atomPos = atomPositions[index];
real invMass = velm[index].w;
int num = numAtomConstraints[index];
for (int i = 0; i < num; i++) {
int constraint = atomConstraints[index+i*NUM_ATOMS];
bool forward = (constraint > 0);
constraint = (forward ? constraint-1 : -constraint-1);
real constraintForce = damping*invMass*delta2[constraint];
constraintForce = (forward ? constraintForce : -constraintForce);
real4 dir = constraintDistance[constraint];
atomPos.x += constraintForce*dir.x;
atomPos.y += constraintForce*dir.y;
atomPos.z += constraintForce*dir.z;
}
atomPositions[index] = atomPos;
}
}
extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4* __restrict__ posDelta) {
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
real4 position = posq[index];
position.x += posDelta[index].x;
position.y += posDelta[index].y;
position.z += posDelta[index].z;
posq[index] = position;
}
}
enum {VelScale, ForceScale, NoiseScale, MaxParams};
/**
* Perform the first step of Langevin integration.
*/
extern "C" __global__ void integrateLangevinPart1(real4* __restrict__ velm, const long long* __restrict__ force, real4* __restrict__ posDelta,
const real* __restrict__ paramBuffer, const real2* __restrict__ dt, const float4* __restrict__ random, unsigned int randomIndex) {
real vscale = paramBuffer[VelScale];
real fscale = paramBuffer[ForceScale]/(real) 0xFFFFFFFF;
real noisescale = paramBuffer[NoiseScale];
real stepSize = dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x;
randomIndex += index;
while (index < NUM_ATOMS) {
real4 velocity = velm[index];
if (velocity.w != 0) {
real sqrtInvMass = SQRT(velocity.w);
velocity.x = vscale*velocity.x + fscale*velocity.w*force[index] + noisescale*sqrtInvMass*random[randomIndex].x;
velocity.y = vscale*velocity.y + fscale*velocity.w*force[index+PADDED_NUM_ATOMS] + noisescale*sqrtInvMass*random[randomIndex].y;
velocity.z = vscale*velocity.z + fscale*velocity.w*force[index+PADDED_NUM_ATOMS*2] + noisescale*sqrtInvMass*random[randomIndex].z;
velm[index] = velocity;
posDelta[index] = make_real4(stepSize*velocity.x, stepSize*velocity.y, stepSize*velocity.z, 0);
}
randomIndex += blockDim.x*gridDim.x;
index += blockDim.x*gridDim.x;
}
}
/**
* Perform the second step of Langevin integration.
*/
extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, const real4* __restrict__ posDelta, real4* __restrict__ velm, const real2* __restrict__ dt) {
double invStepSize = 1.0/dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x;
while (index < NUM_ATOMS) {
real4 vel = velm[index];
if (vel.w != 0) {
real4 pos = posq[index];
real4 delta = posDelta[index];
pos.x += delta.x;
pos.y += delta.y;
pos.z += delta.z;
vel.x = (real) invStepSize*delta.x;
vel.y = (real) invStepSize*delta.y;
vel.z = (real) invStepSize*delta.z;
posq[index] = pos;
velm[index] = vel;
}
index += blockDim.x*gridDim.x;
}
}
/**
* Select the step size to use for the next step.
*/
extern "C" __global__ void selectLangevinStepSize(real maxStepSize, real errorTol, real tau, real kT, real2* __restrict__ dt,
const real4* __restrict__ velm, const long long* __restrict__ force, real* __restrict__ paramBuffer) {
// Calculate the error.
extern __shared__ real params[];
real* error = &params[MaxParams];
real err = 0;
unsigned int index = threadIdx.x;
const real scale = RECIP((real) 0xFFFFFFFF);
while (index < NUM_ATOMS) {
real3 f = make_real3(scale*force[index], scale*force[index+PADDED_NUM_ATOMS], scale*force[index+PADDED_NUM_ATOMS*2]);
real invMass = velm[index].w;
err += (f.x*f.x + f.y*f.y + f.z*f.z)*invMass;
index += blockDim.x*gridDim.x;
}
error[threadIdx.x] = err;
__syncthreads();
// Sum the errors from all threads.
for (unsigned int offset = 1; offset < blockDim.x; offset *= 2) {
if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0)
error[threadIdx.x] += error[threadIdx.x+offset];
__syncthreads();
}
if (blockIdx.x*blockDim.x+threadIdx.x == 0) {
// Select the new step size.
real totalError = sqrt(error[0]/(NUM_ATOMS*3));
real newStepSize = sqrt(errorTol/totalError);
real oldStepSize = dt[0].y;
if (oldStepSize > 0.0f)
newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize)
newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator.
if (newStepSize > maxStepSize)
newStepSize = maxStepSize;
dt[0].y = newStepSize;
// Recalculate the integration parameters.
real vscale = exp(-newStepSize/tau);
real fscale = (1-vscale)*tau;
real noisescale = sqrt(2*kT/tau)*sqrt(0.5f*(1-vscale*vscale)*tau);
params[VelScale] = vscale;
params[ForceScale] = fscale;
params[NoiseScale] = noisescale;
}
__syncthreads();
if (threadIdx.x < MaxParams)
paramBuffer[threadIdx.x] = params[threadIdx.x];
}
/**
* Enforce constraints on SETTLE clusters
*/
extern "C" __global__ void applySettle(int numClusters, float tol, const real4* __restrict__ oldPos, real4* __restrict__ posDelta, const real4* __restrict__ velm, const int4* __restrict__ clusterAtoms, const float2* __restrict__ clusterParams) {
int index = blockIdx.x*blockDim.x+threadIdx.x;
while (index < numClusters) {
// Load the data for this cluster.
int4 atoms = clusterAtoms[index];
float2 params = clusterParams[index];
real4 apos0 = oldPos[atoms.x];
real4 xp0 = posDelta[atoms.x];
real4 apos1 = oldPos[atoms.y];
real4 xp1 = posDelta[atoms.y];
real4 apos2 = oldPos[atoms.z];
real4 xp2 = posDelta[atoms.z];
real m0 = RECIP(velm[atoms.x].w);
real m1 = RECIP(velm[atoms.y].w);
real m2 = RECIP(velm[atoms.z].w);
// Apply the SETTLE algorithm.
real xb0 = apos1.x-apos0.x;
real yb0 = apos1.y-apos0.y;
real zb0 = apos1.z-apos0.z;
real xc0 = apos2.x-apos0.x;
real yc0 = apos2.y-apos0.y;
real zc0 = apos2.z-apos0.z;
real invTotalMass = RECIP(m0+m1+m2);
real xcom = (xp0.x*m0 + (xb0+xp1.x)*m1 + (xc0+xp2.x)*m2) * invTotalMass;
real ycom = (xp0.y*m0 + (yb0+xp1.y)*m1 + (yc0+xp2.y)*m2) * invTotalMass;
real zcom = (xp0.z*m0 + (zb0+xp1.z)*m1 + (zc0+xp2.z)*m2) * invTotalMass;
real xa1 = xp0.x - xcom;
real ya1 = xp0.y - ycom;
real za1 = xp0.z - zcom;
real xb1 = xb0 + xp1.x - xcom;
real yb1 = yb0 + xp1.y - ycom;
real zb1 = zb0 + xp1.z - zcom;
real xc1 = xc0 + xp2.x - xcom;
real yc1 = yc0 + xp2.y - ycom;
real zc1 = zc0 + xp2.z - zcom;
real xaksZd = yb0*zc0 - zb0*yc0;
real yaksZd = zb0*xc0 - xb0*zc0;
real zaksZd = xb0*yc0 - yb0*xc0;
real xaksXd = ya1*zaksZd - za1*yaksZd;
real yaksXd = za1*xaksZd - xa1*zaksZd;
real zaksXd = xa1*yaksZd - ya1*xaksZd;
real xaksYd = yaksZd*zaksXd - zaksZd*yaksXd;
real yaksYd = zaksZd*xaksXd - xaksZd*zaksXd;
real zaksYd = xaksZd*yaksXd - yaksZd*xaksXd;
real axlng = SQRT(xaksXd*xaksXd + yaksXd*yaksXd + zaksXd*zaksXd);
real aylng = SQRT(xaksYd*xaksYd + yaksYd*yaksYd + zaksYd*zaksYd);
real azlng = SQRT(xaksZd*xaksZd + yaksZd*yaksZd + zaksZd*zaksZd);
real trns11 = xaksXd / axlng;
real trns21 = yaksXd / axlng;
real trns31 = zaksXd / axlng;
real trns12 = xaksYd / aylng;
real trns22 = yaksYd / aylng;
real trns32 = zaksYd / aylng;
real trns13 = xaksZd / azlng;
real trns23 = yaksZd / azlng;
real trns33 = zaksZd / azlng;
real xb0d = trns11*xb0 + trns21*yb0 + trns31*zb0;
real yb0d = trns12*xb0 + trns22*yb0 + trns32*zb0;
real xc0d = trns11*xc0 + trns21*yc0 + trns31*zc0;
real yc0d = trns12*xc0 + trns22*yc0 + trns32*zc0;
real za1d = trns13*xa1 + trns23*ya1 + trns33*za1;
real xb1d = trns11*xb1 + trns21*yb1 + trns31*zb1;
real yb1d = trns12*xb1 + trns22*yb1 + trns32*zb1;
real zb1d = trns13*xb1 + trns23*yb1 + trns33*zb1;
real xc1d = trns11*xc1 + trns21*yc1 + trns31*zc1;
real yc1d = trns12*xc1 + trns22*yc1 + trns32*zc1;
real zc1d = trns13*xc1 + trns23*yc1 + trns33*zc1;
// --- Step2 A2' ---
float rc = 0.5f*params.y;
float rb = SQRT(params.x*params.x-rc*rc);
real ra = rb*(m1+m2)*invTotalMass;
rb -= ra;
real sinphi = za1d/ra;
real cosphi = SQRT(1-sinphi*sinphi);
real sinpsi = (zb1d-zc1d) / (2*rc*cosphi);
real cospsi = SQRT(1-sinpsi*sinpsi);
real ya2d = ra*cosphi;
real xb2d = - rc*cospsi;
real yb2d = - rb*cosphi - rc*sinpsi*sinphi;
real yc2d = - rb*cosphi + rc*sinpsi*sinphi;
real xb2d2 = xb2d*xb2d;
real hh2 = 4.0f*xb2d2 + (yb2d-yc2d)*(yb2d-yc2d) + (zb1d-zc1d)*(zb1d-zc1d);
real deltx = 2.0f*xb2d + SQRT(4.0f*xb2d2 - hh2 + params.y*params.y);
xb2d -= deltx*0.5f;
// --- Step3 al,be,ga ---
real alpha = (xb2d*(xb0d-xc0d) + yb0d*yb2d + yc0d*yc2d);
real beta = (xb2d*(yc0d-yb0d) + xb0d*yb2d + xc0d*yc2d);
real gamma = xb0d*yb1d - xb1d*yb0d + xc0d*yc1d - xc1d*yc0d;
real al2be2 = alpha*alpha + beta*beta;
real sintheta = (alpha*gamma - beta*SQRT(al2be2 - gamma*gamma)) / al2be2;
// --- Step4 A3' ---
real costheta = SQRT(1-sintheta*sintheta);
real xa3d = - ya2d*sintheta;
real ya3d = ya2d*costheta;
real za3d = za1d;
real xb3d = xb2d*costheta - yb2d*sintheta;
real yb3d = xb2d*sintheta + yb2d*costheta;
real zb3d = zb1d;
real xc3d = - xb2d*costheta - yc2d*sintheta;
real yc3d = - xb2d*sintheta + yc2d*costheta;
real zc3d = zc1d;
// --- Step5 A3 ---
real xa3 = trns11*xa3d + trns12*ya3d + trns13*za3d;
real ya3 = trns21*xa3d + trns22*ya3d + trns23*za3d;
real za3 = trns31*xa3d + trns32*ya3d + trns33*za3d;
real xb3 = trns11*xb3d + trns12*yb3d + trns13*zb3d;
real yb3 = trns21*xb3d + trns22*yb3d + trns23*zb3d;
real zb3 = trns31*xb3d + trns32*yb3d + trns33*zb3d;
real xc3 = trns11*xc3d + trns12*yc3d + trns13*zc3d;
real yc3 = trns21*xc3d + trns22*yc3d + trns23*zc3d;
real zc3 = trns31*xc3d + trns32*yc3d + trns33*zc3d;
xp0.x = xcom + xa3;
xp0.y = ycom + ya3;
xp0.z = zcom + za3;
xp1.x = xcom + xb3 - xb0;
xp1.y = ycom + yb3 - yb0;
xp1.z = zcom + zb3 - zb0;
xp2.x = xcom + xc3 - xc0;
xp2.y = ycom + yc3 - yc0;
xp2.z = zcom + zc3 - zc0;
// Record the new positions.
posDelta[atoms.x] = xp0;
posDelta[atoms.y] = xp1;
posDelta[atoms.z] = xp2;
index += blockDim.x*gridDim.x;
}
}
/**
* Enforce velocity constraints on SETTLE clusters
*/
extern "C" __global__ void constrainVelocities(int numClusters, float tol, const real4* __restrict__ oldPos, real4* __restrict__ posDelta, real4* __restrict__ velm, const int4* __restrict__ clusterAtoms, const float2* __restrict__ clusterParams) {
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numClusters; index += blockDim.x*gridDim.x) {
// Load the data for this cluster.
int4 atoms = clusterAtoms[index];
real4 apos0 = oldPos[atoms.x];
real4 apos1 = oldPos[atoms.y];
real4 apos2 = oldPos[atoms.z];
real4 v0 = velm[atoms.x];
real4 v1 = velm[atoms.y];
real4 v2 = velm[atoms.z];
// Compute intermediate quantities: the atom masses, the bond directions, the relative velocities,
// and the angle cosines and sines.
real mA = RECIP(v0.w);
real mB = RECIP(v1.w);
real mC = RECIP(v2.w);
real3 eAB = make_real3(apos1.x-apos0.x, apos1.y-apos0.y, apos1.z-apos0.z);
real3 eBC = make_real3(apos2.x-apos1.x, apos2.y-apos1.y, apos2.z-apos1.z);
real3 eCA = make_real3(apos0.x-apos2.x, apos0.y-apos2.y, apos0.z-apos2.z);
eAB *= RECIP(SQRT(eAB.x*eAB.x + eAB.y*eAB.y + eAB.z*eAB.z));
eBC *= RECIP(SQRT(eBC.x*eBC.x + eBC.y*eBC.y + eBC.z*eBC.z));
eCA *= RECIP(SQRT(eCA.x*eCA.x + eCA.y*eCA.y + eCA.z*eCA.z));
real vAB = (v1.x-v0.x)*eAB.x + (v1.y-v0.y)*eAB.y + (v1.z-v0.z)*eAB.z;
real vBC = (v2.x-v1.x)*eBC.x + (v2.y-v1.y)*eBC.y + (v2.z-v1.z)*eBC.z;
real vCA = (v0.x-v2.x)*eCA.x + (v0.y-v2.y)*eCA.y + (v0.z-v2.z)*eCA.z;
real cA = -(eAB.x*eCA.x + eAB.y*eCA.y + eAB.z*eCA.z);
real cB = -(eAB.x*eBC.x + eAB.y*eBC.y + eAB.z*eBC.z);
real cC = -(eBC.x*eCA.x + eBC.y*eCA.y + eBC.z*eCA.z);
real s2A = 1-cA*cA;
real s2B = 1-cB*cB;
real s2C = 1-cC*cC;
// Solve the equations. These are different from those in the SETTLE paper (JCC 13(8), pp. 952-962, 1992), because
// in going from equations B1 to B2, they make the assumption that mB=mC (but don't bother to mention they're
// making that assumption). We allow all three atoms to have different masses.
real mABCinv = RECIP(mA*mB*mC);
real denom = (((s2A*mB+s2B*mA)*mC+(s2A*mB*mB+2*(cA*cB*cC+1)*mA*mB+s2B*mA*mA))*mC+s2C*mA*mB*(mA+mB))*mABCinv;
real tab = ((cB*cC*mA-cA*mB-cA*mC)*vCA + (cA*cC*mB-cB*mC-cB*mA)*vBC + (s2C*mA*mA*mB*mB*mABCinv+(mA+mB+mC))*vAB)/denom;
real tbc = ((cA*cB*mC-cC*mB-cC*mA)*vCA + (s2A*mB*mB*mC*mC*mABCinv+(mA+mB+mC))*vBC + (cA*cC*mB-cB*mA-cB*mC)*vAB)/denom;
real tca = ((s2B*mA*mA*mC*mC*mABCinv+(mA+mB+mC))*vCA + (cA*cB*mC-cC*mB-cC*mA)*vBC + (cB*cC*mA-cA*mB-cA*mC)*vAB)/denom;
v0.x += (tab*eAB.x - tca*eCA.x)*v0.w;
v0.y += (tab*eAB.y - tca*eCA.y)*v0.w;
v0.z += (tab*eAB.z - tca*eCA.z)*v0.w;
v1.x += (tbc*eBC.x - tab*eAB.x)*v1.w;
v1.y += (tbc*eBC.y - tab*eAB.y)*v1.w;
v1.z += (tbc*eBC.z - tab*eAB.z)*v1.w;
v2.x += (tca*eCA.x - tbc*eBC.x)*v2.w;
v2.y += (tca*eCA.y - tbc*eBC.y)*v2.w;
v2.z += (tca*eCA.z - tbc*eBC.z)*v2.w;
velm[atoms.x] = v0;
velm[atoms.y] = v1;
velm[atoms.z] = v2;
}
}
\ No newline at end of file
/**
* Enforce constraints on SHAKE clusters
*/
extern "C" __global__ void applyShakeToHydrogens(int numClusters, real tol, const real4* __restrict__ oldPos, real4* __restrict__ posDelta, const int4* __restrict__ clusterAtoms, const float4* __restrict__ clusterParams) {
int index = blockIdx.x*blockDim.x+threadIdx.x;
while (index < numClusters) {
// Load the data for this cluster.
int4 atoms = clusterAtoms[index];
float4 params = clusterParams[index];
real4 pos = oldPos[atoms.x];
real4 xpi = posDelta[atoms.x];
real4 pos1 = oldPos[atoms.y];
real4 xpj1 = posDelta[atoms.y];
real4 pos2 = make_real4(0);
real4 xpj2 = make_real4(0);
real invMassCentral = params.x;
real avgMass = params.y;
float d2 = params.z;
float invMassPeripheral = params.w;
if (atoms.z != -1) {
pos2 = oldPos[atoms.z];
xpj2 = posDelta[atoms.z];
}
real4 pos3 = make_real4(0);
real4 xpj3 = make_real4(0);
if (atoms.w != -1) {
pos3 = oldPos[atoms.w];
xpj3 = posDelta[atoms.w];
}
// Precompute quantities.
real3 rij1 = make_real3(pos.x-pos1.x, pos.y-pos1.y, pos.z-pos1.z);
real3 rij2 = make_real3(pos.x-pos2.x, pos.y-pos2.y, pos.z-pos2.z);
real3 rij3 = make_real3(pos.x-pos3.x, pos.y-pos3.y, pos.z-pos3.z);
real rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z;
real rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z;
real rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z;
real ld1 = d2-rij1sq;
real ld2 = d2-rij2sq;
real ld3 = d2-rij3sq;
// Iterate until convergence.
bool converged = false;
int iteration = 0;
while (iteration < 15 && !converged) {
converged = true;
#ifdef CONSTRAIN_VELOCITIES
real3 rpij = make_real3(xpi.x-xpj1.x, xpi.y-xpj1.y, xpi.z-xpj1.z);
real rrpr = rpij.x*rij1.x + rpij.y*rij1.y + rpij.z*rij1.z;
real delta = -2.0f*avgMass*rrpr/rij1sq;
real3 dr = rij1*delta;
xpi.x += dr.x*invMassCentral;
xpi.y += dr.y*invMassCentral;
xpi.z += dr.z*invMassCentral;
xpj1.x -= dr.x*invMassPeripheral;
xpj1.y -= dr.y*invMassPeripheral;
xpj1.z -= dr.z*invMassPeripheral;
if (fabs(delta) > tol)
converged = false;
if (atoms.z != -1) {
rpij = make_real3(xpi.x-xpj2.x, xpi.y-xpj2.y, xpi.z-xpj2.z);
rrpr = rpij.x*rij2.x + rpij.y*rij2.y + rpij.z*rij2.z;
delta = -2.0f*avgMass*rrpr/rij2sq;
dr = rij2*delta;
xpi.x += dr.x*invMassCentral;
xpi.y += dr.y*invMassCentral;
xpi.z += dr.z*invMassCentral;
xpj2.x -= dr.x*invMassPeripheral;
xpj2.y -= dr.y*invMassPeripheral;
xpj2.z -= dr.z*invMassPeripheral;
if (fabs(delta) > tol)
converged = false;
}
if (atoms.w != -1) {
rpij = make_real3(xpi.x-xpj3.x, xpi.y-xpj3.y, xpi.z-xpj3.z);
rrpr = rpij.x*rij3.x + rpij.y*rij3.y + rpij.z*rij3.z;
delta = -2.0f*avgMass*rrpr/rij3sq;
dr = rij3*delta;
xpi.x += dr.x*invMassCentral;
xpi.y += dr.y*invMassCentral;
xpi.z += dr.z*invMassCentral;
xpj3.x -= dr.x*invMassPeripheral;
xpj3.y -= dr.y*invMassPeripheral;
xpj3.z -= dr.z*invMassPeripheral;
if (fabs(delta) > tol)
converged = false;
}
#else
real3 rpij = make_real3(xpi.x-xpj1.x, xpi.y-xpj1.y, xpi.z-xpj1.z);
real rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
real rrpr = rij1.x*rpij.x + rij1.y*rpij.y + rij1.z*rpij.z;
real diff = fabs(ld1-2.0f*rrpr-rpsqij) / (d2*tol);
if (diff >= 1.0f) {
real acor = (ld1-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij1sq);
real3 dr = rij1*acor;
xpi.x += dr.x*invMassCentral;
xpi.y += dr.y*invMassCentral;
xpi.z += dr.z*invMassCentral;
xpj1.x -= dr.x*invMassPeripheral;
xpj1.y -= dr.y*invMassPeripheral;
xpj1.z -= dr.z*invMassPeripheral;
converged = false;
}
if (atoms.z != -1) {
rpij = make_real3(xpi.x-xpj2.x, xpi.y-xpj2.y, xpi.z-xpj2.z);
rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
rrpr = rij2.x*rpij.x + rij2.y*rpij.y + rij2.z*rpij.z;
diff = fabs(ld2-2.0f*rrpr-rpsqij) / (d2*tol);
if (diff >= 1.0f) {
real acor = (ld2 - 2.0f*rrpr - rpsqij)*avgMass / (rrpr + rij2sq);
real3 dr = rij2*acor;
xpi.x += dr.x*invMassCentral;
xpi.y += dr.y*invMassCentral;
xpi.z += dr.z*invMassCentral;
xpj2.x -= dr.x*invMassPeripheral;
xpj2.y -= dr.y*invMassPeripheral;
xpj2.z -= dr.z*invMassPeripheral;
converged = false;
}
}
if (atoms.w != -1) {
rpij = make_real3(xpi.x-xpj3.x, xpi.y-xpj3.y, xpi.z-xpj3.z);
rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
rrpr = rij3.x*rpij.x + rij3.y*rpij.y + rij3.z*rpij.z;
diff = fabs(ld3 - 2.0f*rrpr - rpsqij) / (d2*tol);
if (diff >= 1.0f) {
real acor = (ld3-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij3sq);
real3 dr = rij3*acor;
xpi.x += dr.x*invMassCentral;
xpi.y += dr.y*invMassCentral;
xpi.z += dr.z*invMassCentral;
xpj3.x -= dr.x*invMassPeripheral;
xpj3.y -= dr.y*invMassPeripheral;
xpj3.z -= dr.z*invMassPeripheral;
converged = false;
}
}
#endif
iteration++;
}
// Record the new positions.
posDelta[atoms.x] = xpi;
posDelta[atoms.y] = xpj1;
if (atoms.z != -1)
posDelta[atoms.z] = xpj2;
if (atoms.w != -1)
posDelta[atoms.w] = xpj3;
index += blockDim.x*gridDim.x;
}
}
...@@ -51,36 +51,38 @@ extern "C" __global__ void integrateVerletPart2(real2* __restrict__ dt, real4* _ ...@@ -51,36 +51,38 @@ extern "C" __global__ void integrateVerletPart2(real2* __restrict__ dt, real4* _
/** /**
* Select the step size to use for the next step. * Select the step size to use for the next step.
*/ */
//
//extern "C" __global__ void selectVerletStepSize(real maxStepSize, real errorTol, real2* __restrict__ dt, const real4* __restrict__ velm, const real4* __restrict__ force, __local real* __restrict__ error) { extern "C" __global__ void selectVerletStepSize(real maxStepSize, real errorTol, real2* __restrict__ dt, const real4* __restrict__ velm, const long long* __restrict__ force) {
// // Calculate the error. // Calculate the error.
//
// real err = 0.0f; extern __shared__ real error[];
// for (int index = threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) { real err = 0.0f;
// real4 f = force[index]; const real scale = RECIP((real) 0xFFFFFFFF);
// real invMass = velm[index].w; for (int index = threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// err += (f.x*f.x + f.y*f.y + f.z*f.z)*invMass; real3 f = make_real3(scale*force[index], scale*force[index+PADDED_NUM_ATOMS], scale*force[index+PADDED_NUM_ATOMS*2]);
// } real invMass = velm[index].w;
// error[threadIdx.x] = err; err += (f.x*f.x + f.y*f.y + f.z*f.z)*invMass;
// __syncthreads; }
// error[threadIdx.x] = err;
// // Sum the errors from all threads. __syncthreads();
//
// for (unsigned int offset = 1; offset < get_local_size(0); offset *= 2) { // Sum the errors from all threads.
// if (threadIdx.x+offset < get_local_size(0) && (threadIdx.x&(2*offset-1)) == 0)
// error[threadIdx.x] += error[threadIdx.x+offset]; for (unsigned int offset = 1; offset < blockDim.x; offset *= 2) {
// __syncthreads; if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0)
// } error[threadIdx.x] += error[threadIdx.x+offset];
// if (threadIdx.x == 0) { __syncthreads();
// real totalError = sqrt(error[0]/(NUM_ATOMS*3)); }
// real newStepSize = sqrt(errorTol/totalError); if (threadIdx.x == 0) {
// real oldStepSize = dt[0].y; real totalError = SQRT(error[0]/(NUM_ATOMS*3));
// if (oldStepSize > 0.0f) real newStepSize = SQRT(errorTol/totalError);
// newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase. real oldStepSize = dt[0].y;
// if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize) if (oldStepSize > 0.0f)
// newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator. newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
// if (newStepSize > maxStepSize) if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize)
// newStepSize = maxStepSize; newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator.
// dt[0].y = newStepSize; if (newStepSize > maxStepSize)
// } newStepSize = maxStepSize;
//} dt[0].y = newStepSize;
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/System.h"
/**
* This tests the CUDA implementation of BrownianIntegrator.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/BrownianIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
void testSingleBond() {
CudaPlatform platform;
System system;
system.addParticle(2.0);
system.addParticle(2.0);
double dt = 0.01;
BrownianIntegrator integrator(0, 0.1, dt);
HarmonicBondForce* forceField = new HarmonicBondForce();
forceField->addBond(0, 1, 1.5, 1);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(-1, 0, 0);
positions[1] = Vec3(1, 0, 0);
context.setPositions(positions);
// This is simply an overdamped harmonic oscillator, so compare it to the analytical solution.
double rate = 2*1.0/(0.1*2.0);
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions | State::Velocities);
double time = state.getTime();
double expectedDist = 1.5+0.5*std::exp(-rate*time);
ASSERT_EQUAL_VEC(Vec3(-0.5*expectedDist, 0, 0), state.getPositions()[0], 0.02);
ASSERT_EQUAL_VEC(Vec3(0.5*expectedDist, 0, 0), state.getPositions()[1], 0.02);
if (i > 0) {
double expectedSpeed = -0.5*rate*std::exp(-rate*(time-0.5*dt));
ASSERT_EQUAL_VEC(Vec3(-0.5*expectedSpeed, 0, 0), state.getVelocities()[0], 0.11);
ASSERT_EQUAL_VEC(Vec3(0.5*expectedSpeed, 0, 0), state.getVelocities()[1], 0.11);
}
integrator.step(1);
}
}
void testTemperature() {
const int numParticles = 8;
const int numBonds = numParticles-1;
const double temp = 10.0;
CudaPlatform platform;
System system;
BrownianIntegrator integrator(temp, 2.0, 0.01);
HarmonicBondForce* forceField = new HarmonicBondForce();
for (int i = 0; i < numParticles; ++i)
system.addParticle(2.0);
for (int i = 0; i < numBonds; ++i)
forceField->addBond(i, i+1, 1.0, 5.0);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; ++i)
positions[i] = Vec3(i, 0, 0);
context.setPositions(positions);
// Let it equilibrate.
integrator.step(10000);
// Now run it for a while and see if the temperature is correct.
double pe = 0.0;
const int steps = 50000;
for (int i = 0; i < steps; ++i) {
State state = context.getState(State::Energy);
pe += state.getPotentialEnergy();
integrator.step(1);
}
pe /= steps;
double expected = 0.5*numBonds*BOLTZ*temp;
ASSERT_USUALLY_EQUAL_TOL(expected, pe, 0.1*expected);
}
void testConstraints() {
const int numParticles = 8;
const int numConstraints = 5;
const double temp = 20.0;
CudaPlatform platform;
System system;
BrownianIntegrator integrator(temp, 2.0, 0.001);
integrator.setConstraintTolerance(1e-5);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(10.0);
forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
}
system.addConstraint(0, 1, 1.0);
system.addConstraint(1, 2, 1.0);
system.addConstraint(2, 3, 1.0);
system.addConstraint(4, 5, 1.0);
system.addConstraint(6, 7, 1.0);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; ++i) {
positions[i] = Vec3(i, 0, 0);
velocities[i] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
}
context.setPositions(positions);
context.setVelocities(velocities);
// Simulate it and see whether the constraints remain satisfied.
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions);
for (int j = 0; j < numConstraints; ++j) {
int particle1, particle2;
double distance;
system.getConstraintParameters(j, particle1, particle2, distance);
Vec3 p1 = state.getPositions()[particle1];
Vec3 p2 = state.getPositions()[particle2];
double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
ASSERT_EQUAL_TOL(distance, dist, 1e-4);
}
integrator.step(1);
}
}
void testRandomSeed() {
const int numParticles = 8;
const double temp = 100.0;
const double collisionFreq = 10.0;
CudaPlatform platform;
System system;
BrownianIntegrator integrator(temp, 2.0, 0.001);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(2.0);
forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
}
system.addForce(forceField);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
for (int i = 0; i < numParticles; ++i) {
positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
velocities[i] = Vec3(0, 0, 0);
}
// Try twice with the same random seed.
integrator.setRandomNumberSeed(5);
Context context(system, integrator, platform);
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state1 = context.getState(State::Positions);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state2 = context.getState(State::Positions);
// Try twice with a different random seed.
integrator.setRandomNumberSeed(10);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state3 = context.getState(State::Positions);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state4 = context.getState(State::Positions);
// Compare the results.
for (int i = 0; i < numParticles; i++) {
for (int j = 0; j < 3; j++) {
ASSERT(state1.getPositions()[i][j] == state2.getPositions()[i][j]);
ASSERT(state3.getPositions()[i][j] == state4.getPositions()[i][j]);
ASSERT(state1.getPositions()[i][j] != state3.getPositions()[i][j]);
}
}
}
int main() {
try {
testSingleBond();
testTemperature();
testConstraints();
testRandomSeed();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment