Commit 6e434a02 authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to implement new CUDA platform: HarmonicBondForce and VerletIntegrator

parent 1f0ec7b5
......@@ -25,6 +25,7 @@
* -------------------------------------------------------------------------- */
#include "CudaArray.h"
#include "CudaContext.h"
#include <iostream>
#include <sstream>
#include <vector>
......@@ -36,7 +37,7 @@ CudaArray::CudaArray(int size, int elementSize, const std::string& name) :
CUresult result = cuMemAlloc(&pointer, size*elementSize);
if (result != CUDA_SUCCESS) {
std::stringstream str;
str<<"Error creating array "<<name<<": "<<result;
str<<"Error creating array "<<name<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(str.str());
}
}
......@@ -46,7 +47,7 @@ CudaArray::~CudaArray() {
CUresult result = cuMemFree(pointer);
if (result != CUDA_SUCCESS) {
std::stringstream str;
str<<"Error deleting array "<<name<<": "<<result;
str<<"Error deleting array "<<name<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(str.str());
}
}
......@@ -60,7 +61,7 @@ void CudaArray::upload(void* data, bool blocking) {
result = cuMemcpyHtoDAsync(pointer, data, size*elementSize, 0);
if (result != CUDA_SUCCESS) {
std::stringstream str;
str<<"Error uploading array "<<name<<": "<<result;
str<<"Error uploading array "<<name<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(str.str());
}
}
......@@ -73,7 +74,7 @@ void CudaArray::download(void* data, bool blocking) const {
result = cuMemcpyDtoHAsync(data, pointer, size*elementSize, 0);
if (result != CUDA_SUCCESS) {
std::stringstream str;
str<<"Error downloading array "<<name<<": "<<result;
str<<"Error downloading array "<<name<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(str.str());
}
}
......@@ -26,6 +26,7 @@
#include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h"
#include "CudaKernelSources.h"
#include "openmm/OpenMMException.h"
#include "CudaNonbondedUtilities.h"
#include <iostream>
......@@ -81,8 +82,8 @@ void CudaBondedUtilities::initialize(const System& system) {
for (int atom = 0; atom < width; atom++)
indexVec[bond*width+atom] = forceAtoms[i][bond][startAtom+atom];
}
CudaArray* indices = CudaArray::create<unsigned int>(indexVec.size(), "bondedIndices");
indices->upload(indexVec);
CudaArray* indices = new CudaArray(numBonds, 4*width, "bondedIndices");
indices->upload(&indexVec[0]);
atomIndices[i].push_back(indices);
startAtom += width;
}
......@@ -91,13 +92,14 @@ void CudaBondedUtilities::initialize(const System& system) {
// Create the kernel.
stringstream s;
s<<CudaKernelSources::vectorOps;
for (int i = 0; i < (int) prefixCode.size(); i++)
s<<prefixCode[i];
s<<"extern \"C\" __global__ void computeBondedForces(long* __restrict__ forceBuffer, real* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups";
s<<"extern \"C\" __global__ void computeBondedForces(unsigned long long* __restrict__ forceBuffer, real* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups";
for (int force = 0; force < numForces; force++) {
for (int i = 0; i < (int) atomIndices[force].size(); i++) {
int indexWidth = atomIndices[force][i]->getElementSize()/4;
string indexType = "unsigned int"+(indexWidth == 1 ? "" : context.intToString(indexWidth));
string indexType = "uint"+context.intToString(indexWidth);
s<<", const "<<indexType<<"* __restrict__ atomIndices"<<force<<"_"<<i;
}
}
......@@ -129,10 +131,10 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
for (int i = 0; i < (int) atomIndices[forceIndex].size(); i++) {
int indexWidth = atomIndices[forceIndex][i]->getElementSize()/4;
suffix = (indexWidth == 1 ? suffix1 : suffix4);
string indexType = "unsigned int"+(indexWidth == 1 ? "" : context.intToString(indexWidth));
string indexType = "uint"+context.intToString(indexWidth);
s<<" "<<indexType<<" atoms"<<i<<" = atomIndices"<<forceIndex<<"_"<<i<<"[index];\n";
s<<" "<<indexType<<" buffers = bufferIndices"<<forceIndex<<"[index];\n";
for (int j = 0; j < indexWidth; j++) {
int atomsToLoad = min(indexWidth, numAtoms-startAtom);
for (int j = 0; j < atomsToLoad; j++) {
s<<" unsigned int atom"<<(startAtom+j+1)<<" = atoms"<<i<<suffix[j]<<";\n";
s<<" real4 pos"<<(j+1)<<" = posq[atom"<<(j+1)<<"];\n";
}
......@@ -140,34 +142,28 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
}
s<<computeForce<<"\n";
for (int i = 0; i < numAtoms; i++) {
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"], (long) (force.x*0xFFFFFFFF));\n";
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS], (long) (force.x*0xFFFFFFFF));\n";
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS*2], (long) (force.x*0xFFFFFFFF));\n";
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".x*0xFFFFFFFF)));\n";
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".y*0xFFFFFFFF)));\n";
s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS*2], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".z*0xFFFFFFFF)));\n";
s<<" __threadfence_block();\n";
}
s<<"}\n";
return s.str();
}
void CudaBondedUtilities::computeInteractions(int groups) {
// if (!hasInitializedKernels) {
// hasInitializedKernels = true;
// for (int i = 0; i < (int) forceSets.size(); i++) {
// int index = 0;
// cl::Kernel& kernel = kernels[i];
// kernel.setArg<cl::Buffer>(index++, context.getForceBuffers().getDeviceBuffer());
// kernel.setArg<cl::Buffer>(index++, context.getEnergyBuffer().getDeviceBuffer());
// kernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
// index++;
// for (int j = 0; j < (int) forceSets[i].size(); j++) {
// kernel.setArg<cl::Buffer>(index++, atomIndices[forceSets[i][j]]->getDeviceBuffer());
// kernel.setArg<cl::Buffer>(index++, bufferIndices[forceSets[i][j]]->getDeviceBuffer());
// }
// for (int j = 0; j < (int) arguments.size(); j++)
// kernel.setArg<cl::Memory>(index++, *arguments[j]);
// }
// }
// for (int i = 0; i < (int) kernels.size(); i++) {
// kernels[i].setArg<cl_int>(3, groups);
// context.executeKernel(kernels[i], maxBonds);
// }
if (!hasInitializedKernels) {
hasInitializedKernels = true;
kernelArgs.push_back(&context.getForce().getDevicePointer());
kernelArgs.push_back(&context.getEnergyBuffer().getDevicePointer());
kernelArgs.push_back(&context.getPosq().getDevicePointer());
kernelArgs.push_back(NULL);
for (int i = 0; i < (int) atomIndices.size(); i++)
for (int j = 0; j < (int) atomIndices[i].size(); j++)
kernelArgs.push_back(&atomIndices[i][j]->getDevicePointer());
for (int i = 0; i < (int) arguments.size(); i++)
kernelArgs.push_back(&arguments[i]);
}
kernelArgs[3] = &groups;
context.executeKernel(kernel, &kernelArgs[0], maxBonds);
}
......@@ -59,7 +59,7 @@ namespace OpenMM {
* <li>The positions of those atoms will have been stored in the real4 variables "pos1", "pos2", ....</li>
* <li>A real variable called "energy" will exist. Your code should add the potential energy of the
* bond to that variable.</li>
* <li>Your code should define real4 variables called "force1", "force2", ... that contain the force to
* <li>Your code should define real3 variables called "force1", "force2", ... that contain the force to
* apply to each atom.</li>
* </ol>
*
......@@ -69,8 +69,8 @@ namespace OpenMM {
* <tt><pre>
* real4 delta = pos2-pos1;
* energy += delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
* real4 force1 = 2.0f*delta;
* real4 force2 = -2.0f*delta;
* real3 force1 = 2.0f*delta;
* real3 force2 = -2.0f*delta;
* </pre></tt>
*
* Interactions will often depend on parameters or other data. Call addArgument() to provide the data
......@@ -129,6 +129,7 @@ private:
std::vector<std::string> argTypes;
std::vector<std::vector<CudaArray*> > atomIndices;
std::vector<std::string> prefixCode;
std::vector<void*> kernelArgs;
int numForceBuffers, maxBonds;
bool hasInitializedKernels;
};
......
......@@ -30,7 +30,7 @@
#include <cmath>
#include "CudaContext.h"
#include "CudaArray.h"
//#include "CudaBondedUtilities.h"
#include "CudaBondedUtilities.h"
#include "CudaForceInfo.h"
#include "CudaIntegrationUtilities.h"
#include "CudaKernelSources.h"
......@@ -67,8 +67,8 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, CudaPlatform::PlatformData& platformData) : system(system), compiler(compiler),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL),
velm(NULL), /*forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndex(NULL),*/ integration(NULL), expression(NULL),
/*bonded(NULL), nonbonded(NULL),*/ thread(NULL) {
velm(NULL), force(NULL), energyBuffer(NULL), atomIndex(NULL), integration(NULL), expression(NULL),
bonded(NULL), /*nonbonded(NULL),*/ thread(NULL) {
if (!hasInitializedCuda) {
CHECK_RESULT2(cuInit(0), "Error initializing CUDA");
hasInitializedCuda = true;
......@@ -138,10 +138,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device));
int numThreadBlocksPerComputeUnit = 6;
numThreadBlocks = numThreadBlocksPerComputeUnit*multiprocessors;
// bonded = new CudaBondedUtilities(*this);
bonded = new CudaBondedUtilities(*this);
// nonbonded = new CudaNonbondedUtilities(*this);
if (useDoublePrecision) {
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, paddedNumAtoms*sizeof(double4), 0));
posq = CudaArray::create<double4>(paddedNumAtoms, "posq");
velm = CudaArray::create<double4>(paddedNumAtoms, "velm");
compilationDefines["make_real2"] = "make_double2";
......@@ -149,7 +148,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines["make_real4"] = "make_double4";
}
else {
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, paddedNumAtoms*sizeof(float4), 0));
posq = CudaArray::create<float4>(paddedNumAtoms, "posq");
velm = CudaArray::create<float4>(paddedNumAtoms, "velm");
compilationDefines["make_real2"] = "make_float2";
......@@ -203,22 +201,16 @@ CudaContext::~CudaContext() {
delete posq;
if (velm != NULL)
delete velm;
// if (force != NULL)
// delete force;
// if (forceBuffers != NULL)
// delete forceBuffers;
// if (longForceBuffer != NULL)
// delete longForceBuffer;
// if (energyBuffer != NULL)
// delete energyBuffer;
// if (atomIndex != NULL)
// delete atomIndex;
if (force != NULL)
delete force;
if (energyBuffer != NULL)
delete energyBuffer;
if (integration != NULL)
delete integration;
if (expression != NULL)
delete expression;
// if (bonded != NULL)
// delete bonded;
if (bonded != NULL)
delete bonded;
// if (nonbonded != NULL)
// delete nonbonded;
if (thread != NULL)
......@@ -229,6 +221,17 @@ CudaContext::~CudaContext() {
}
void CudaContext::initialize() {
string errorMessage = "Error initializing Context";
if (useDoublePrecision) {
energyBuffer = CudaArray::create<double>(numThreadBlocks*ThreadBlockSize, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*4, numThreadBlocks*ThreadBlockSize);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
}
else {
energyBuffer = CudaArray::create<float>(numThreadBlocks*ThreadBlockSize, "energyBuffer");
int pinnedBufferSize = max(paddedNumAtoms*6, numThreadBlocks*ThreadBlockSize);
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), 0));
}
for (int i = 0; i < numAtoms; i++) {
double mass = system.getParticleMass(i);
if (useDoublePrecision)
......@@ -237,11 +240,10 @@ void CudaContext::initialize() {
((float4*) pinnedBuffer)[i] = make_float4(0.0f, 0.0f, 0.0f, mass == 0.0 ? 0.0f : (float) (1.0/mass));
}
velm->upload(pinnedBuffer);
// bonded->initialize(system);
force = CudaArray::create<long3>(paddedNumAtoms, "force");
addAutoclearBuffer(force->getDevicePointer(), force->getSize()*6);
energyBuffer = CudaArray::create<float>(numThreadBlocks*ThreadBlockSize, "energyBuffer");
addAutoclearBuffer(energyBuffer->getDevicePointer(), energyBuffer->getSize());
bonded->initialize(system);
force = CudaArray::create<long long>(paddedNumAtoms*3, "force");
addAutoclearBuffer(force->getDevicePointer(), force->getSize()*force->getElementSize());
addAutoclearBuffer(energyBuffer->getDevicePointer(), energyBuffer->getSize()*energyBuffer->getElementSize());
atomIndexDevice = CudaArray::create<int>(paddedNumAtoms, "atomIndex");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i)
......@@ -448,82 +450,63 @@ void CudaContext::executeKernel(CUfunction kernel, void** arguments, int threads
}
void CudaContext::clearBuffer(CudaArray& array) {
clearBuffer(array.getDevicePointer(), array.getSize()*array.getElementSize()/4);
clearBuffer(array.getDevicePointer(), array.getSize()*array.getElementSize());
}
void CudaContext::clearBuffer(CUdeviceptr memory, int size) {
void* args[] = {&memory, &size};
int words = size/4;
void* args[] = {&memory, &words};
executeKernel(clearBufferKernel, args, size, 128);
}
void CudaContext::addAutoclearBuffer(CUdeviceptr memory, int size) {
autoclearBuffers.push_back(memory);
autoclearBufferSizes.push_back(size);
autoclearBufferSizes.push_back(size/4);
}
//void CudaContext::clearAutoclearBuffers() {
// int base = 0;
// int total = autoclearBufferSizes.size();
// while (total-base >= 6) {
// clearSixBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
// clearSixBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
// clearSixBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
// clearSixBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
// clearSixBuffersKernel.setArg<cl::Memory>(4, *autoclearBuffers[base+2]);
// clearSixBuffersKernel.setArg<cl_int>(5, autoclearBufferSizes[base+2]);
// clearSixBuffersKernel.setArg<cl::Memory>(6, *autoclearBuffers[base+3]);
// clearSixBuffersKernel.setArg<cl_int>(7, autoclearBufferSizes[base+3]);
// clearSixBuffersKernel.setArg<cl::Memory>(8, *autoclearBuffers[base+4]);
// clearSixBuffersKernel.setArg<cl_int>(9, autoclearBufferSizes[base+4]);
// clearSixBuffersKernel.setArg<cl::Memory>(10, *autoclearBuffers[base+5]);
// clearSixBuffersKernel.setArg<cl_int>(11, autoclearBufferSizes[base+5]);
// executeKernel(clearSixBuffersKernel, max(max(max(max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), autoclearBufferSizes[base+3]), autoclearBufferSizes[base+4]), autoclearBufferSizes[base+5]), 128);
// base += 6;
// }
// if (total-base == 5) {
// clearFiveBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
// clearFiveBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
// clearFiveBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
// clearFiveBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
// clearFiveBuffersKernel.setArg<cl::Memory>(4, *autoclearBuffers[base+2]);
// clearFiveBuffersKernel.setArg<cl_int>(5, autoclearBufferSizes[base+2]);
// clearFiveBuffersKernel.setArg<cl::Memory>(6, *autoclearBuffers[base+3]);
// clearFiveBuffersKernel.setArg<cl_int>(7, autoclearBufferSizes[base+3]);
// clearFiveBuffersKernel.setArg<cl::Memory>(8, *autoclearBuffers[base+4]);
// clearFiveBuffersKernel.setArg<cl_int>(9, autoclearBufferSizes[base+4]);
// executeKernel(clearFiveBuffersKernel, max(max(max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), autoclearBufferSizes[base+3]), autoclearBufferSizes[base+4]), 128);
// }
// else if (total-base == 4) {
// clearFourBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
// clearFourBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
// clearFourBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
// clearFourBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
// clearFourBuffersKernel.setArg<cl::Memory>(4, *autoclearBuffers[base+2]);
// clearFourBuffersKernel.setArg<cl_int>(5, autoclearBufferSizes[base+2]);
// clearFourBuffersKernel.setArg<cl::Memory>(6, *autoclearBuffers[base+3]);
// clearFourBuffersKernel.setArg<cl_int>(7, autoclearBufferSizes[base+3]);
// executeKernel(clearFourBuffersKernel, max(max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), autoclearBufferSizes[base+3]), 128);
// }
// else if (total-base == 3) {
// clearThreeBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
// clearThreeBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
// clearThreeBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
// clearThreeBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
// clearThreeBuffersKernel.setArg<cl::Memory>(4, *autoclearBuffers[base+2]);
// clearThreeBuffersKernel.setArg<cl_int>(5, autoclearBufferSizes[base+2]);
// executeKernel(clearThreeBuffersKernel, max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), 128);
// }
// else if (total-base == 2) {
// clearTwoBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
// clearTwoBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
// clearTwoBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
// clearTwoBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
// executeKernel(clearTwoBuffersKernel, max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), 128);
// }
// else if (total-base == 1) {
// clearBuffer(*autoclearBuffers[base], autoclearBufferSizes[base]);
// }
//}
void CudaContext::clearAutoclearBuffers() {
int base = 0;
int total = autoclearBufferSizes.size();
while (total-base >= 6) {
void* args[] = {&autoclearBuffers[base], &autoclearBufferSizes[base],
&autoclearBuffers[base+1], &autoclearBufferSizes[base+1],
&autoclearBuffers[base+2], &autoclearBufferSizes[base+2],
&autoclearBuffers[base+3], &autoclearBufferSizes[base+3],
&autoclearBuffers[base+4], &autoclearBufferSizes[base+4],
&autoclearBuffers[base+5], &autoclearBufferSizes[base+5]};
executeKernel(clearSixBuffersKernel, args, max(max(max(max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), autoclearBufferSizes[base+3]), autoclearBufferSizes[base+4]), autoclearBufferSizes[base+5]), 128);
base += 6;
}
if (total-base == 5) {
void* args[] = {&autoclearBuffers[base], &autoclearBufferSizes[base],
&autoclearBuffers[base+1], &autoclearBufferSizes[base+1],
&autoclearBuffers[base+2], &autoclearBufferSizes[base+2],
&autoclearBuffers[base+3], &autoclearBufferSizes[base+3],
&autoclearBuffers[base+4], &autoclearBufferSizes[base+4]};
executeKernel(clearFiveBuffersKernel, args, max(max(max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), autoclearBufferSizes[base+3]), autoclearBufferSizes[base+4]), 128);
}
else if (total-base == 4) {
void* args[] = {&autoclearBuffers[base], &autoclearBufferSizes[base],
&autoclearBuffers[base+1], &autoclearBufferSizes[base+1],
&autoclearBuffers[base+2], &autoclearBufferSizes[base+2],
&autoclearBuffers[base+3], &autoclearBufferSizes[base+3]};
executeKernel(clearFourBuffersKernel, args, max(max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), autoclearBufferSizes[base+3]), 128);
}
else if (total-base == 3) {
void* args[] = {&autoclearBuffers[base], &autoclearBufferSizes[base],
&autoclearBuffers[base+1], &autoclearBufferSizes[base+1],
&autoclearBuffers[base+2], &autoclearBufferSizes[base+2]};
executeKernel(clearThreeBuffersKernel, args, max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), 128);
}
else if (total-base == 2) {
void* args[] = {&autoclearBuffers[base], &autoclearBufferSizes[base],
&autoclearBuffers[base+1], &autoclearBufferSizes[base+1]};
executeKernel(clearTwoBuffersKernel, args, max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), 128);
}
else if (total-base == 1) {
clearBuffer(autoclearBuffers[base], autoclearBufferSizes[base]*4);
}
}
void CudaContext::tagAtomsInMolecule(int atom, int molecule, vector<int>& atomMolecule, vector<vector<int> >& atomBonds) {
// Recursively tag atoms as belonging to a particular molecule.
......@@ -539,7 +522,7 @@ void CudaContext::tagAtomsInMolecule(int atom, int molecule, vector<int>& atomMo
*/
class CudaContext::VirtualSiteInfo : public CudaForceInfo {
public:
VirtualSiteInfo(const System& system) : CudaForceInfo(0) {
VirtualSiteInfo(const System& system) {
for (int i = 0; i < system.getNumParticles(); i++) {
if (system.isVirtualSite(i)) {
siteTypes.push_back(&typeid(system.getVirtualSite(i)));
......
......@@ -125,41 +125,42 @@ public:
return *velm;
}
/**
* Get the array which contains the force on each atom (respresented as a long3 in 64 bit fixed point).
* Get the array which contains the force on each atom (represented as three long longs in 64 bit fixed point).
*/
CudaArray& getForce() {
return *force;
}
// /**
// * Get the array which contains the buffers in which forces are computed.
// */
// CudaArray<mm_float4>& getForceBuffers() {
// return *forceBuffers;
// }
// /**
// * Get the array which contains a contribution to each force represented as 64 bit fixed point.
// */
// CudaArray<cl_long>& getLongForceBuffer() {
// return *longForceBuffer;
// }
// /**
// * Get the array which contains the buffer in which energy is computed.
// */
// CudaArray<cl_float>& getEnergyBuffer() {
// return *energyBuffer;
// }
// /**
// * Get the array which contains the index of each atom.
// */
// CudaArray<cl_int>& getAtomIndex() {
// return *atomIndex;
// }
// /**
// * Get the number of cells by which the positions are offset.
// */
// std::vector<mm_int4>& getPosCellOffsets() {
// return posCellOffsets;
// }
/**
* Get the array which contains the buffer in which energy is computed.
*/
CudaArray& getEnergyBuffer() {
return *energyBuffer;
}
/**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
*/
void* getPinnedBuffer() {
return pinnedBuffer;
}
/**
* Get the host-side vector which contains the index of each atom.
*/
const std::vector<int>& getAtomIndex() const {
return atomIndex;
}
/**
* Get the array which contains the index of each atom.
*/
CudaArray& getAtomIndexArray() {
return *atomIndexDevice;
}
/**
* Get the number of cells by which the positions are offset.
*/
std::vector<int4>& getPosCellOffsets() {
return posCellOffsets;
}
/**
* Replace all occurrences of a list of substrings.
*
......@@ -210,20 +211,20 @@ public:
* Set all elements of an array to 0.
*
* @param memory the memory to clear
* @param size the number of 4-byte elements in the buffer
* @param size the size of the buffer in bytes
*/
void clearBuffer(CUdeviceptr memory, int size);
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*
* @param memory the memory to clear
* @param size the number of 4-byte elements in the buffer
* @param size the size of the buffer in bytes
*/
void addAutoclearBuffer(CUdeviceptr memory, int size);
// /**
// * Clear all buffers that have been registered with addAutoclearBuffer().
// */
// void clearAutoclearBuffers();
/**
* Clear all buffers that have been registered with addAutoclearBuffer().
*/
void clearAutoclearBuffers();
/**
* Get the current simulation time.
*/
......@@ -309,27 +310,27 @@ public:
/**
* Convert a CUDA result code to the corresponding string description.
*/
std::string getErrorString(CUresult result);
static std::string getErrorString(CUresult result);
// /**
// * Get the size of the periodic box.
// */
// float4 getPeriodicBoxSize() const {
// return periodicBoxSize;
// }
// /**
// * Set the size of the periodic box.
// */
// void setPeriodicBoxSize(double xsize, double ysize, double zsize) {
// periodicBoxSize = make_float4((float) xsize, (float) ysize, (float) zsize, 0);
// invPeriodicBoxSize = make_float4((float) (1.0/xsize), (float) (1.0/ysize), (float) (1.0/zsize), 0);
// }
// /**
// * Get the inverse of the size of the periodic box.
// */
// float4 getInvPeriodicBoxSize() const {
// return invPeriodicBoxSize;
// }
/**
* Get the size of the periodic box.
*/
double4 getPeriodicBoxSize() const {
return periodicBoxSize;
}
/**
* Set the size of the periodic box.
*/
void setPeriodicBoxSize(double xsize, double ysize, double zsize) {
periodicBoxSize = make_double4(xsize, ysize, zsize, 0.0);
invPeriodicBoxSize = make_double4(1.0/xsize, 1.0/ysize, 1.0/zsize, 0.0);
}
/**
* Get the inverse of the size of the periodic box.
*/
double4 getInvPeriodicBoxSize() const {
return invPeriodicBoxSize;
}
/**
* Get the CudaIntegrationUtilities for this context.
*/
......@@ -342,12 +343,12 @@ public:
CudaExpressionUtilities& getExpressionUtilities() {
return *expression;
}
// /**
// * Get the CudaBondedUtilities for this context.
// */
// CudaBondedUtilities& getBondedUtilities() {
// return *bonded;
// }
/**
* Get the CudaBondedUtilities for this context.
*/
CudaBondedUtilities& getBondedUtilities() {
return *bonded;
}
// /**
// * Get the CudaNonbondedUtilities for this context.
// */
......@@ -428,8 +429,8 @@ private:
int numThreadBlocks;
bool useBlockingSync, useDoublePrecision, accumulateInDouble, contextIsValid, atomsWereReordered, moleculesInvalid;
std::string compiler, tempDir, gpuArchitecture;
float4 periodicBoxSize;
float4 invPeriodicBoxSize;
double4 periodicBoxSize;
double4 invPeriodicBoxSize;
std::string defaultOptimizationOptions;
std::map<std::string, std::string> compilationDefines;
CUcontext context;
......@@ -456,7 +457,7 @@ private:
std::vector<ReorderListener*> reorderListeners;
CudaIntegrationUtilities* integration;
CudaExpressionUtilities* expression;
// CudaBondedUtilities* bonded;
CudaBondedUtilities* bonded;
// CudaNonbondedUtilities* nonbonded;
WorkThread* thread;
};
......
......@@ -39,13 +39,7 @@ namespace OpenMM {
class OPENMM_EXPORT CudaForceInfo {
public:
CudaForceInfo(int requiredForceBuffers) : requiredForceBuffers(requiredForceBuffers) {
}
/**
* Get the number of force buffers this force requires.
*/
int getRequiredForceBuffers() {
return requiredForceBuffers;
CudaForceInfo() {
}
/**
* Get whether or not two particles have identical force field parameters.
......@@ -63,8 +57,6 @@ public:
* Get whether two particle groups are identical.
*/
virtual bool areGroupsIdentical(int group1, int group2);
private:
int requiredForceBuffers;
};
} // namespace OpenMM
......
......@@ -672,15 +672,15 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
delete vsiteOutOfPlaneWeights;
}
//void CudaIntegrationUtilities::applyConstraints(double tol) {
// applyConstraints(false, tol);
//}
//
//void CudaIntegrationUtilities::applyVelocityConstraints(double tol) {
// applyConstraints(true, tol);
//}
//
//void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double tol) {
void CudaIntegrationUtilities::applyConstraints(double tol) {
applyConstraints(false, tol);
}
void CudaIntegrationUtilities::applyVelocityConstraints(double tol) {
applyConstraints(true, tol);
}
void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double tol) {
// bool hasInitialized;
// CUfunction settleKernel, shakeKernel, ccmaForceKernel, ccmaUpdateKernel;
// if (constrainVelocities) {
......@@ -772,19 +772,19 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
// }
// }
// }
//}
//
//void CudaIntegrationUtilities::computeVirtualSites() {
}
void CudaIntegrationUtilities::computeVirtualSites() {
// if (numVsites > 0)
// context.executeKernel(vsitePositionKernel, numVsites);
//}
//
//void CudaIntegrationUtilities::distributeForcesFromVirtualSites() {
}
void CudaIntegrationUtilities::distributeForcesFromVirtualSites() {
// if (numVsites > 0) {
// vsiteForceKernel.setArg<cl::Buffer>(1, context.getForce().getDeviceBuffer());
// context.executeKernel(vsiteForceKernel, numVsites);
// }
//}
}
void CudaIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSeed) {
if (random != NULL) {
......
......@@ -25,6 +25,7 @@
* -------------------------------------------------------------------------- */
#include "CudaKernelFactory.h"
#include "CudaKernels.h"
//#include "CudaParallelKernels.h"
#include "CudaPlatform.h"
#include "openmm/internal/ContextImpl.h"
......@@ -66,64 +67,64 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
// if (name == CalcCustomCompoundBondForceKernel::Name())
// return new CudaParallelCalcCustomCompoundBondForceKernel(name, platform, data, context.getSystem());
// }
// CudaContext& cl = *data.contexts[0];
// if (name == CalcForcesAndEnergyKernel::Name())
// return new CudaCalcForcesAndEnergyKernel(name, platform, cl);
// if (name == UpdateStateDataKernel::Name())
// return new CudaUpdateStateDataKernel(name, platform, cl);
// if (name == ApplyConstraintsKernel::Name())
// return new CudaApplyConstraintsKernel(name, platform, cl);
// if (name == VirtualSitesKernel::Name())
// return new CudaVirtualSitesKernel(name, platform, cl);
// if (name == CalcHarmonicBondForceKernel::Name())
// return new CudaCalcHarmonicBondForceKernel(name, platform, cl, context.getSystem());
CudaContext& cu = *data.contexts[0];
if (name == CalcForcesAndEnergyKernel::Name())
return new CudaCalcForcesAndEnergyKernel(name, platform, cu);
if (name == UpdateStateDataKernel::Name())
return new CudaUpdateStateDataKernel(name, platform, cu);
if (name == ApplyConstraintsKernel::Name())
return new CudaApplyConstraintsKernel(name, platform, cu);
if (name == VirtualSitesKernel::Name())
return new CudaVirtualSitesKernel(name, platform, cu);
if (name == CalcHarmonicBondForceKernel::Name())
return new CudaCalcHarmonicBondForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcCustomBondForceKernel::Name())
// return new CudaCalcCustomBondForceKernel(name, platform, cl, context.getSystem());
// return new CudaCalcCustomBondForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcHarmonicAngleForceKernel::Name())
// return new CudaCalcHarmonicAngleForceKernel(name, platform, cl, context.getSystem());
// return new CudaCalcHarmonicAngleForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcCustomAngleForceKernel::Name())
// return new CudaCalcCustomAngleForceKernel(name, platform, cl, context.getSystem());
// return new CudaCalcCustomAngleForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcPeriodicTorsionForceKernel::Name())
// return new CudaCalcPeriodicTorsionForceKernel(name, platform, cl, context.getSystem());
// return new CudaCalcPeriodicTorsionForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcRBTorsionForceKernel::Name())
// return new CudaCalcRBTorsionForceKernel(name, platform, cl, context.getSystem());
// return new CudaCalcRBTorsionForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcCMAPTorsionForceKernel::Name())
// return new CudaCalcCMAPTorsionForceKernel(name, platform, cl, context.getSystem());
// return new CudaCalcCMAPTorsionForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcCustomTorsionForceKernel::Name())
// return new CudaCalcCustomTorsionForceKernel(name, platform, cl, context.getSystem());
// return new CudaCalcCustomTorsionForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcNonbondedForceKernel::Name())
// return new CudaCalcNonbondedForceKernel(name, platform, cl, context.getSystem());
// return new CudaCalcNonbondedForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcCustomNonbondedForceKernel::Name())
// return new CudaCalcCustomNonbondedForceKernel(name, platform, cl, context.getSystem());
// return new CudaCalcCustomNonbondedForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcGBSAOBCForceKernel::Name())
// return new CudaCalcGBSAOBCForceKernel(name, platform, cl);
// return new CudaCalcGBSAOBCForceKernel(name, platform, cu);
// if (name == CalcCustomGBForceKernel::Name())
// return new CudaCalcCustomGBForceKernel(name, platform, cl, context.getSystem());
// return new CudaCalcCustomGBForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcCustomExternalForceKernel::Name())
// return new CudaCalcCustomExternalForceKernel(name, platform, cl, context.getSystem());
// return new CudaCalcCustomExternalForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcCustomHbondForceKernel::Name())
// return new CudaCalcCustomHbondForceKernel(name, platform, cl, context.getSystem());
// return new CudaCalcCustomHbondForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcCustomCompoundBondForceKernel::Name())
// return new CudaCalcCustomCompoundBondForceKernel(name, platform, cl, context.getSystem());
// if (name == IntegrateVerletStepKernel::Name())
// return new CudaIntegrateVerletStepKernel(name, platform, cl);
// return new CudaCalcCustomCompoundBondForceKernel(name, platform, cu, context.getSystem());
if (name == IntegrateVerletStepKernel::Name())
return new CudaIntegrateVerletStepKernel(name, platform, cu);
// if (name == IntegrateLangevinStepKernel::Name())
// return new CudaIntegrateLangevinStepKernel(name, platform, cl);
// return new CudaIntegrateLangevinStepKernel(name, platform, cu);
// if (name == IntegrateBrownianStepKernel::Name())
// return new CudaIntegrateBrownianStepKernel(name, platform, cl);
// return new CudaIntegrateBrownianStepKernel(name, platform, cu);
// if (name == IntegrateVariableVerletStepKernel::Name())
// return new CudaIntegrateVariableVerletStepKernel(name, platform, cl);
// return new CudaIntegrateVariableVerletStepKernel(name, platform, cu);
// if (name == IntegrateVariableLangevinStepKernel::Name())
// return new CudaIntegrateVariableLangevinStepKernel(name, platform, cl);
// return new CudaIntegrateVariableLangevinStepKernel(name, platform, cu);
// if (name == IntegrateCustomStepKernel::Name())
// return new CudaIntegrateCustomStepKernel(name, platform, cl);
// return new CudaIntegrateCustomStepKernel(name, platform, cu);
// if (name == ApplyAndersenThermostatKernel::Name())
// return new CudaApplyAndersenThermostatKernel(name, platform, cl);
// return new CudaApplyAndersenThermostatKernel(name, platform, cu);
// if (name == ApplyMonteCarloBarostatKernel::Name())
// return new CudaApplyMonteCarloBarostatKernel(name, platform, cl);
// if (name == CalcKineticEnergyKernel::Name())
// return new CudaCalcKineticEnergyKernel(name, platform, cl);
// return new CudaApplyMonteCarloBarostatKernel(name, platform, cu);
if (name == CalcKineticEnergyKernel::Name())
return new CudaCalcKineticEnergyKernel(name, platform, cu);
// if (name == RemoveCMMotionKernel::Name())
// return new CudaRemoveCMMotionKernel(name, platform, cl);
// return new CudaRemoveCMMotionKernel(name, platform, cu);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
}
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -28,236 +28,242 @@
* -------------------------------------------------------------------------- */
#include "CudaPlatform.h"
//#include "CudaArray.h"
#include "CudaArray.h"
#include "CudaContext.h"
//#include "CudaFFT3D.h"
//#include "CudaParameterSet.h"
//#include "CudaSort.h"
#include "CudaSort.h"
#include "openmm/kernels.h"
#include "openmm/System.h"
namespace OpenMM {
///**
// * This kernel is invoked at the beginning and end of force and energy computations. It gives the
// * Platform a chance to clear buffers and do other initialization at the beginning, and to do any
// * necessary work at the end to determine the final results.
// */
//class CudaCalcForcesAndEnergyKernel : public CalcForcesAndEnergyKernel {
//public:
// CudaCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CudaContext& cl) : CalcForcesAndEnergyKernel(name, platform), cl(cl) {
// }
// /**
// * Initialize the kernel.
// *
// * @param system the System this kernel will be applied to
// */
// void initialize(const System& system);
// /**
// * This is called at the beginning of each force/energy computation, before calcForcesAndEnergy() has been called on
// * any ForceImpl.
// *
// * @param context the context in which to execute this kernel
// * @param includeForce true if forces should be computed
// * @param includeEnergy true if potential energy should be computed
// * @param groups a set of bit flags for which force groups to include
// */
// void beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
// /**
// * This is called at the end of each force/energy computation, after calcForcesAndEnergy() has been called on
// * every ForceImpl.
// *
// * @param context the context in which to execute this kernel
// * @param includeForce true if forces should be computed
// * @param includeEnergy true if potential energy should be computed
// * @param groups a set of bit flags for which force groups to include
// * @return the potential energy of the system. This value is added to all values returned by ForceImpls'
// * calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the
// * energy directly, <i>or</i> add it to an internal buffer so that it will be included here.
// */
// double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
//private:
// CudaContext& cl;
//};
//
///**
// * This kernel provides methods for setting and retrieving various state data: time, positions,
// * velocities, and forces.
// */
//class CudaUpdateStateDataKernel : public UpdateStateDataKernel {
//public:
// CudaUpdateStateDataKernel(std::string name, const Platform& platform, CudaContext& cl) : UpdateStateDataKernel(name, platform), cl(cl) {
// }
// /**
// * Initialize the kernel.
// *
// * @param system the System this kernel will be applied to
// */
// void initialize(const System& system);
// /**
// * Get the current time (in picoseconds).
// *
// * @param context the context in which to execute this kernel
// */
// double getTime(const ContextImpl& context) const;
// /**
// * Set the current time (in picoseconds).
// *
// * @param context the context in which to execute this kernel
// */
// void setTime(ContextImpl& context, double time);
// /**
// * Get the positions of all particles.
// *
// * @param positions on exit, this contains the particle positions
// */
// void getPositions(ContextImpl& context, std::vector<Vec3>& positions);
// /**
// * Set the positions of all particles.
// *
// * @param positions a vector containg the particle positions
// */
// void setPositions(ContextImpl& context, const std::vector<Vec3>& positions);
// /**
// * Get the velocities of all particles.
// *
// * @param velocities on exit, this contains the particle velocities
// */
// void getVelocities(ContextImpl& context, std::vector<Vec3>& velocities);
// /**
// * Set the velocities of all particles.
// *
// * @param velocities a vector containg the particle velocities
// */
// void setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities);
// /**
// * Get the current forces on all particles.
// *
// * @param forces on exit, this contains the forces
// */
// void getForces(ContextImpl& context, std::vector<Vec3>& forces);
// /**
// * Get the current periodic box vectors.
// *
// * @param a on exit, this contains the vector defining the first edge of the periodic box
// * @param b on exit, this contains the vector defining the second edge of the periodic box
// * @param c on exit, this contains the vector defining the third edge of the periodic box
// */
// void getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const;
// /**
// * Set the current periodic box vectors.
// *
// * @param a the vector defining the first edge of the periodic box
// * @param b the vector defining the second edge of the periodic box
// * @param c the vector defining the third edge of the periodic box
// */
// void setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const;
// /**
// * Create a checkpoint recording the current state of the Context.
// *
// * @param stream an output stream the checkpoint data should be written to
// */
// void createCheckpoint(ContextImpl& context, std::ostream& stream);
// /**
// * Load a checkpoint that was written by createCheckpoint().
// *
// * @param stream an input stream the checkpoint data should be read from
// */
// void loadCheckpoint(ContextImpl& context, std::istream& stream);
//private:
// CudaContext& cl;
//};
//
///**
// * This kernel modifies the positions of particles to enforce distance constraints.
// */
//class CudaApplyConstraintsKernel : public ApplyConstraintsKernel {
//public:
// CudaApplyConstraintsKernel(std::string name, const Platform& platform, CudaContext& cl) : ApplyConstraintsKernel(name, platform),
// cl(cl), hasInitializedKernel(false) {
// }
// /**
// * Initialize the kernel.
// *
// * @param system the System this kernel will be applied to
// */
// void initialize(const System& system);
// /**
// * Update particle positions to enforce constraints.
// *
// * @param context the context in which to execute this kernel
// * @param tol the distance tolerance within which constraints must be satisfied.
// */
// void apply(ContextImpl& context, double tol);
//private:
// CudaContext& cl;
// bool hasInitializedKernel;
// cl::Kernel applyDeltasKernel;
//};
//
///**
// * This kernel recomputes the positions of virtual sites.
// */
//class CudaVirtualSitesKernel : public VirtualSitesKernel {
//public:
// CudaVirtualSitesKernel(std::string name, const Platform& platform, CudaContext& cl) : VirtualSitesKernel(name, platform), cl(cl) {
// }
// /**
// * Initialize the kernel.
// *
// * @param system the System this kernel will be applied to
// */
// void initialize(const System& system);
// /**
// * Compute the virtual site locations.
// *
// * @param context the context in which to execute this kernel
// */
// void computePositions(ContextImpl& context);
//private:
// CudaContext& cl;
//};
//
///**
// * This kernel is invoked by HarmonicBondForce to calculate the forces acting on the system and the energy of the system.
// */
//class CudaCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel {
//public:
// CudaCalcHarmonicBondForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcHarmonicBondForceKernel(name, platform),
// hasInitializedKernel(false), cl(cl), system(system), params(NULL) {
// }
// ~CudaCalcHarmonicBondForceKernel();
// /**
// * Initialize the kernel.
// *
// * @param system the System this kernel will be applied to
// * @param force the HarmonicBondForce this kernel will be used for
// */
// void initialize(const System& system, const HarmonicBondForce& force);
// /**
// * Execute the kernel to calculate the forces and/or energy.
// *
// * @param context the context in which to execute this kernel
// * @param includeForces true if forces should be calculated
// * @param includeEnergy true if the energy should be calculated
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
//private:
// int numBonds;
// bool hasInitializedKernel;
// CudaContext& cl;
// System& system;
// CudaArray<mm_float2>* params;
//};
//
/**
* This kernel is invoked at the beginning and end of force and energy computations. It gives the
* Platform a chance to clear buffers and do other initialization at the beginning, and to do any
* necessary work at the end to determine the final results.
*/
class CudaCalcForcesAndEnergyKernel : public CalcForcesAndEnergyKernel {
public:
CudaCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcForcesAndEnergyKernel(name, platform), cu(cu) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* This is called at the beginning of each force/energy computation, before calcForcesAndEnergy() has been called on
* any ForceImpl.
*
* @param context the context in which to execute this kernel
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
*/
void beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
/**
* This is called at the end of each force/energy computation, after calcForcesAndEnergy() has been called on
* every ForceImpl.
*
* @param context the context in which to execute this kernel
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
* @return the potential energy of the system. This value is added to all values returned by ForceImpls'
* calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the
* energy directly, <i>or</i> add it to an internal buffer so that it will be included here.
*/
double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
private:
CudaContext& cu;
};
/**
* This kernel provides methods for setting and retrieving various state data: time, positions,
* velocities, and forces.
*/
class CudaUpdateStateDataKernel : public UpdateStateDataKernel {
public:
CudaUpdateStateDataKernel(std::string name, const Platform& platform, CudaContext& cu) : UpdateStateDataKernel(name, platform), cu(cu) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* Get the current time (in picoseconds).
*
* @param context the context in which to execute this kernel
*/
double getTime(const ContextImpl& context) const;
/**
* Set the current time (in picoseconds).
*
* @param context the context in which to execute this kernel
*/
void setTime(ContextImpl& context, double time);
/**
* Get the positions of all particles.
*
* @param positions on exit, this contains the particle positions
*/
void getPositions(ContextImpl& context, std::vector<Vec3>& positions);
/**
* Set the positions of all particles.
*
* @param positions a vector containg the particle positions
*/
void setPositions(ContextImpl& context, const std::vector<Vec3>& positions);
/**
* Get the velocities of all particles.
*
* @param velocities on exit, this contains the particle velocities
*/
void getVelocities(ContextImpl& context, std::vector<Vec3>& velocities);
/**
* Set the velocities of all particles.
*
* @param velocities a vector containg the particle velocities
*/
void setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities);
/**
* Get the current forces on all particles.
*
* @param forces on exit, this contains the forces
*/
void getForces(ContextImpl& context, std::vector<Vec3>& forces);
/**
* Get the current periodic box vectors.
*
* @param a on exit, this contains the vector defining the first edge of the periodic box
* @param b on exit, this contains the vector defining the second edge of the periodic box
* @param c on exit, this contains the vector defining the third edge of the periodic box
*/
void getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const;
/**
* Set the current periodic box vectors.
*
* @param a the vector defining the first edge of the periodic box
* @param b the vector defining the second edge of the periodic box
* @param c the vector defining the third edge of the periodic box
*/
void setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const;
/**
* Create a checkpoint recording the current state of the Context.
*
* @param stream an output stream the checkpoint data should be written to
*/
void createCheckpoint(ContextImpl& context, std::ostream& stream);
/**
* Load a checkpoint that was written by createCheckpoint().
*
* @param stream an input stream the checkpoint data should be read from
*/
void loadCheckpoint(ContextImpl& context, std::istream& stream);
private:
CudaContext& cu;
};
/**
* This kernel modifies the positions of particles to enforce distance constraints.
*/
class CudaApplyConstraintsKernel : public ApplyConstraintsKernel {
public:
CudaApplyConstraintsKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyConstraintsKernel(name, platform),
cu(cu), hasInitializedKernel(false) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* Update particle positions to enforce constraints.
*
* @param context the context in which to execute this kernel
* @param tol the distance tolerance within which constraints must be satisfied.
*/
void apply(ContextImpl& context, double tol);
private:
CudaContext& cu;
bool hasInitializedKernel;
CUfunction applyDeltasKernel;
};
/**
* This kernel recomputes the positions of virtual sites.
*/
class CudaVirtualSitesKernel : public VirtualSitesKernel {
public:
CudaVirtualSitesKernel(std::string name, const Platform& platform, CudaContext& cu) : VirtualSitesKernel(name, platform), cu(cu) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* Compute the virtual site locations.
*
* @param context the context in which to execute this kernel
*/
void computePositions(ContextImpl& context);
private:
CudaContext& cu;
};
/**
* This kernel is invoked by HarmonicBondForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel {
public:
CudaCalcHarmonicBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcHarmonicBondForceKernel(name, platform),
hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
}
~CudaCalcHarmonicBondForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the HarmonicBondForce this kernel will be used for
*/
void initialize(const System& system, const HarmonicBondForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the HarmonicBondForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force);
private:
int numBonds;
bool hasInitializedKernel;
CudaContext& cu;
System& system;
CudaArray* params;
};
///**
// * This kernel is invoked by CustomBondForce to calculate the forces acting on the system and the energy of the system.
// */
//class CudaCalcCustomBondForceKernel : public CalcCustomBondForceKernel {
//public:
// CudaCalcCustomBondForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcCustomBondForceKernel(name, platform),
// hasInitializedKernel(false), cl(cl), system(system), params(NULL), globals(NULL) {
// CudaCalcCustomBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomBondForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) {
// }
// ~CudaCalcCustomBondForceKernel();
// /**
......@@ -276,10 +282,17 @@ namespace OpenMM {
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the CustomBondForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const CustomBondForce& force);
//private:
// int numBonds;
// bool hasInitializedKernel;
// CudaContext& cl;
// CudaContext& cu;
// System& system;
// CudaParameterSet* params;
// CudaArray<cl_float>* globals;
......@@ -292,8 +305,8 @@ namespace OpenMM {
// */
//class CudaCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
//public:
// CudaCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcHarmonicAngleForceKernel(name, platform),
// hasInitializedKernel(false), cl(cl), system(system), params(NULL) {
// CudaCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcHarmonicAngleForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
// }
// ~CudaCalcHarmonicAngleForceKernel();
// /**
......@@ -312,10 +325,17 @@ namespace OpenMM {
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the HarmonicAngleForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force);
//private:
// int numAngles;
// bool hasInitializedKernel;
// CudaContext& cl;
// CudaContext& cu;
// System& system;
// CudaArray<mm_float2>* params;
//};
......@@ -325,8 +345,8 @@ namespace OpenMM {
// */
//class CudaCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel {
//public:
// CudaCalcCustomAngleForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcCustomAngleForceKernel(name, platform),
// hasInitializedKernel(false), cl(cl), system(system), params(NULL), globals(NULL) {
// CudaCalcCustomAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomAngleForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) {
// }
// ~CudaCalcCustomAngleForceKernel();
// /**
......@@ -345,10 +365,17 @@ namespace OpenMM {
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the CustomAngleForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force);
//private:
// int numAngles;
// bool hasInitializedKernel;
// CudaContext& cl;
// CudaContext& cu;
// System& system;
// CudaParameterSet* params;
// CudaArray<cl_float>* globals;
......@@ -361,8 +388,8 @@ namespace OpenMM {
// */
//class CudaCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
//public:
// CudaCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcPeriodicTorsionForceKernel(name, platform),
// hasInitializedKernel(false), cl(cl), system(system), params(NULL) {
// CudaCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcPeriodicTorsionForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
// }
// ~CudaCalcPeriodicTorsionForceKernel();
// /**
......@@ -381,10 +408,17 @@ namespace OpenMM {
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the PeriodicTorsionForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force);
//private:
// int numTorsions;
// bool hasInitializedKernel;
// CudaContext& cl;
// CudaContext& cu;
// System& system;
// CudaArray<mm_float4>* params;
//};
......@@ -394,8 +428,8 @@ namespace OpenMM {
// */
//class CudaCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
//public:
// CudaCalcRBTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcRBTorsionForceKernel(name, platform),
// hasInitializedKernel(false), cl(cl), system(system), params(NULL) {
// CudaCalcRBTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcRBTorsionForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
// }
// ~CudaCalcRBTorsionForceKernel();
// /**
......@@ -414,10 +448,17 @@ namespace OpenMM {
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the RBTorsionForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force);
//private:
// int numTorsions;
// bool hasInitializedKernel;
// CudaContext& cl;
// CudaContext& cu;
// System& system;
// CudaArray<mm_float8>* params;
//};
......@@ -427,8 +468,8 @@ namespace OpenMM {
// */
//class CudaCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
//public:
// CudaCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcCMAPTorsionForceKernel(name, platform),
// hasInitializedKernel(false), cl(cl), system(system), coefficients(NULL), mapPositions(NULL), torsionMaps(NULL) {
// CudaCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCMAPTorsionForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), system(system), coefficients(NULL), mapPositions(NULL), torsionMaps(NULL) {
// }
// ~CudaCalcCMAPTorsionForceKernel();
// /**
......@@ -450,7 +491,7 @@ namespace OpenMM {
//private:
// int numTorsions;
// bool hasInitializedKernel;
// CudaContext& cl;
// CudaContext& cu;
// System& system;
// CudaArray<mm_float4>* coefficients;
// CudaArray<mm_int2>* mapPositions;
......@@ -462,8 +503,8 @@ namespace OpenMM {
// */
//class CudaCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel {
//public:
// CudaCalcCustomTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcCustomTorsionForceKernel(name, platform),
// hasInitializedKernel(false), cl(cl), system(system), params(NULL), globals(NULL) {
// CudaCalcCustomTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomTorsionForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) {
// }
// ~CudaCalcCustomTorsionForceKernel();
// /**
......@@ -482,10 +523,17 @@ namespace OpenMM {
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the CustomTorsionForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force);
//private:
// int numTorsions;
// bool hasInitializedKernel;
// CudaContext& cl;
// CudaContext& cu;
// System& system;
// CudaParameterSet* params;
// CudaArray<cl_float>* globals;
......@@ -498,8 +546,8 @@ namespace OpenMM {
// */
//class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
//public:
// CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcNonbondedForceKernel(name, platform),
// hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), pmeGrid(NULL),
// CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcNonbondedForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), pmeGrid(NULL),
// pmeGrid2(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeBsplineDTheta(NULL),
// pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL), fft(NULL) {
// }
......@@ -540,7 +588,7 @@ namespace OpenMM {
// static const char* clMaxValue() {return "(int2) (INT_MAX, INT_MAX)";}
// static const char* clSortKey() {return "value.y";}
// };
// CudaContext& cl;
// CudaContext& cu;
// bool hasInitializedKernel;
// CudaArray<mm_float2>* sigmaEpsilon;
// CudaArray<mm_float4>* exceptionParams;
......@@ -556,16 +604,16 @@ namespace OpenMM {
// CudaArray<mm_int2>* pmeAtomGridIndex;
// CudaSort<SortTrait>* sort;
// CudaFFT3D* fft;
// cl::Kernel ewaldSumsKernel;
// cl::Kernel ewaldForcesKernel;
// cl::Kernel pmeGridIndexKernel;
// cl::Kernel pmeAtomRangeKernel;
// cl::Kernel pmeZIndexKernel;
// cl::Kernel pmeUpdateBsplinesKernel;
// cl::Kernel pmeSpreadChargeKernel;
// cl::Kernel pmeFinishSpreadChargeKernel;
// cl::Kernel pmeConvolutionKernel;
// cl::Kernel pmeInterpolateForceKernel;
// CUfunction ewaldSumsKernel;
// CUfunction ewaldForcesKernel;
// CUfunction pmeGridIndexKernel;
// CUfunction pmeAtomRangeKernel;
// CUfunction pmeZIndexKernel;
// CUfunction pmeUpdateBsplinesKernel;
// CUfunction pmeSpreadChargeKernel;
// CUfunction pmeFinishSpreadChargeKernel;
// CUfunction pmeConvolutionKernel;
// CUfunction pmeInterpolateForceKernel;
// std::map<std::string, std::string> pmeDefines;
// double ewaldSelfEnergy, dispersionCoefficient, alpha;
// int interpolateForceThreads;
......@@ -578,8 +626,8 @@ namespace OpenMM {
// */
//class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
//public:
// CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcCustomNonbondedForceKernel(name, platform),
// hasInitializedKernel(false), cl(cl), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), system(system) {
// CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomNonbondedForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), system(system) {
// }
// ~CudaCalcCustomNonbondedForceKernel();
// /**
......@@ -598,9 +646,16 @@ namespace OpenMM {
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the CustomNonbondedForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
//private:
// bool hasInitializedKernel;
// CudaContext& cl;
// CudaContext& cu;
// CudaParameterSet* params;
// CudaArray<cl_float>* globals;
// CudaArray<mm_float4>* tabulatedFunctionParams;
......@@ -615,7 +670,7 @@ namespace OpenMM {
// */
//class CudaCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel {
//public:
// CudaCalcGBSAOBCForceKernel(std::string name, const Platform& platform, CudaContext& cl) : CalcGBSAOBCForceKernel(name, platform), cl(cl),
// CudaCalcGBSAOBCForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcGBSAOBCForceKernel(name, platform), cu(cu),
// hasCreatedKernels(false), params(NULL), bornSum(NULL), longBornSum(NULL), bornRadii(NULL), bornForce(NULL),
// longBornForce(NULL), obcChain(NULL) {
// }
......@@ -636,11 +691,18 @@ namespace OpenMM {
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the GBSAOBCForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
//private:
// double prefactor;
// bool hasCreatedKernels;
// int maxTiles;
// CudaContext& cl;
// CudaContext& cu;
// CudaArray<mm_float2>* params;
// CudaArray<cl_float>* bornSum;
// CudaArray<cl_long>* longBornSum;
......@@ -648,10 +710,10 @@ namespace OpenMM {
// CudaArray<cl_float>* bornForce;
// CudaArray<cl_long>* longBornForce;
// CudaArray<cl_float>* obcChain;
// cl::Kernel computeBornSumKernel;
// cl::Kernel reduceBornSumKernel;
// cl::Kernel force1Kernel;
// cl::Kernel reduceBornForceKernel;
// CUfunction computeBornSumKernel;
// CUfunction reduceBornSumKernel;
// CUfunction force1Kernel;
// CUfunction reduceBornForceKernel;
//};
//
///**
......@@ -659,8 +721,8 @@ namespace OpenMM {
// */
//class CudaCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
//public:
// CudaCalcCustomGBForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcCustomGBForceKernel(name, platform),
// hasInitializedKernels(false), cl(cl), params(NULL), computedValues(NULL), energyDerivs(NULL), longEnergyDerivs(NULL), globals(NULL),
// CudaCalcCustomGBForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomGBForceKernel(name, platform),
// hasInitializedKernels(false), cu(cu), params(NULL), computedValues(NULL), energyDerivs(NULL), longEnergyDerivs(NULL), globals(NULL),
// valueBuffers(NULL), longValueBuffers(NULL), tabulatedFunctionParams(NULL), system(system) {
// }
// ~CudaCalcCustomGBForceKernel();
......@@ -680,10 +742,17 @@ namespace OpenMM {
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the CustomGBForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
//private:
// bool hasInitializedKernels, needParameterGradient;
// int maxTiles, numComputedValues;
// CudaContext& cl;
// CudaContext& cu;
// CudaParameterSet* params;
// CudaParameterSet* computedValues;
// CudaParameterSet* energyDerivs;
......@@ -697,7 +766,7 @@ namespace OpenMM {
// std::vector<CudaArray<mm_float4>*> tabulatedFunctions;
// std::vector<bool> pairValueUsesParam, pairEnergyUsesParam, pairEnergyUsesValue;
// System& system;
// cl::Kernel pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel;
// CUfunction pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel;
//};
//
///**
......@@ -705,8 +774,8 @@ namespace OpenMM {
// */
//class CudaCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel {
//public:
// CudaCalcCustomExternalForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcCustomExternalForceKernel(name, platform),
// hasInitializedKernel(false), cl(cl), system(system), params(NULL), globals(NULL) {
// CudaCalcCustomExternalForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomExternalForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) {
// }
// ~CudaCalcCustomExternalForceKernel();
// /**
......@@ -725,10 +794,17 @@ namespace OpenMM {
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the CustomExternalForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force);
//private:
// int numParticles;
// bool hasInitializedKernel;
// CudaContext& cl;
// CudaContext& cu;
// System& system;
// CudaParameterSet* params;
// CudaArray<cl_float>* globals;
......@@ -741,8 +817,8 @@ namespace OpenMM {
// */
//class CudaCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
//public:
// CudaCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcCustomHbondForceKernel(name, platform),
// hasInitializedKernel(false), cl(cl), donorParams(NULL), acceptorParams(NULL), donors(NULL), acceptors(NULL),
// CudaCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomHbondForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), donorParams(NULL), acceptorParams(NULL), donors(NULL), acceptors(NULL),
// donorBufferIndices(NULL), acceptorBufferIndices(NULL), globals(NULL), donorExclusions(NULL), acceptorExclusions(NULL),
// tabulatedFunctionParams(NULL), system(system) {
// }
......@@ -763,10 +839,17 @@ namespace OpenMM {
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the CustomHbondForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
//private:
// int numDonors, numAcceptors;
// bool hasInitializedKernel;
// CudaContext& cl;
// CudaContext& cu;
// CudaParameterSet* donorParams;
// CudaParameterSet* acceptorParams;
// CudaArray<cl_float>* globals;
......@@ -781,7 +864,7 @@ namespace OpenMM {
// std::vector<cl_float> globalParamValues;
// std::vector<CudaArray<mm_float4>*> tabulatedFunctions;
// System& system;
// cl::Kernel donorKernel, acceptorKernel;
// CUfunction donorKernel, acceptorKernel;
//};
//
///**
......@@ -789,8 +872,8 @@ namespace OpenMM {
// */
//class CudaCalcCustomCompoundBondForceKernel : public CalcCustomCompoundBondForceKernel {
//public:
// CudaCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, CudaContext& cl, System& system) : CalcCustomCompoundBondForceKernel(name, platform),
// cl(cl), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), system(system) {
// CudaCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomCompoundBondForceKernel(name, platform),
// cu(cu), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), system(system) {
// }
// ~CudaCalcCustomCompoundBondForceKernel();
// /**
......@@ -809,9 +892,17 @@ namespace OpenMM {
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the CustomCompoundBondForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force);
//
//private:
// int numBonds;
// CudaContext& cl;
// CudaContext& cu;
// CudaParameterSet* params;
// CudaArray<cl_float>* globals;
// CudaArray<mm_float4>* tabulatedFunctionParams;
......@@ -820,43 +911,41 @@ namespace OpenMM {
// std::vector<CudaArray<mm_float4>*> tabulatedFunctions;
// System& system;
//};
//
///**
// * This kernel is invoked by VerletIntegrator to take one time step.
// */
//class CudaIntegrateVerletStepKernel : public IntegrateVerletStepKernel {
//public:
// CudaIntegrateVerletStepKernel(std::string name, const Platform& platform, CudaContext& cl) : IntegrateVerletStepKernel(name, platform), cl(cl),
// hasInitializedKernels(false) {
// }
// ~CudaIntegrateVerletStepKernel();
// /**
// * Initialize the kernel.
// *
// * @param system the System this kernel will be applied to
// * @param integrator the VerletIntegrator this kernel will be used for
// */
// void initialize(const System& system, const VerletIntegrator& integrator);
// /**
// * Execute the kernel.
// *
// * @param context the context in which to execute this kernel
// * @param integrator the VerletIntegrator this kernel is being used for
// */
// void execute(ContextImpl& context, const VerletIntegrator& integrator);
//private:
// CudaContext& cl;
// double prevStepSize;
// bool hasInitializedKernels;
// cl::Kernel kernel1, kernel2;
//};
//
/**
* This kernel is invoked by VerletIntegrator to take one time step.
*/
class CudaIntegrateVerletStepKernel : public IntegrateVerletStepKernel {
public:
CudaIntegrateVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVerletStepKernel(name, platform), cu(cu) {
}
~CudaIntegrateVerletStepKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the VerletIntegrator this kernel will be used for
*/
void initialize(const System& system, const VerletIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
*/
void execute(ContextImpl& context, const VerletIntegrator& integrator);
private:
CudaContext& cu;
double prevStepSize;
CUfunction kernel1, kernel2;
};
///**
// * This kernel is invoked by LangevinIntegrator to take one time step.
// */
//class CudaIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
//public:
// CudaIntegrateLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cl) : IntegrateLangevinStepKernel(name, platform), cl(cl),
// CudaIntegrateLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateLangevinStepKernel(name, platform), cu(cu),
// hasInitializedKernels(false), params(NULL) {
// }
// ~CudaIntegrateLangevinStepKernel();
......@@ -875,11 +964,11 @@ namespace OpenMM {
// */
// void execute(ContextImpl& context, const LangevinIntegrator& integrator);
//private:
// CudaContext& cl;
// CudaContext& cu;
// double prevTemp, prevFriction, prevStepSize;
// bool hasInitializedKernels;
// CudaArray<cl_float>* params;
// cl::Kernel kernel1, kernel2;
// CUfunction kernel1, kernel2;
//};
//
///**
......@@ -887,7 +976,7 @@ namespace OpenMM {
// */
//class CudaIntegrateBrownianStepKernel : public IntegrateBrownianStepKernel {
//public:
// CudaIntegrateBrownianStepKernel(std::string name, const Platform& platform, CudaContext& cl) : IntegrateBrownianStepKernel(name, platform), cl(cl),
// CudaIntegrateBrownianStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateBrownianStepKernel(name, platform), cu(cu),
// hasInitializedKernels(false), prevTemp(-1), prevFriction(-1), prevStepSize(-1) {
// }
// ~CudaIntegrateBrownianStepKernel();
......@@ -906,10 +995,10 @@ namespace OpenMM {
// */
// void execute(ContextImpl& context, const BrownianIntegrator& integrator);
//private:
// CudaContext& cl;
// CudaContext& cu;
// double prevTemp, prevFriction, prevStepSize;
// bool hasInitializedKernels;
// cl::Kernel kernel1, kernel2;
// CUfunction kernel1, kernel2;
//};
//
///**
......@@ -917,7 +1006,7 @@ namespace OpenMM {
// */
//class CudaIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStepKernel {
//public:
// CudaIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, CudaContext& cl) : IntegrateVariableVerletStepKernel(name, platform), cl(cl),
// CudaIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableVerletStepKernel(name, platform), cu(cu),
// hasInitializedKernels(false) {
// }
// ~CudaIntegrateVariableVerletStepKernel();
......@@ -938,10 +1027,10 @@ namespace OpenMM {
// */
// double execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime);
//private:
// CudaContext& cl;
// CudaContext& cu;
// bool hasInitializedKernels;
// int blockSize;
// cl::Kernel kernel1, kernel2, selectSizeKernel;
// CUfunction kernel1, kernel2, selectSizeKernel;
//};
//
///**
......@@ -949,7 +1038,7 @@ namespace OpenMM {
// */
//class CudaIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
//public:
// CudaIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cl) : IntegrateVariableLangevinStepKernel(name, platform), cl(cl),
// CudaIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableLangevinStepKernel(name, platform), cu(cu),
// hasInitializedKernels(false), params(NULL) {
// }
// ~CudaIntegrateVariableLangevinStepKernel();
......@@ -970,11 +1059,11 @@ namespace OpenMM {
// */
// double execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime);
//private:
// CudaContext& cl;
// CudaContext& cu;
// bool hasInitializedKernels;
// int blockSize;
// CudaArray<cl_float>* params;
// cl::Kernel kernel1, kernel2, selectSizeKernel;
// CUfunction kernel1, kernel2, selectSizeKernel;
// double prevTemp, prevFriction, prevErrorTol;
//};
//
......@@ -983,7 +1072,7 @@ namespace OpenMM {
// */
//class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
//public:
// CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl),
// CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
// hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), contextParameterValues(NULL), sumBuffer(NULL), energy(NULL),
// uniformRandoms(NULL), randomSeed(NULL), perDofValues(NULL) {
// }
......@@ -1041,7 +1130,7 @@ namespace OpenMM {
// std::string createGlobalComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const std::string& energyName);
// std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
// void recordChangedParameters(ContextImpl& context);
// CudaContext& cl;
// CudaContext& cu;
// double prevStepSize;
// int numGlobalVariables;
// bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters;
......@@ -1054,8 +1143,8 @@ namespace OpenMM {
// CudaArray<mm_int4>* randomSeed;
// CudaParameterSet* perDofValues;
// mutable std::vector<std::vector<cl_float> > localPerDofValues;
// std::vector<std::vector<cl::Kernel> > kernels;
// cl::Kernel sumEnergyKernel, randomKernel;
// std::vector<std::vector<CUfunction> > kernels;
// CUfunction sumEnergyKernel, randomKernel;
// std::vector<CustomIntegrator::ComputationType> stepType;
// std::vector<bool> needsForces;
// std::vector<bool> needsEnergy;
......@@ -1072,7 +1161,7 @@ namespace OpenMM {
// */
//class CudaApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel {
//public:
// CudaApplyAndersenThermostatKernel(std::string name, const Platform& platform, CudaContext& cl) : ApplyAndersenThermostatKernel(name, platform), cl(cl),
// CudaApplyAndersenThermostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyAndersenThermostatKernel(name, platform), cu(cu),
// hasInitializedKernels(false), atomGroups(NULL) {
// }
// ~CudaApplyAndersenThermostatKernel();
......@@ -1090,11 +1179,11 @@ namespace OpenMM {
// */
// void execute(ContextImpl& context);
//private:
// CudaContext& cl;
// CudaContext& cu;
// bool hasInitializedKernels;
// int randomSeed;
// CudaArray<cl_int>* atomGroups;
// cl::Kernel kernel;
// CUfunction kernel;
//};
//
///**
......@@ -1102,7 +1191,7 @@ namespace OpenMM {
// */
//class CudaApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel {
//public:
// CudaApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, CudaContext& cl) : ApplyMonteCarloBarostatKernel(name, platform), cl(cl),
// CudaApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyMonteCarloBarostatKernel(name, platform), cu(cu),
// hasInitializedKernels(false), savedPositions(NULL), moleculeAtoms(NULL), moleculeStartIndex(NULL) {
// }
// ~CudaApplyMonteCarloBarostatKernel();
......@@ -1131,45 +1220,45 @@ namespace OpenMM {
// */
// void restoreCoordinates(ContextImpl& context);
//private:
// CudaContext& cl;
// CudaContext& cu;
// bool hasInitializedKernels;
// int numMolecules;
// CudaArray<mm_float4>* savedPositions;
// CudaArray<cl_int>* moleculeAtoms;
// CudaArray<cl_int>* moleculeStartIndex;
// cl::Kernel kernel;
// CUfunction kernel;
//};
//
///**
// * This kernel is invoked to calculate the kinetic energy of the system.
// */
//class CudaCalcKineticEnergyKernel : public CalcKineticEnergyKernel {
//public:
// CudaCalcKineticEnergyKernel(std::string name, const Platform& platform, CudaContext& cl) : CalcKineticEnergyKernel(name, platform), cl(cl) {
// }
// /**
// * Initialize the kernel.
// *
// * @param system the System this kernel will be applied to
// */
// void initialize(const System& system);
// /**
// * Execute the kernel.
// *
// * @param context the context in which to execute this kernel
// */
// double execute(ContextImpl& context);
//private:
// CudaContext& cl;
// std::vector<double> masses;
//};
//
/**
* This kernel is invoked to calculate the kinetic energy of the system.
*/
class CudaCalcKineticEnergyKernel : public CalcKineticEnergyKernel {
public:
CudaCalcKineticEnergyKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcKineticEnergyKernel(name, platform), cu(cu) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
*/
double execute(ContextImpl& context);
private:
CudaContext& cu;
std::vector<double> masses;
};
///**
// * This kernel is invoked to remove center of mass motion from the system.
// */
//class CudaRemoveCMMotionKernel : public RemoveCMMotionKernel {
//public:
// CudaRemoveCMMotionKernel(std::string name, const Platform& platform, CudaContext& cl) : RemoveCMMotionKernel(name, platform), cl(cl), cmMomentum(NULL) {
// CudaRemoveCMMotionKernel(std::string name, const Platform& platform, CudaContext& cu) : RemoveCMMotionKernel(name, platform), cu(cu), cmMomentum(NULL) {
// }
// ~CudaRemoveCMMotionKernel();
// /**
......@@ -1186,12 +1275,13 @@ namespace OpenMM {
// */
// void execute(ContextImpl& context);
//private:
// CudaContext& cl;
// CudaContext& cu;
// int frequency;
// CudaArray<mm_float4>* cmMomentum;
// cl::Kernel kernel1, kernel2;
// CUfunction kernel1, kernel2;
//};
} // namespace OpenMM
#endif /*OPENMM_CUDAKERNELS_H_*/
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
real r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
COMPUTE_FORCE
dEdR = (r > 0) ? (dEdR / r) : 0;
delta *= dEdR;
real3 force1 = delta;
real3 force2 = -delta;
float2 bondParams = PARAMS[index];
real deltaIdeal = r-bondParams.x;
energy += 0.5f * bondParams.y*deltaIdeal*deltaIdeal;
real dEdR = bondParams.y * deltaIdeal;
......@@ -42,7 +42,7 @@ inline __device__ double4 make_double4(double a) {
// Negate a vector.
inline __device__ int2 operator*(int2 a) {
inline __device__ int2 operator-(int2 a) {
return make_int2(-a.x, -a.y);
}
......@@ -455,3 +455,41 @@ inline __device__ double3 operator*(double a, double3 b) {
inline __device__ double4 operator*(double a, double4 b) {
return make_double4(a*b.x, a*b.y, a*b.z, a*b.w);
}
// *= operator (multiply vector by constant)
inline __device__ void operator*=(int2& a, int b) {
a.x *= b; a.y *= b;
}
inline __device__ void operator*=(int3& a, int b) {
a.x *= b; a.y *= b; a.z *= b;
}
inline __device__ void operator*=(int4& a, int b) {
a.x *= b; a.y *= b; a.z *= b; a.w *= b;
}
inline __device__ void operator*=(float2& a, float b) {
a.x *= b; a.y *= b;
}
inline __device__ void operator*=(float3& a, float b) {
a.x *= b; a.y *= b; a.z *= b;
}
inline __device__ void operator*=(float4& a, float b) {
a.x *= b; a.y *= b; a.z *= b; a.w *= b;
}
inline __device__ void operator*=(double2& a, double b) {
a.x *= b; a.y *= b;
}
inline __device__ void operator*=(double3& a, double b) {
a.x *= b; a.y *= b; a.z *= b;
}
inline __device__ void operator*=(double4& a, double b) {
a.x *= b; a.y *= b; a.z *= b; a.w *= b;
}
/**
* Perform the first step of Verlet integration.
*/
extern "C" __global__ void integrateVerletPart1(const real2* __restrict__ dt, const real4* __restrict__ posq, real4* __restrict__ velm, const long long* __restrict__ force, real4* __restrict__ posDelta) {
const real2 stepSize = dt[0];
const real dtPos = stepSize.y;
const real dtVel = 0.5f*(stepSize.x+stepSize.y);
const real scale = dtVel/(real) 0xFFFFFFFF;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
real4 velocity = velm[index];
if (velocity.w != 0.0) {
real4 pos = posq[index];
velocity.x += scale*force[index]*velocity.w;
velocity.y += scale*force[index+PADDED_NUM_ATOMS]*velocity.w;
velocity.z += scale*force[index+PADDED_NUM_ATOMS*2]*velocity.w;
pos.x = velocity.x*dtPos;
pos.y = velocity.y*dtPos;
pos.z = velocity.z*dtPos;
posDelta[index] = pos;
velm[index] = velocity;
}
}
}
/**
* Perform the second step of Verlet integration.
*/
extern "C" __global__ void integrateVerletPart2(real2* __restrict__ dt, real4* __restrict__ posq, real4* __restrict__ velm, const real4* __restrict__ posDelta) {
real2 stepSize = dt[0];
double oneOverDt = 1.0/stepSize.y;
int index = blockIdx.x*blockDim.x+threadIdx.x;
if (index == 0)
dt[0].x = stepSize.y;
for (; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
real4 velocity = velm[index];
if (velocity.w != 0.0) {
real4 pos = posq[index];
real4 delta = posDelta[index];
pos.x += delta.x;
pos.y += delta.y;
pos.z += delta.z;
velocity = make_real4((real) (delta.x*oneOverDt), (real) (delta.y*oneOverDt), (real) (delta.z*oneOverDt), velocity.w);
posq[index] = pos;
velm[index] = velocity;
}
}
}
/**
* Select the step size to use for the next step.
*/
//
//extern "C" __global__ void selectVerletStepSize(real maxStepSize, real errorTol, real2* __restrict__ dt, const real4* __restrict__ velm, const real4* __restrict__ force, __local real* __restrict__ error) {
// // Calculate the error.
//
// real err = 0.0f;
// for (int index = threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// real4 f = force[index];
// real invMass = velm[index].w;
// err += (f.x*f.x + f.y*f.y + f.z*f.z)*invMass;
// }
// error[threadIdx.x] = err;
// __syncthreads;
//
// // Sum the errors from all threads.
//
// for (unsigned int offset = 1; offset < get_local_size(0); offset *= 2) {
// if (threadIdx.x+offset < get_local_size(0) && (threadIdx.x&(2*offset-1)) == 0)
// error[threadIdx.x] += error[threadIdx.x+offset];
// __syncthreads;
// }
// if (threadIdx.x == 0) {
// real totalError = sqrt(error[0]/(NUM_ATOMS*3));
// real newStepSize = sqrt(errorTol/totalError);
// real oldStepSize = dt[0].y;
// if (oldStepSize > 0.0f)
// newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
// if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize)
// newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator.
// if (newStepSize > maxStepSize)
// newStepSize = maxStepSize;
// dt[0].y = newStepSize;
// }
//}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of HarmonicBondForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include <iostream>
#include <map>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
void testBonds() {
CudaPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
HarmonicBondForce* forceField = new HarmonicBondForce();
forceField->addBond(0, 1, 1.5, 0.8);
forceField->addBond(1, 2, 1.2, 0.7);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(3);
positions[0] = Vec3(0, 2, 0);
positions[1] = Vec3(0, 0, 0);
positions[2] = Vec3(1, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
{
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_VEC(Vec3(0, -0.8*0.5, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0.7*0.2, 0, 0), forces[2], TOL);
ASSERT_EQUAL_VEC(Vec3(-forces[0][0]-forces[2][0], -forces[0][1]-forces[2][1], -forces[0][2]-forces[2][2]), forces[1], TOL);
ASSERT_EQUAL_TOL(0.5*0.8*0.5*0.5 + 0.5*0.7*0.2*0.2, state.getPotentialEnergy(), TOL);
}
// Try changing the bond parameters and make sure it's still correct.
forceField->setBondParameters(0, 0, 1, 1.6, 0.9);
forceField->setBondParameters(1, 1, 2, 1.3, 0.8);
forceField->updateParametersInContext(context);
state = context.getState(State::Forces | State::Energy);
{
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_VEC(Vec3(0, -0.9*0.4, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0.8*0.3, 0, 0), forces[2], TOL);
ASSERT_EQUAL_VEC(Vec3(-forces[0][0]-forces[2][0], -forces[0][1]-forces[2][1], -forces[0][2]-forces[2][2]), forces[1], TOL);
ASSERT_EQUAL_TOL(0.5*0.9*0.4*0.4 + 0.5*0.8*0.3*0.3, state.getPotentialEnergy(), TOL);
}
}
void testParallelComputation() {
CudaPlatform platform;
System system;
const int numParticles = 200;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
HarmonicBondForce* force = new HarmonicBondForce();
for (int i = 1; i < numParticles; i++)
force->addBond(i-1, i, 1.1, i);
system.addForce(force);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(i, 0, 0);
VerletIntegrator integrator1(0.01);
Context context1(system, integrator1, platform);
context1.setPositions(positions);
State state1 = context1.getState(State::Forces | State::Energy);
VerletIntegrator integrator2(0.01);
string deviceIndex = platform.getPropertyValue(context1, CudaPlatform::CudaDeviceIndex());
map<string, string> props;
props[CudaPlatform::CudaDeviceIndex()] = deviceIndex+","+deviceIndex;
Context context2(system, integrator2, platform, props);
context2.setPositions(positions);
State state2 = context2.getState(State::Forces | State::Energy);
ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-5);
for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-5);
}
int main() {
try {
testBonds();
// testParallelComputation();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of VerletIntegrator.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
/**
* Compute the energy of a state, taking into account the half step offset between
* positions and velocities.
*/
static double computeEnergy(const State& state, const System& system, double dt) {
const vector<Vec3>& v = state.getVelocities();
const vector<Vec3>& f = state.getForces();
double energy = 0.0;
for (int i = 0; i < system.getNumParticles(); i++) {
double m = system.getParticleMass(i);
Vec3 vel = v[i]+f[i]*(0.5*dt/m);
energy += 0.5*m*vel.dot(vel);
}
return energy+state.getPotentialEnergy();
}
void testSingleBond() {
CudaPlatform platform;
System system;
system.addParticle(2.0);
system.addParticle(2.0);
VerletIntegrator integrator(0.01);
HarmonicBondForce* forceField = new HarmonicBondForce();
forceField->addBond(0, 1, 1.5, 1);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(-1, 0, 0);
positions[1] = Vec3(1, 0, 0);
context.setPositions(positions);
// This is simply a harmonic oscillator, so compare it to the analytical solution.
const double freq = 1.0;;
State state = context.getState(State::Energy);
const double initialEnergy = state.getKineticEnergy()+state.getPotentialEnergy();
for (int i = 0; i < 1000; ++i) {
state = context.getState(State::Positions | State::Velocities | State::Energy);
double time = state.getTime();
double expectedDist = 1.5+0.5*std::cos(freq*time);
ASSERT_EQUAL_VEC(Vec3(-0.5*expectedDist, 0, 0), state.getPositions()[0], 0.02);
ASSERT_EQUAL_VEC(Vec3(0.5*expectedDist, 0, 0), state.getPositions()[1], 0.02);
double expectedSpeed = -0.5*freq*std::sin(freq*time);
ASSERT_EQUAL_VEC(Vec3(-0.5*expectedSpeed, 0, 0), state.getVelocities()[0], 0.02);
ASSERT_EQUAL_VEC(Vec3(0.5*expectedSpeed, 0, 0), state.getVelocities()[1], 0.02);
double energy = state.getKineticEnergy()+state.getPotentialEnergy();
ASSERT_EQUAL_TOL(initialEnergy, energy, 0.01);
integrator.step(1);
}
ASSERT_EQUAL_TOL(10.0, context.getState(0).getTime(), 1e-5);
}
void testConstraints() {
const int numParticles = 8;
const int numConstraints = 5;
const double temp = 100.0;
CudaPlatform platform;
System system;
VerletIntegrator integrator(0.001);
integrator.setConstraintTolerance(1e-5);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(10.0);
forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
}
system.addConstraint(0, 1, 1.0);
system.addConstraint(1, 2, 1.0);
system.addConstraint(2, 3, 1.0);
system.addConstraint(4, 5, 1.0);
system.addConstraint(6, 7, 1.0);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; ++i) {
positions[i] = Vec3(i/2, (i+1)/2, 0);
velocities[i] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
}
context.setPositions(positions);
context.setVelocities(velocities);
// Simulate it and see whether the constraints remain satisfied.
double initialEnergy = 0.0;
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions | State::Energy | State::Velocities | State::Forces);
for (int j = 0; j < numConstraints; ++j) {
int particle1, particle2;
double distance;
system.getConstraintParameters(j, particle1, particle2, distance);
Vec3 p1 = state.getPositions()[particle1];
Vec3 p2 = state.getPositions()[particle2];
double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
ASSERT_EQUAL_TOL(distance, dist, 1e-4);
}
double energy = computeEnergy(state, system, integrator.getStepSize());
if (i == 1)
initialEnergy = energy;
else if (i > 1)
ASSERT_EQUAL_TOL(initialEnergy, energy, 0.01);
integrator.step(1);
}
}
void testConstrainedClusters() {
const int numParticles = 7;
const double temp = 500.0;
CudaPlatform platform;
System system;
VerletIntegrator integrator(0.001);
integrator.setConstraintTolerance(1e-5);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(i > 1 ? 1.0 : 10.0);
forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
}
system.addConstraint(0, 1, 1.0);
system.addConstraint(0, 2, 1.0);
system.addConstraint(0, 3, 1.0);
system.addConstraint(0, 4, 1.0);
system.addConstraint(1, 5, 1.0);
system.addConstraint(1, 6, 1.0);
system.addConstraint(2, 3, sqrt(2.0));
system.addConstraint(2, 4, sqrt(2.0));
system.addConstraint(3, 4, sqrt(2.0));
system.addConstraint(5, 6, sqrt(2.0));
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(1, 0, 0);
positions[2] = Vec3(-1, 0, 0);
positions[3] = Vec3(0, 1, 0);
positions[4] = Vec3(0, 0, 1);
positions[5] = Vec3(2, 0, 0);
positions[6] = Vec3(1, 1, 0);
vector<Vec3> velocities(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; ++i)
velocities[i] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
context.setPositions(positions);
context.setVelocities(velocities);
// Simulate it and see whether the constraints remain satisfied.
double initialEnergy = 0.0;
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions | State::Energy | State::Velocities | State::Forces);
for (int j = 0; j < system.getNumConstraints(); ++j) {
int particle1, particle2;
double distance;
system.getConstraintParameters(j, particle1, particle2, distance);
Vec3 p1 = state.getPositions()[particle1];
Vec3 p2 = state.getPositions()[particle2];
double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
ASSERT_EQUAL_TOL(distance, dist, 2e-5);
}
double energy = computeEnergy(state, system, integrator.getStepSize());
if (i == 1)
initialEnergy = energy;
else if (i > 1)
ASSERT_EQUAL_TOL(initialEnergy, energy, 0.01);
integrator.step(1);
}
}
int main() {
try {
testSingleBond();
// testConstraints();
// testConstrainedClusters();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment