Commit 18295108 authored by peastman's avatar peastman
Browse files

Merge changes from main branch

parents e6101f68 8d7234e5
...@@ -53,6 +53,7 @@ public: ...@@ -53,6 +53,7 @@ public:
const std::string& getPropertyValue(const Context& context, const std::string& property) const; const std::string& getPropertyValue(const Context& context, const std::string& property) const;
void setPropertyValue(Context& context, const std::string& property, const std::string& value) const; void setPropertyValue(Context& context, const std::string& property, const std::string& value) const;
void contextCreated(ContextImpl& context, const std::map<std::string, std::string>& properties) const; void contextCreated(ContextImpl& context, const std::map<std::string, std::string>& properties) const;
void linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const;
void contextDestroyed(ContextImpl& context) const; void contextDestroyed(ContextImpl& context) const;
/** /**
* This is the name of the parameter for selecting which OpenCL device or devices to use. * This is the name of the parameter for selecting which OpenCL device or devices to use.
...@@ -108,7 +109,7 @@ public: ...@@ -108,7 +109,7 @@ public:
class OPENMM_EXPORT_OPENCL OpenCLPlatform::PlatformData { class OPENMM_EXPORT_OPENCL OpenCLPlatform::PlatformData {
public: public:
PlatformData(const System& system, const std::string& platformPropValue, const std::string& deviceIndexProperty, const std::string& precisionProperty, PlatformData(const System& system, const std::string& platformPropValue, const std::string& deviceIndexProperty, const std::string& precisionProperty,
const std::string& cpuPmeProperty, const std::string& pmeStreamProperty, int numThreads); const std::string& cpuPmeProperty, const std::string& pmeStreamProperty, int numThreads, ContextImpl* originalContext);
~PlatformData(); ~PlatformData();
void initializeContexts(const System& system); void initializeContexts(const System& system);
void syncContexts(); void syncContexts();
......
...@@ -67,9 +67,9 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i ...@@ -67,9 +67,9 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
std::cerr << "OpenCL internal error: " << errinfo << std::endl; std::cerr << "OpenCL internal error: " << errinfo << std::endl;
} }
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) : OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData, OpenCLContext* originalContext) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL), system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), energySum(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL),
chargeBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { chargeBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") { if (precision == "single") {
useDoublePrecision = false; useDoublePrecision = false;
...@@ -261,8 +261,14 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -261,8 +261,14 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
vector<cl::Device> contextDevices; vector<cl::Device> contextDevices;
contextDevices.push_back(device); contextDevices.push_back(device);
cl_context_properties cprops[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platforms[bestPlatform](), 0}; cl_context_properties cprops[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platforms[bestPlatform](), 0};
context = cl::Context(contextDevices, cprops, errorCallback); if (originalContext == NULL) {
defaultQueue = cl::CommandQueue(context, device); context = cl::Context(contextDevices, cprops, errorCallback);
defaultQueue = cl::CommandQueue(context, device);
}
else {
context = originalContext->context;
defaultQueue = originalContext->defaultQueue;
}
currentQueue = defaultQueue; currentQueue = defaultQueue;
numAtoms = system.getNumParticles(); numAtoms = system.getNumParticles();
paddedNumAtoms = TileSize*((numAtoms+TileSize-1)/TileSize); paddedNumAtoms = TileSize*((numAtoms+TileSize-1)/TileSize);
...@@ -309,6 +315,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -309,6 +315,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
reduceReal4Kernel = cl::Kernel(utilities, "reduceReal4Buffer"); reduceReal4Kernel = cl::Kernel(utilities, "reduceReal4Buffer");
if (supports64BitGlobalAtomics) if (supports64BitGlobalAtomics)
reduceForcesKernel = cl::Kernel(utilities, "reduceForces"); reduceForcesKernel = cl::Kernel(utilities, "reduceForces");
reduceEnergyKernel = cl::Kernel(utilities, "reduceEnergy");
setChargesKernel = cl::Kernel(utilities, "setCharges"); setChargesKernel = cl::Kernel(utilities, "setCharges");
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use. // Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
...@@ -436,6 +443,8 @@ OpenCLContext::~OpenCLContext() { ...@@ -436,6 +443,8 @@ OpenCLContext::~OpenCLContext() {
delete longForceBuffer; delete longForceBuffer;
if (energyBuffer != NULL) if (energyBuffer != NULL)
delete energyBuffer; delete energyBuffer;
if (energySum != NULL)
delete energySum;
if (energyParamDerivBuffer != NULL) if (energyParamDerivBuffer != NULL)
delete energyParamDerivBuffer; delete energyParamDerivBuffer;
if (atomIndexDevice != NULL) if (atomIndexDevice != NULL)
...@@ -465,11 +474,19 @@ void OpenCLContext::initialize() { ...@@ -465,11 +474,19 @@ void OpenCLContext::initialize() {
forceBuffers = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers"); forceBuffers = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_double4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force"); force = OpenCLArray::create<mm_double4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer"); energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer");
energySum = OpenCLArray::create<cl_double>(*this, 1, "energySum");
} }
else { else if (useMixedPrecision) {
forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers"); forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force"); force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer"); energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer");
energySum = OpenCLArray::create<cl_double>(*this, 1, "energySum");
}
else {
forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_float>(*this, energyBufferSize, "energyBuffer");
energySum = OpenCLArray::create<cl_float>(*this, 1, "energySum");
} }
if (supports64BitGlobalAtomics) { if (supports64BitGlobalAtomics) {
longForceBuffer = OpenCLArray::create<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer"); longForceBuffer = OpenCLArray::create<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer");
...@@ -750,6 +767,28 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) { ...@@ -750,6 +767,28 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
executeKernel(reduceReal4Kernel, bufferSize, 128); executeKernel(reduceReal4Kernel, bufferSize, 128);
} }
/**
 * Run the "reduceEnergy" kernel over the energy buffer and return the value it
 * writes to the single-element energySum array.
 *
 * The result is read back as a double in double or mixed precision modes and
 * as a float (widened to double on return) in single precision mode, matching
 * the element type energySum is created with.
 *
 * @return the value downloaded from energySum after the kernel has run
 */
double OpenCLContext::reduceEnergy() {
    // Use the device's maximum work group size, but never more than 512.
    int groupSize = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
    if (groupSize > 512)
        groupSize = 512;

    // Kernel arguments: input buffer, output buffer, input length, work group size,
    // and a scratch area holding one energy-sized element per work item.
    // Passing a size with a NULL pointer asks OpenCL to allocate local memory.
    reduceEnergyKernel.setArg<cl::Buffer>(0, energyBuffer->getDeviceBuffer());
    reduceEnergyKernel.setArg<cl::Buffer>(1, energySum->getDeviceBuffer());
    reduceEnergyKernel.setArg<cl_int>(2, energyBuffer->getSize());
    reduceEnergyKernel.setArg<cl_int>(3, groupSize);
    reduceEnergyKernel.setArg(4, groupSize*energyBuffer->getElementSize(), NULL);

    // Total work size equals the block size (presumably a single work group - confirm
    // against executeKernel).
    executeKernel(reduceEnergyKernel, groupSize, groupSize);

    // Single precision: the sum is stored on the device as a float.
    if (!getUseDoublePrecision() && !getUseMixedPrecision()) {
        float total;
        energySum->download(&total);
        return total;
    }

    // Double or mixed precision: the sum is stored on the device as a double.
    double total;
    energySum->download(&total);
    return total;
}
void OpenCLContext::setCharges(const vector<double>& charges) { void OpenCLContext::setCharges(const vector<double>& charges) {
if (chargeBuffer == NULL) if (chargeBuffer == NULL)
chargeBuffer = new OpenCLArray(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer"); chargeBuffer = new OpenCLArray(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
...@@ -939,9 +978,16 @@ void OpenCLContext::findMoleculeGroups() { ...@@ -939,9 +978,16 @@ void OpenCLContext::findMoleculeGroups() {
for (int i = 0; i < (int) forces.size() && identical; i++) { for (int i = 0; i < (int) forces.size() && identical; i++) {
if (mol.groups[i].size() != mol2.groups[i].size()) if (mol.groups[i].size() != mol2.groups[i].size())
identical = false; identical = false;
for (int k = 0; k < (int) mol.groups[i].size() && identical; k++) for (int k = 0; k < (int) mol.groups[i].size() && identical; k++) {
if (!forces[i]->areGroupsIdentical(mol.groups[i][k], mol2.groups[i][k])) if (!forces[i]->areGroupsIdentical(mol.groups[i][k], mol2.groups[i][k]))
identical = false; identical = false;
vector<int> p1, p2;
forces[i]->getParticlesInGroup(mol.groups[i][k], p1);
forces[i]->getParticlesInGroup(mol2.groups[i][k], p2);
for (int m = 0; m < p1.size(); m++)
if (p1[m] != p2[m]-atomOffset)
identical = false;
}
} }
if (identical) { if (identical) {
moleculeInstances[j].push_back(molIndex); moleculeInstances[j].push_back(molIndex);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. * * Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -102,7 +102,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -102,7 +102,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL), ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL),
ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL), ccmaConvergedHostBuffer(NULL), ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL), ccmaConvergedHostBuffer(NULL),
vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL), vsite2AvgAtoms(NULL), vsite2AvgWeights(NULL), vsite3AvgAtoms(NULL), vsite3AvgWeights(NULL),
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), vsiteLocalCoordsAtoms(NULL), vsiteLocalCoordsParams(NULL), vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), vsiteLocalCoordsIndex(NULL), vsiteLocalCoordsAtoms(NULL),
vsiteLocalCoordsWeights(NULL), vsiteLocalCoordsPos(NULL), vsiteLocalCoordsStartIndex(NULL),
hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false), hasOverlappingVsites(false) { hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false), hasOverlappingVsites(false) {
// Create workspace arrays. // Create workspace arrays.
...@@ -497,8 +498,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -497,8 +498,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vector<mm_double4> vsite3AvgWeightVec; vector<mm_double4> vsite3AvgWeightVec;
vector<mm_int4> vsiteOutOfPlaneAtomVec; vector<mm_int4> vsiteOutOfPlaneAtomVec;
vector<mm_double4> vsiteOutOfPlaneWeightVec; vector<mm_double4> vsiteOutOfPlaneWeightVec;
vector<mm_int4> vsiteLocalCoordsAtomVec; vector<cl_int> vsiteLocalCoordsIndexVec;
vector<cl_double> vsiteLocalCoordsParamVec; vector<cl_int> vsiteLocalCoordsAtomVec;
vector<cl_int> vsiteLocalCoordsStartVec;
vector<cl_double> vsiteLocalCoordsWeightVec;
vector<mm_double4> vsiteLocalCoordsPosVec;
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
if (system.isVirtualSite(i)) { if (system.isVirtualSite(i)) {
if (dynamic_cast<const TwoParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) { if (dynamic_cast<const TwoParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
...@@ -523,65 +527,73 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -523,65 +527,73 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteOutOfPlaneWeightVec.push_back(mm_double4(site.getWeight12(), site.getWeight13(), site.getWeightCross(), 0.0)); vsiteOutOfPlaneWeightVec.push_back(mm_double4(site.getWeight12(), site.getWeight13(), site.getWeightCross(), 0.0));
} }
else if (dynamic_cast<const LocalCoordinatesSite*>(&system.getVirtualSite(i)) != NULL) { else if (dynamic_cast<const LocalCoordinatesSite*>(&system.getVirtualSite(i)) != NULL) {
// An out of plane site. // A local coordinates site.
const LocalCoordinatesSite& site = dynamic_cast<const LocalCoordinatesSite&>(system.getVirtualSite(i)); const LocalCoordinatesSite& site = dynamic_cast<const LocalCoordinatesSite&>(system.getVirtualSite(i));
vsiteLocalCoordsAtomVec.push_back(mm_int4(i, site.getParticle(0), site.getParticle(1), site.getParticle(2))); int numParticles = site.getNumParticles();
Vec3 origin = site.getOriginWeights(); vector<double> origin, x, y;
Vec3 x = site.getXWeights(); site.getOriginWeights(origin);
Vec3 y = site.getYWeights(); site.getXWeights(x);
site.getYWeights(y);
vsiteLocalCoordsIndexVec.push_back(i);
vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
for (int j = 0; j < numParticles; j++) {
vsiteLocalCoordsAtomVec.push_back(site.getParticle(j));
vsiteLocalCoordsWeightVec.push_back(origin[j]);
vsiteLocalCoordsWeightVec.push_back(x[j]);
vsiteLocalCoordsWeightVec.push_back(y[j]);
}
Vec3 pos = site.getLocalPosition(); Vec3 pos = site.getLocalPosition();
vsiteLocalCoordsParamVec.push_back(origin[0]); vsiteLocalCoordsPosVec.push_back(mm_double4(pos[0], pos[1], pos[2], 0.0));
vsiteLocalCoordsParamVec.push_back(origin[1]);
vsiteLocalCoordsParamVec.push_back(origin[2]);
vsiteLocalCoordsParamVec.push_back(x[0]);
vsiteLocalCoordsParamVec.push_back(x[1]);
vsiteLocalCoordsParamVec.push_back(x[2]);
vsiteLocalCoordsParamVec.push_back(y[0]);
vsiteLocalCoordsParamVec.push_back(y[1]);
vsiteLocalCoordsParamVec.push_back(y[2]);
vsiteLocalCoordsParamVec.push_back(pos[0]);
vsiteLocalCoordsParamVec.push_back(pos[1]);
vsiteLocalCoordsParamVec.push_back(pos[2]);
} }
} }
} }
vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
int num2Avg = vsite2AvgAtomVec.size(); int num2Avg = vsite2AvgAtomVec.size();
int num3Avg = vsite3AvgAtomVec.size(); int num3Avg = vsite3AvgAtomVec.size();
int numOutOfPlane = vsiteOutOfPlaneAtomVec.size(); int numOutOfPlane = vsiteOutOfPlaneAtomVec.size();
int numLocalCoords = vsiteLocalCoordsAtomVec.size(); int numLocalCoords = vsiteLocalCoordsPosVec.size();
numVsites = num2Avg+num3Avg+numOutOfPlane+numLocalCoords; numVsites = num2Avg+num3Avg+numOutOfPlane+numLocalCoords;
vsite2AvgAtoms = OpenCLArray::create<mm_int4>(context, max(1, num2Avg), "vsite2AvgAtoms"); vsite2AvgAtoms = OpenCLArray::create<mm_int4>(context, max(1, num2Avg), "vsite2AvgAtoms");
vsite3AvgAtoms = OpenCLArray::create<mm_int4>(context, max(1, num3Avg), "vsite3AvgAtoms"); vsite3AvgAtoms = OpenCLArray::create<mm_int4>(context, max(1, num3Avg), "vsite3AvgAtoms");
vsiteOutOfPlaneAtoms = OpenCLArray::create<mm_int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms"); vsiteOutOfPlaneAtoms = OpenCLArray::create<mm_int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms");
vsiteLocalCoordsAtoms = OpenCLArray::create<mm_int4>(context, max(1, numLocalCoords), "vsiteLocalCoordinatesAtoms"); vsiteLocalCoordsIndex = OpenCLArray::create<cl_int>(context, max(1, (int) vsiteLocalCoordsIndexVec.size()), "vsiteLocalCoordsIndex");
vsiteLocalCoordsAtoms = OpenCLArray::create<cl_int>(context, max(1, (int) vsiteLocalCoordsAtomVec.size()), "vsiteLocalCoordsAtoms");
vsiteLocalCoordsStartIndex = OpenCLArray::create<cl_int>(context, max(1, (int) vsiteLocalCoordsStartVec.size()), "vsiteLocalCoordsStartIndex");
if (num2Avg > 0) if (num2Avg > 0)
vsite2AvgAtoms->upload(vsite2AvgAtomVec); vsite2AvgAtoms->upload(vsite2AvgAtomVec);
if (num3Avg > 0) if (num3Avg > 0)
vsite3AvgAtoms->upload(vsite3AvgAtomVec); vsite3AvgAtoms->upload(vsite3AvgAtomVec);
if (numOutOfPlane > 0) if (numOutOfPlane > 0)
vsiteOutOfPlaneAtoms->upload(vsiteOutOfPlaneAtomVec); vsiteOutOfPlaneAtoms->upload(vsiteOutOfPlaneAtomVec);
if (numLocalCoords > 0) if (numLocalCoords > 0) {
vsiteLocalCoordsIndex->upload(vsiteLocalCoordsIndexVec);
vsiteLocalCoordsAtoms->upload(vsiteLocalCoordsAtomVec); vsiteLocalCoordsAtoms->upload(vsiteLocalCoordsAtomVec);
vsiteLocalCoordsStartIndex->upload(vsiteLocalCoordsStartVec);
}
if (context.getUseDoublePrecision()) { if (context.getUseDoublePrecision()) {
vsite2AvgWeights = OpenCLArray::create<mm_double2>(context, max(1, num2Avg), "vsite2AvgWeights"); vsite2AvgWeights = OpenCLArray::create<mm_double2>(context, max(1, num2Avg), "vsite2AvgWeights");
vsite3AvgWeights = OpenCLArray::create<mm_double4>(context, max(1, num3Avg), "vsite3AvgWeights"); vsite3AvgWeights = OpenCLArray::create<mm_double4>(context, max(1, num3Avg), "vsite3AvgWeights");
vsiteOutOfPlaneWeights = OpenCLArray::create<mm_double4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights"); vsiteOutOfPlaneWeights = OpenCLArray::create<mm_double4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
vsiteLocalCoordsParams = OpenCLArray::create<cl_double>(context, max(1, 12*numLocalCoords), "vsiteLocalCoordinatesParams"); vsiteLocalCoordsWeights = OpenCLArray::create<cl_double>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights");
vsiteLocalCoordsPos = OpenCLArray::create<mm_double4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos");
if (num2Avg > 0) if (num2Avg > 0)
vsite2AvgWeights->upload(vsite2AvgWeightVec); vsite2AvgWeights->upload(vsite2AvgWeightVec);
if (num3Avg > 0) if (num3Avg > 0)
vsite3AvgWeights->upload(vsite3AvgWeightVec); vsite3AvgWeights->upload(vsite3AvgWeightVec);
if (numOutOfPlane > 0) if (numOutOfPlane > 0)
vsiteOutOfPlaneWeights->upload(vsiteOutOfPlaneWeightVec); vsiteOutOfPlaneWeights->upload(vsiteOutOfPlaneWeightVec);
if (numLocalCoords > 0) if (numLocalCoords > 0) {
vsiteLocalCoordsParams->upload(vsiteLocalCoordsParamVec); vsiteLocalCoordsWeights->upload(vsiteLocalCoordsWeightVec);
vsiteLocalCoordsPos->upload(vsiteLocalCoordsPosVec);
}
} }
else { else {
vsite2AvgWeights = OpenCLArray::create<mm_float2>(context, max(1, num2Avg), "vsite2AvgWeights"); vsite2AvgWeights = OpenCLArray::create<mm_float2>(context, max(1, num2Avg), "vsite2AvgWeights");
vsite3AvgWeights = OpenCLArray::create<mm_float4>(context, max(1, num3Avg), "vsite3AvgWeights"); vsite3AvgWeights = OpenCLArray::create<mm_float4>(context, max(1, num3Avg), "vsite3AvgWeights");
vsiteOutOfPlaneWeights = OpenCLArray::create<mm_float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights"); vsiteOutOfPlaneWeights = OpenCLArray::create<mm_float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
vsiteLocalCoordsParams = OpenCLArray::create<float>(context, max(1, 12*numLocalCoords), "vsiteLocalCoordinatesParams"); vsiteLocalCoordsWeights = OpenCLArray::create<cl_float>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights");
vsiteLocalCoordsPos = OpenCLArray::create<mm_float4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos");
if (num2Avg > 0) { if (num2Avg > 0) {
vector<mm_float2> floatWeights(num2Avg); vector<mm_float2> floatWeights(num2Avg);
for (int i = 0; i < num2Avg; i++) for (int i = 0; i < num2Avg; i++)
...@@ -601,10 +613,14 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -601,10 +613,14 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteOutOfPlaneWeights->upload(floatWeights); vsiteOutOfPlaneWeights->upload(floatWeights);
} }
if (numLocalCoords > 0) { if (numLocalCoords > 0) {
vector<cl_float> floatParams(vsiteLocalCoordsParamVec.size()); vector<cl_float> floatWeights(vsiteLocalCoordsWeightVec.size());
for (int i = 0; i < (int) vsiteLocalCoordsParamVec.size(); i++) for (int i = 0; i < (int) vsiteLocalCoordsWeightVec.size(); i++)
floatParams[i] = (cl_float) vsiteLocalCoordsParamVec[i]; floatWeights[i] = (cl_float) vsiteLocalCoordsWeightVec[i];
vsiteLocalCoordsParams->upload(floatParams); vsiteLocalCoordsWeights->upload(floatWeights);
vector<mm_float4> floatPos(vsiteLocalCoordsPosVec.size());
for (int i = 0; i < (int) vsiteLocalCoordsPosVec.size(); i++)
floatPos[i] = mm_float4((float) vsiteLocalCoordsPosVec[i].x, (float) vsiteLocalCoordsPosVec[i].y, (float) vsiteLocalCoordsPosVec[i].z, 0.0f);
vsiteLocalCoordsPos->upload(floatPos);
} }
} }
...@@ -645,8 +661,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -645,8 +661,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsitePositionKernel.setArg<cl::Buffer>(index++, vsite3AvgWeights->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsite3AvgWeights->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneAtoms->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneAtoms->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneWeights->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneWeights->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsIndex->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsAtoms->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsAtoms->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsParams->getDeviceBuffer()); vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsWeights->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsPos->getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsStartIndex->getDeviceBuffer());
vsiteForceKernel = cl::Kernel(vsiteProgram, "distributeForces"); vsiteForceKernel = cl::Kernel(vsiteProgram, "distributeForces");
index = 0; index = 0;
vsiteForceKernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
...@@ -661,8 +680,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -661,8 +680,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteForceKernel.setArg<cl::Buffer>(index++, vsite3AvgWeights->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsite3AvgWeights->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneAtoms->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneAtoms->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneWeights->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneWeights->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsIndex->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsAtoms->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsAtoms->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsParams->getDeviceBuffer()); vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsWeights->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsPos->getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsStartIndex->getDeviceBuffer());
if (hasOverlappingVsites && context.getSupports64BitGlobalAtomics()) if (hasOverlappingVsites && context.getSupports64BitGlobalAtomics())
vsiteAddForcesKernel = cl::Kernel(vsiteProgram, "addDistributedForces"); vsiteAddForcesKernel = cl::Kernel(vsiteProgram, "addDistributedForces");
} }
...@@ -718,10 +740,16 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() { ...@@ -718,10 +740,16 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
delete vsiteOutOfPlaneAtoms; delete vsiteOutOfPlaneAtoms;
if (vsiteOutOfPlaneWeights != NULL) if (vsiteOutOfPlaneWeights != NULL)
delete vsiteOutOfPlaneWeights; delete vsiteOutOfPlaneWeights;
if (vsiteLocalCoordsIndex != NULL)
delete vsiteLocalCoordsIndex;
if (vsiteLocalCoordsAtoms != NULL) if (vsiteLocalCoordsAtoms != NULL)
delete vsiteLocalCoordsAtoms; delete vsiteLocalCoordsAtoms;
if (vsiteLocalCoordsParams != NULL) if (vsiteLocalCoordsWeights != NULL)
delete vsiteLocalCoordsParams; delete vsiteLocalCoordsWeights;
if (vsiteLocalCoordsPos != NULL)
delete vsiteLocalCoordsPos;
if (vsiteLocalCoordsStartIndex != NULL)
delete vsiteLocalCoordsStartIndex;
} }
void OpenCLIntegrationUtilities::setNextStepSize(double size) { void OpenCLIntegrationUtilities::setNextStepSize(double size) {
......
...@@ -106,6 +106,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo ...@@ -106,6 +106,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLCalcCustomCentroidBondForceKernel(name, platform, cl, context.getSystem()); return new OpenCLCalcCustomCentroidBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomCompoundBondForceKernel::Name()) if (name == CalcCustomCompoundBondForceKernel::Name())
return new OpenCLCalcCustomCompoundBondForceKernel(name, platform, cl, context.getSystem()); return new OpenCLCalcCustomCompoundBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomCVForceKernel::Name())
return new OpenCLCalcCustomCVForceKernel(name, platform, cl);
if (name == CalcCustomManyParticleForceKernel::Name()) if (name == CalcCustomManyParticleForceKernel::Name())
return new OpenCLCalcCustomManyParticleForceKernel(name, platform, cl, context.getSystem()); return new OpenCLCalcCustomManyParticleForceKernel(name, platform, cl, context.getSystem());
if (name == CalcGayBerneForceKernel::Name()) if (name == CalcGayBerneForceKernel::Name())
......
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
#include "lepton/Operation.h" #include "lepton/Operation.h"
#include "lepton/Parser.h" #include "lepton/Parser.h"
#include "lepton/ParsedExpression.h" #include "lepton/ParsedExpression.h"
#include "ReferenceTabulatedFunction.h"
#include "SimTKOpenMMRealType.h" #include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include <algorithm> #include <algorithm>
...@@ -138,21 +139,8 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, ...@@ -138,21 +139,8 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups); sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
cl.reduceForces(); cl.reduceForces();
cl.getIntegrationUtilities().distributeForcesFromVirtualSites(); cl.getIntegrationUtilities().distributeForcesFromVirtualSites();
if (includeEnergy) { if (includeEnergy)
OpenCLArray& energyArray = cl.getEnergyBuffer(); sum += cl.reduceEnergy();
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double* energy = (double*) cl.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i];
}
else {
float* energy = (float*) cl.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i];
}
}
if (!cl.getForcesValid()) if (!cl.getForcesValid())
valid = false; valid = false;
return sum; return sum;
...@@ -1780,7 +1768,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1780,7 +1768,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
try { try {
cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cl.getPlatformData().context); cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cl.getPlatformData().context);
cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha); cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha, false);
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines); cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
cl::Kernel addForcesKernel = cl::Kernel(program, "addForces"); cl::Kernel addForcesKernel = cl::Kernel(program, "addForces");
pmeio = new PmeIO(cl, addForcesKernel); pmeio = new PmeIO(cl, addForcesKernel);
...@@ -4734,7 +4722,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4734,7 +4722,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
const vector<int>& atoms = distance.second; const vector<int>& atoms = distance.second;
string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]]; string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
if (computedDeltas.count(deltaName) == 0) { if (computedDeltas.count(deltaName) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n");
computedDeltas.insert(deltaName); computedDeltas.insert(deltaName);
} }
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real r_"+deltaName+" = SQRT(delta"+deltaName+".w);\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real r_"+deltaName+" = SQRT(delta"+deltaName+".w);\n");
...@@ -4749,11 +4737,11 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4749,11 +4737,11 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]]; string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]; string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]];
if (computedDeltas.count(deltaName1) == 0) { if (computedDeltas.count(deltaName1) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[0]]+");\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[0]]+", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n");
computedDeltas.insert(deltaName1); computedDeltas.insert(deltaName1);
} }
if (computedDeltas.count(deltaName2) == 0) { if (computedDeltas.count(deltaName2) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[2]]+");\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[2]]+", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n");
computedDeltas.insert(deltaName2); computedDeltas.insert(deltaName2);
} }
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real "+angleName+" = computeAngle(delta"+deltaName1+", delta"+deltaName2+");\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real "+angleName+" = computeAngle(delta"+deltaName1+", delta"+deltaName2+");\n");
...@@ -4771,15 +4759,15 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4771,15 +4759,15 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string crossName2 = "cross_"+deltaName2+"_"+deltaName3; string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]]; string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]];
if (computedDeltas.count(deltaName1) == 0) { if (computedDeltas.count(deltaName1) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n");
computedDeltas.insert(deltaName1); computedDeltas.insert(deltaName1);
} }
if (computedDeltas.count(deltaName2) == 0) { if (computedDeltas.count(deltaName2) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[1]]+");\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[1]]+", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n");
computedDeltas.insert(deltaName2); computedDeltas.insert(deltaName2);
} }
if (computedDeltas.count(deltaName3) == 0) { if (computedDeltas.count(deltaName3) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName3+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[3]]+");\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName3+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[3]]+", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n");
computedDeltas.insert(deltaName3); computedDeltas.insert(deltaName3);
} }
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 "+crossName1+" = computeCross(delta"+deltaName1+", delta"+deltaName2+");\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 "+crossName1+" = computeCross(delta"+deltaName1+", delta"+deltaName2+");\n");
...@@ -4798,12 +4786,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -4798,12 +4786,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
extraArgs << ", __global const "+buffer.getType()+"* restrict donor"+buffer.getName(); extraArgs << ", __global const "+buffer.getType()+"* restrict donor"+buffer.getName();
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+cl.intToString(i+1)+" = donor"+buffer.getName()+"[index];\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+cl.intToString(i+1)+" = donor"+buffer.getName()+"[donorIndex];\n");
} }
for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
extraArgs << ", __global const "+buffer.getType()+"* restrict acceptor"+buffer.getName(); extraArgs << ", __global const "+buffer.getType()+"* restrict acceptor"+buffer.getName();
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+cl.intToString(i+1)+" = acceptor"+buffer.getName()+"[index];\n"); addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+cl.intToString(i+1)+" = acceptor"+buffer.getName()+"[acceptorIndex];\n");
} }
// Now evaluate the expressions. // Now evaluate the expressions.
...@@ -6873,6 +6861,191 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() { ...@@ -6873,6 +6861,191 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
exclusionStartIndex->upload(startIndexVec); exclusionStartIndex->upload(startIndexVec);
} }
/**
 * Reorder listener that rebuilds an inverse atom-order table. Each time it
 * runs, it reads the atom index list of the OpenCLContext it was given, builds
 * the inverse mapping (entry order[i] holds i), and uploads the result to the
 * array it was given.
 */
class OpenCLCalcCustomCVForceKernel::ReorderListener : public OpenCLContext::ReorderListener {
public:
    ReorderListener(OpenCLContext& cl, OpenCLArray& invAtomOrder) : cl(cl), invAtomOrder(invAtomOrder) {
    }
    void execute() {
        // Sized to the padded atom count; entries not named by the atom index list stay zero.
        vector<cl_int> inverse(cl.getPaddedNumAtoms());
        const vector<int>& atomIndex = cl.getAtomIndex();
        cl_int position = 0;
        for (int atom : atomIndex)
            inverse[atom] = position++;
        invAtomOrder.upload(inverse);
    }
private:
    OpenCLContext& cl;
    OpenCLArray& invAtomOrder;
};
/**
 * Release the device arrays owned by this kernel: one force array per
 * collective variable plus the two inverse atom-order arrays.
 */
OpenCLCalcCustomCVForceKernel::~OpenCLCalcCustomCVForceKernel() {
    for (auto* cvForce : cvForces)
        delete cvForce;
    // Deleting a null pointer is a no-op, so no explicit NULL test is needed.
    delete invAtomOrder;
    delete innerInvAtomOrder;
}
void OpenCLCalcCustomCVForceKernel::initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext) {
int numCVs = force.getNumCollectiveVariables();
cl.addForce(new OpenCLForceInfo(1));
for (int i = 0; i < force.getNumGlobalParameters(); i++)
globalParameterNames.push_back(force.getGlobalParameterName(i));
// Create custom functions for the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
for (int i = 0; i < (int) force.getNumTabulatedFunctions(); i++)
functions[force.getTabulatedFunctionName(i)] = createReferenceTabulatedFunction(force.getTabulatedFunction(i));
// Create the expressions.
Lepton::ParsedExpression energyExpr = Lepton::Parser::parse(force.getEnergyFunction(), functions);
energyExpression = energyExpr.createProgram();
for (int i = 0; i < numCVs; i++) {
string name = force.getCollectiveVariableName(i);
variableNames.push_back(name);
variableDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
}
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string name = force.getEnergyParameterDerivativeName(i);
paramDerivNames.push_back(name);
paramDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
cl.addEnergyParameterDerivative(name);
}
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
// Copy parameter derivatives from the inner context.
OpenCLContext& cl2 = *reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
for (auto& param : cl2.getEnergyParamDerivNames())
cl.addEnergyParameterDerivative(param);
// Create arrays for storing information.
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
for (int i = 0; i < numCVs; i++)
cvForces.push_back(new OpenCLArray(cl, cl.getNumAtoms(), 4*elementSize, "cvForce"));
invAtomOrder = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "invAtomOrder");
innerInvAtomOrder = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "innerInvAtomOrder");
// Create the kernels.
stringstream args, add;
for (int i = 0; i < numCVs; i++) {
args << ", __global real4* restrict force" << i << ", real dEdV" << i;
add << "f += force" << i << "[i]*dEdV" << i << ";\n";
}
map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = args.str();
replacements["ADD_FORCES"] = add.str();
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customCVForce, replacements));
copyStateKernel = cl::Kernel(program, "copyState");
copyForcesKernel = cl::Kernel(program, "copyForces");
addForcesKernel = cl::Kernel(program, "addForces");
}
/**
 * Evaluate the CustomCVForce: compute every collective variable in the inner
 * context, evaluate the energy expression, add the resulting forces to the
 * outer context, and accumulate energy parameter derivatives.
 *
 * @param context        the outer context the force belongs to
 * @param innerContext   the context in which each collective variable is evaluated
 * @param includeForces  not consulted; forces are always computed and added
 * @param includeEnergy  not consulted; the energy is always computed
 * @return the value of the energy expression
 */
double OpenCLCalcCustomCVForceKernel::execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy) {
    copyState(context, innerContext);
    int numCVs = (int) variableNames.size();
    int numAtoms = cl.getNumAtoms();

    // Evaluate each collective variable as force group i of the inner context, and keep
    // a copy of its forces and energy parameter derivatives.

    vector<double> cvValues;
    cvValues.reserve(numCVs);
    vector<map<string, double> > cvDerivs(numCVs);
    for (int i = 0; i < numCVs; i++) {
        cvValues.push_back(innerContext.calcForcesAndEnergy(true, true, 1<<i));
        copyForcesKernel.setArg<cl::Buffer>(0, cvForces[i]->getDeviceBuffer());
        cl.executeKernel(copyForcesKernel, numAtoms);
        innerContext.getEnergyParameterDerivatives(cvDerivs[i]);
    }

    // Compute the energy and forces.

    map<string, double> variables;
    for (auto& name : globalParameterNames)
        variables[name] = context.getParameter(name);
    for (int i = 0; i < numCVs; i++)
        variables[variableNames[i]] = cvValues[i];
    double energy = energyExpression.evaluate(variables);

    // Evaluate dE/dV once per collective variable; the value is needed both as a kernel
    // argument and for the chain rule on the parameter derivatives below.

    vector<double> dEdV(numCVs);
    for (int i = 0; i < numCVs; i++) {
        dEdV[i] = variableDerivExpressions[i].evaluate(variables);
        // Argument 2*i+3 is the "real dEdV<i>" slot that follows CV i's force buffer.
        if (cl.getUseDoublePrecision())
            addForcesKernel.setArg<cl_double>(2*i+3, dEdV[i]);
        else
            addForcesKernel.setArg<cl_float>(2*i+3, dEdV[i]);
    }
    cl.executeKernel(addForcesKernel, numAtoms);

    // Compute the energy parameter derivatives: the explicit dependence of the energy
    // expression, plus dE/dV times each collective variable's own derivatives.

    map<string, double>& energyParamDerivs = cl.getEnergyParamDerivWorkspace();
    for (size_t i = 0; i < paramDerivExpressions.size(); i++)
        energyParamDerivs[paramDerivNames[i]] += paramDerivExpressions[i].evaluate(variables);
    for (int i = 0; i < numCVs; i++)
        for (auto& deriv : cvDerivs[i])
            energyParamDerivs[deriv.first] += dEdV[i]*deriv.second;
    return energy;
}
/**
 * Bring the inner context up to date with the outer one: run the copyState
 * kernel, then copy the periodic box vectors, the time, and the value of every
 * parameter the inner context defines. On the first call it also registers the
 * reorder listeners and binds the fixed kernel arguments.
 *
 * @param context       the outer context to copy from
 * @param innerContext  the inner context to copy into
 */
void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& innerContext) {
    int numAtoms = cl.getNumAtoms();
    OpenCLPlatform::PlatformData* innerData = reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData());
    OpenCLContext& cl2 = *innerData->contexts[0];
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;

        // Register a listener on each context and run both once so the inverse
        // atom-order arrays are populated before first use.

        ReorderListener* outerListener = new ReorderListener(cl, *invAtomOrder);
        ReorderListener* innerListener = new ReorderListener(cl2, *innerInvAtomOrder);
        cl.addReorderListener(outerListener);
        cl2.addReorderListener(innerListener);
        outerListener->execute();
        innerListener->execute();

        // copyState: arguments 0-3 come from the outer context, 4-7 relate to the
        // inner context, and 8 is the atom count.

        copyStateKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
        copyStateKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
        copyStateKernel.setArg<cl::Buffer>(3, cl.getAtomIndexArray().getDeviceBuffer());
        copyStateKernel.setArg<cl::Buffer>(4, cl2.getPosq().getDeviceBuffer());
        copyStateKernel.setArg<cl::Buffer>(6, cl2.getVelm().getDeviceBuffer());
        copyStateKernel.setArg<cl::Buffer>(7, innerInvAtomOrder->getDeviceBuffer());
        copyStateKernel.setArg<cl_int>(8, numAtoms);

        // The position-correction buffers are only passed in mixed precision mode;
        // otherwise those two arguments are null.

        if (cl.getUseMixedPrecision()) {
            copyStateKernel.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer());
            copyStateKernel.setArg<cl::Buffer>(5, cl2.getPosqCorrection().getDeviceBuffer());
        }
        else {
            copyStateKernel.setArg<void*>(1, NULL);
            copyStateKernel.setArg<void*>(5, NULL);
        }

        // copyForces: argument 0 (the destination array) is set per call elsewhere.

        copyForcesKernel.setArg<cl::Buffer>(1, invAtomOrder->getDeviceBuffer());
        copyForcesKernel.setArg<cl::Buffer>(2, cl2.getForce().getDeviceBuffer());
        copyForcesKernel.setArg<cl::Buffer>(3, cl2.getAtomIndexArray().getDeviceBuffer());
        copyForcesKernel.setArg<cl_int>(4, numAtoms);

        // addForces: the force buffer for CV i goes in slot 2*i+2; the odd slots
        // in between are not set here.

        addForcesKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
        addForcesKernel.setArg<cl_int>(1, numAtoms);
        for (int cv = 0; cv < (int) cvForces.size(); cv++)
            addForcesKernel.setArg<cl::Buffer>(2*cv+2, cvForces[cv]->getDeviceBuffer());
    }
    cl.executeKernel(copyStateKernel, numAtoms);

    // Mirror the box vectors and the time.

    Vec3 boxA, boxB, boxC;
    context.getPeriodicBoxVectors(boxA, boxB, boxC);
    innerContext.setPeriodicBoxVectors(boxA, boxB, boxC);
    innerContext.setTime(context.getTime());

    // Work from a local copy of the inner context's parameter map while assigning
    // each parameter the value it has in the outer context.

    map<string, double> innerParameters = innerContext.getParameters();
    for (auto& entry : innerParameters)
        innerContext.setParameter(entry.first, context.getParameter(entry.first));
}
OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() { OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() {
} }
...@@ -7408,6 +7581,8 @@ OpenCLIntegrateCustomStepKernel::~OpenCLIntegrateCustomStepKernel() { ...@@ -7408,6 +7581,8 @@ OpenCLIntegrateCustomStepKernel::~OpenCLIntegrateCustomStepKernel() {
delete perDofEnergyParamDerivs; delete perDofEnergyParamDerivs;
if (perDofValues != NULL) if (perDofValues != NULL)
delete perDofValues; delete perDofValues;
for (auto function : tabulatedFunctions)
delete function;
for (auto& f : savedForces) for (auto& f : savedForces)
delete f.second; delete f.second;
} }
...@@ -7424,7 +7599,8 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus ...@@ -7424,7 +7599,8 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed()); SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
} }
string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) { string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator,
const string& forceName, const string& energyName, vector<const TabulatedFunction*>& functions, vector<pair<string, string> >& functionNames) {
const string suffixes[] = {".x", ".y", ".z"}; const string suffixes[] = {".x", ".y", ".z"};
string suffix = suffixes[component]; string suffix = suffixes[component];
map<string, Lepton::ParsedExpression> expressions; map<string, Lepton::ParsedExpression> expressions;
...@@ -7457,8 +7633,6 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va ...@@ -7457,8 +7633,6 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i); variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
for (int i = 0; i < (int) parameterNames.size(); i++) for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "globals["+cl.intToString(parameterVariableIndex[i])+"]"; variables[parameterNames[i]] = "globals["+cl.intToString(parameterVariableIndex[i])+"]";
vector<const TabulatedFunction*> functions;
vector<pair<string, string> > functionNames;
string tempType = (cl.getSupportsDoublePrecision() ? "double" : "float"); string tempType = (cl.getSupportsDoublePrecision() ? "double" : "float");
vector<pair<ExpressionTreeNode, string> > variableNodes; vector<pair<ExpressionTreeNode, string> > variableNodes;
findExpressionsForDerivs(expr.getRootNode(), variableNodes); findExpressionsForDerivs(expr.getRootNode(), variableNodes);
...@@ -7489,16 +7663,41 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7489,16 +7663,41 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
stepTarget.resize(numSteps); stepTarget.resize(numSteps);
merged.resize(numSteps, false); merged.resize(numSteps, false);
modifiesParameters = false; modifiesParameters = false;
sumWorkGroupSize = cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
if (sumWorkGroupSize > 512)
sumWorkGroupSize = 512;
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms()); defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["WORK_GROUP_SIZE"] = cl.intToString(OpenCLContext::ThreadBlockSize); defines["WORK_GROUP_SIZE"] = cl.intToString(sumWorkGroupSize);
// Record the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionNames;
vector<const TabulatedFunction*> functionList;
vector<string> tableTypes;
for (int i = 0; i < integrator.getNumTabulatedFunctions(); i++) {
functionList.push_back(&integrator.getTabulatedFunction(i));
string name = integrator.getTabulatedFunctionName(i);
string arrayName = "table"+cl.intToString(i);
functionNames.push_back(make_pair(name, arrayName));
functions[name] = createReferenceTabulatedFunction(integrator.getTabulatedFunction(i));
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(integrator.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
if (width == 1)
tableTypes.push_back("float");
else
tableTypes.push_back("float"+cl.intToString(width));
}
// Record information about all the computation steps. // Record information about all the computation steps.
vector<string> variable(numSteps); vector<string> variable(numSteps);
vector<int> forceGroup; vector<int> forceGroup;
vector<vector<Lepton::ParsedExpression> > expression; vector<vector<Lepton::ParsedExpression> > expression;
CustomIntegratorUtilities::analyzeComputations(context, integrator, expression, comparisons, blockEnd, invalidatesForces, needsForces, needsEnergy, computeBothForceAndEnergy, forceGroup); CustomIntegratorUtilities::analyzeComputations(context, integrator, expression, comparisons, blockEnd, invalidatesForces, needsForces, needsEnergy, computeBothForceAndEnergy, forceGroup, functions);
for (int step = 0; step < numSteps; step++) { for (int step = 0; step < numSteps; step++) {
string expr; string expr;
integrator.getComputationStep(step, stepType[step], variable[step], expr); integrator.getComputationStep(step, stepType[step], variable[step], expr);
...@@ -7669,7 +7868,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7669,7 +7868,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
if (numUniform > 0) if (numUniform > 0)
compute << "float4 uniform = uniformValues[uniformIndex+index];\n"; compute << "float4 uniform = uniformValues[uniformIndex+index];\n";
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j]); compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j], functionList, functionNames);
if (variable[j] == "x") { if (variable[j] == "x") {
if (storePosAsDelta[j]) { if (storePosAsDelta[j]) {
if (cl.getSupportsDoublePrecision()) if (cl.getSupportsDoublePrecision())
...@@ -7704,6 +7903,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7704,6 +7903,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
string valueName = "perDofValues"+cl.intToString(i+1); string valueName = "perDofValues"+cl.intToString(i+1);
args << ", __global " << buffer.getType() << "* restrict " << valueName; args << ", __global " << buffer.getType() << "* restrict " << valueName;
} }
for (int i = 0; i < (int) tableTypes.size(); i++)
args << ", __global const " << tableTypes[i]<< "* restrict table" << i;
replacements["PARAMETER_ARGUMENTS"] = args.str(); replacements["PARAMETER_ARGUMENTS"] = args.str();
if (loadPosAsDelta[step]) if (loadPosAsDelta[step])
defines["LOAD_POS_AS_DELTA"] = "1"; defines["LOAD_POS_AS_DELTA"] = "1";
...@@ -7727,6 +7928,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7727,6 +7928,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
for (auto& buffer : perDofValues->getBuffers()) for (auto& buffer : perDofValues->getBuffers())
kernel.setArg<cl::Memory>(index++, buffer.getMemory()); kernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto array : tabulatedFunctions)
kernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer());
if (stepType[step] == CustomIntegrator::ComputeSum) { if (stepType[step] == CustomIntegrator::ComputeSum) {
// Create a second kernel for this step that sums the values. // Create a second kernel for this step that sums the values.
...@@ -7789,7 +7992,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7789,7 +7992,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
} }
Lepton::ParsedExpression keExpression = Lepton::Parser::parse(integrator.getKineticEnergyExpression()).optimize(); Lepton::ParsedExpression keExpression = Lepton::Parser::parse(integrator.getKineticEnergyExpression()).optimize();
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
computeKE << createPerDofComputation("", keExpression, i, integrator, "f", ""); computeKE << createPerDofComputation("", keExpression, i, integrator, "f", "", functionList, functionNames);
map<string, string> replacements; map<string, string> replacements;
replacements["COMPUTE_STEP"] = computeKE.str(); replacements["COMPUTE_STEP"] = computeKE.str();
stringstream args; stringstream args;
...@@ -7798,6 +8001,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7798,6 +8001,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
string valueName = "perDofValues"+cl.intToString(i+1); string valueName = "perDofValues"+cl.intToString(i+1);
args << ", __global " << buffer.getType() << "* restrict " << valueName; args << ", __global " << buffer.getType() << "* restrict " << valueName;
} }
for (int i = 0; i < (int) tableTypes.size(); i++)
args << ", __global const " << tableTypes[i]<< "* restrict table" << i;
replacements["PARAMETER_ARGUMENTS"] = args.str(); replacements["PARAMETER_ARGUMENTS"] = args.str();
if (defines.find("LOAD_POS_AS_DELTA") != defines.end()) if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
defines.erase("LOAD_POS_AS_DELTA"); defines.erase("LOAD_POS_AS_DELTA");
...@@ -7821,6 +8026,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7821,6 +8026,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory()); kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
for (auto array : tabulatedFunctions)
kineticEnergyKernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer());
keNeedsForce = usesVariable(keExpression, "f"); keNeedsForce = usesVariable(keExpression, "f");
// Create a second kernel to sum the values. // Create a second kernel to sum the values.
...@@ -7831,8 +8038,13 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7831,8 +8038,13 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); sumKineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, summedValue->getDeviceBuffer()); sumKineticEnergyKernel.setArg<cl::Buffer>(index++, summedValue->getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl_int>(index++, 3*numAtoms); sumKineticEnergyKernel.setArg<cl_int>(index++, 3*numAtoms);
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
} }
// Make sure all values (variables, parameters, etc.) are up to date. // Make sure all values (variables, parameters, etc.) are up to date.
if (!deviceValuesAreCurrent) { if (!deviceValuesAreCurrent) {
...@@ -7900,19 +8112,26 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -7900,19 +8112,26 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities(); OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
int numAtoms = cl.getNumAtoms(); int numAtoms = cl.getNumAtoms();
int numSteps = integrator.getNumComputations(); int numSteps = integrator.getNumComputations();
if (!forcesAreValid)
savedEnergy.clear();
// Loop over computation steps in the integrator and execute them. // Loop over computation steps in the integrator and execute them.
for (int step = 0; step < numSteps; ) { for (int step = 0; step < numSteps; ) {
int nextStep = step+1; int nextStep = step+1;
int forceGroups = forceGroupFlags[step];
int lastForceGroups = context.getLastForceGroups(); int lastForceGroups = context.getLastForceGroups();
if ((needsForces[step] || needsEnergy[step]) && (!forcesAreValid || lastForceGroups != forceGroupFlags[step])) { bool haveForces = (!needsForces[step] || (forcesAreValid && lastForceGroups == forceGroups));
if (forcesAreValid && savedForces.find(lastForceGroups) != savedForces.end()) { bool haveEnergy = (!needsEnergy[step] || savedEnergy.find(forceGroups) != savedEnergy.end());
// The forces are still valid. We just need a different force group right now. Save the old if (!haveForces || !haveEnergy) {
// forces in case we need them again. if (forcesAreValid) {
if (savedForces.find(lastForceGroups) != savedForces.end() && validSavedForces.find(lastForceGroups) == validSavedForces.end()) {
cl.getForce().copyTo(*savedForces[lastForceGroups]); // The forces are still valid. We just need a different force group right now. Save the old
validSavedForces.insert(lastForceGroups); // forces in case we need them again.
cl.getForce().copyTo(*savedForces[lastForceGroups]);
validSavedForces.insert(lastForceGroups);
}
} }
else else
validSavedForces.clear(); validSavedForces.clear();
...@@ -7922,14 +8141,16 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -7922,14 +8141,16 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
bool computeForce = (needsForces[step] || computeBothForceAndEnergy[step]); bool computeForce = (needsForces[step] || computeBothForceAndEnergy[step]);
bool computeEnergy = (needsEnergy[step] || computeBothForceAndEnergy[step]); bool computeEnergy = (needsEnergy[step] || computeBothForceAndEnergy[step]);
if (!computeEnergy && validSavedForces.find(forceGroupFlags[step]) != validSavedForces.end()) { if (!computeEnergy && validSavedForces.find(forceGroups) != validSavedForces.end()) {
// We can just restore the forces we saved earlier. // We can just restore the forces we saved earlier.
savedForces[forceGroupFlags[step]]->copyTo(cl.getForce()); savedForces[forceGroups]->copyTo(cl.getForce());
context.getLastForceGroups() = forceGroups;
} }
else { else {
recordChangedParameters(context); recordChangedParameters(context);
energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroupFlags[step]); energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroups);
savedEnergy[forceGroups] = energy;
if (needsEnergyParamDerivs) { if (needsEnergyParamDerivs) {
context.getEnergyParameterDerivatives(energyParamDerivs); context.getEnergyParameterDerivatives(energyParamDerivs);
if (perDofEnergyParamDerivNames.size() > 0) { if (perDofEnergyParamDerivNames.size() > 0) {
...@@ -7948,6 +8169,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -7948,6 +8169,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
forcesAreValid = true; forcesAreValid = true;
} }
} }
if (needsEnergy[step])
energy = savedEnergy[forceGroups];
if (needsGlobals[step] && !deviceGlobalsAreCurrent) { if (needsGlobals[step] && !deviceGlobalsAreCurrent) {
// Upload the global values to the device. // Upload the global values to the device.
...@@ -7959,6 +8182,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -7959,6 +8182,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
globalValues->upload(globalValuesFloat); globalValues->upload(globalValuesFloat);
} }
} }
bool stepInvalidatesForces = invalidatesForces[step];
if (stepType[step] == CustomIntegrator::ComputePerDof && !merged[step]) { if (stepType[step] == CustomIntegrator::ComputePerDof && !merged[step]) {
kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step])); kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step]));
kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer()); kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer());
...@@ -7989,7 +8213,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -7989,7 +8213,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
cl.executeKernel(randomKernel, numAtoms); cl.executeKernel(randomKernel, numAtoms);
cl.clearBuffer(*sumBuffer); cl.clearBuffer(*sumBuffer);
cl.executeKernel(kernels[step][0], numAtoms, 128); cl.executeKernel(kernels[step][0], numAtoms, 128);
cl.executeKernel(kernels[step][1], OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize); cl.executeKernel(kernels[step][1], sumWorkGroupSize, sumWorkGroupSize);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) { if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double value; double value;
summedValue->download(&value); summedValue->download(&value);
...@@ -8003,7 +8227,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -8003,7 +8227,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
} }
else if (stepType[step] == CustomIntegrator::UpdateContextState) { else if (stepType[step] == CustomIntegrator::UpdateContextState) {
recordChangedParameters(context); recordChangedParameters(context);
context.updateContextState(); stepInvalidatesForces = context.updateContextState();
} }
else if (stepType[step] == CustomIntegrator::ConstrainPositions) { else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
if (hasAnyConstraints) { if (hasAnyConstraints) {
...@@ -8027,8 +8251,10 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -8027,8 +8251,10 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
if (blockEnd[step] != -1) if (blockEnd[step] != -1)
nextStep = blockEnd[step]; // Return to the start of a while block. nextStep = blockEnd[step]; // Return to the start of a while block.
} }
if (invalidatesForces[step]) if (stepInvalidatesForces) {
forcesAreValid = false; forcesAreValid = false;
savedEnergy.clear();
}
step = nextStep; step = nextStep;
} }
recordChangedParameters(context); recordChangedParameters(context);
...@@ -8089,7 +8315,7 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex ...@@ -8089,7 +8315,7 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
kineticEnergyKernel.setArg<cl::Buffer>(8, cl.getIntegrationUtilities().getRandom().getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(8, cl.getIntegrationUtilities().getRandom().getDeviceBuffer());
kineticEnergyKernel.setArg<cl_uint>(9, 0); kineticEnergyKernel.setArg<cl_uint>(9, 0);
cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms()); cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms());
cl.executeKernel(sumKineticEnergyKernel, OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize); cl.executeKernel(sumKineticEnergyKernel, sumWorkGroupSize, sumWorkGroupSize);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) { if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double ke; double ke;
summedValue->download(&ke); summedValue->download(&ke);
...@@ -8246,6 +8472,8 @@ void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) { ...@@ -8246,6 +8472,8 @@ void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) {
OpenCLApplyMonteCarloBarostatKernel::~OpenCLApplyMonteCarloBarostatKernel() { OpenCLApplyMonteCarloBarostatKernel::~OpenCLApplyMonteCarloBarostatKernel() {
if (savedPositions != NULL) if (savedPositions != NULL)
delete savedPositions; delete savedPositions;
if (savedForces != NULL)
delete savedForces;
if (moleculeAtoms != NULL) if (moleculeAtoms != NULL)
delete moleculeAtoms; delete moleculeAtoms;
if (moleculeStartIndex != NULL) if (moleculeStartIndex != NULL)
...@@ -8254,6 +8482,7 @@ OpenCLApplyMonteCarloBarostatKernel::~OpenCLApplyMonteCarloBarostatKernel() { ...@@ -8254,6 +8482,7 @@ OpenCLApplyMonteCarloBarostatKernel::~OpenCLApplyMonteCarloBarostatKernel() {
void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) { void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
savedPositions = new OpenCLArray(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions"); savedPositions = new OpenCLArray(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions");
savedForces = new OpenCLArray(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedForces");
cl::Program program = cl.createProgram(OpenCLKernelSources::monteCarloBarostat); cl::Program program = cl.createProgram(OpenCLKernelSources::monteCarloBarostat);
kernel = cl::Kernel(program, "scalePositions"); kernel = cl::Kernel(program, "scalePositions");
} }
...@@ -8289,6 +8518,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, ...@@ -8289,6 +8518,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
} }
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4)); int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(cl.getPosq().getDeviceBuffer(), savedPositions->getDeviceBuffer(), 0, 0, bytesToCopy); cl.getQueue().enqueueCopyBuffer(cl.getPosq().getDeviceBuffer(), savedPositions->getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), savedForces->getDeviceBuffer(), 0, 0, bytesToCopy);
kernel.setArg<cl_float>(0, (cl_float) scaleX); kernel.setArg<cl_float>(0, (cl_float) scaleX);
kernel.setArg<cl_float>(1, (cl_float) scaleY); kernel.setArg<cl_float>(1, (cl_float) scaleY);
kernel.setArg<cl_float>(2, (cl_float) scaleZ); kernel.setArg<cl_float>(2, (cl_float) scaleZ);
...@@ -8302,6 +8532,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, ...@@ -8302,6 +8532,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
void OpenCLApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
    // Both transfers move the same byte count: the length of the posq array
    // times the element width selected by the double precision setting.
    const size_t elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
    int numBytes = cl.getPosq().getSize()*elementSize;

    // Put back the positions and forces that scaleCoordinates() stored in the backup buffers.
    cl.getQueue().enqueueCopyBuffer(savedPositions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, numBytes);
    cl.getQueue().enqueueCopyBuffer(savedForces->getDeviceBuffer(), cl.getForce().getDeviceBuffer(), 0, 0, numBytes);
}
OpenCLRemoveCMMotionKernel::~OpenCLRemoveCMMotionKernel() { OpenCLRemoveCMMotionKernel::~OpenCLRemoveCMMotionKernel() {
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. * * Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -82,6 +82,7 @@ OpenCLPlatform::OpenCLPlatform() { ...@@ -82,6 +82,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory); registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory); registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory); registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCVForceKernel::Name(), factory);
registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory); registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
registerKernelFactory(CalcGayBerneForceKernel::Name(), factory); registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory); registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
...@@ -179,7 +180,20 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri ...@@ -179,7 +180,20 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri
char* threadsEnv = getenv("OPENMM_CPU_THREADS"); char* threadsEnv = getenv("OPENMM_CPU_THREADS");
if (threadsEnv != NULL) if (threadsEnv != NULL)
stringstream(threadsEnv) >> threads; stringstream(threadsEnv) >> threads;
context.setPlatformData(new PlatformData(context.getSystem(), platformPropValue, devicePropValue, precisionPropValue, cpuPmePropValue, pmeStreamPropValue, threads)); context.setPlatformData(new PlatformData(context.getSystem(), platformPropValue, devicePropValue, precisionPropValue, cpuPmePropValue,
pmeStreamPropValue, threads, NULL));
}
void OpenCLPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const {
    // Query the platform of the original context for each OpenCL property value,
    // so the new PlatformData is built from the same settings.
    Platform& sourcePlatform = originalContext.getPlatform();
    const Context& sourceOwner = originalContext.getOwner();
    string platformIndexValue = sourcePlatform.getPropertyValue(sourceOwner, OpenCLPlatformIndex());
    string deviceIndexValue = sourcePlatform.getPropertyValue(sourceOwner, OpenCLDeviceIndex());
    string precisionValue = sourcePlatform.getPropertyValue(sourceOwner, OpenCLPrecision());
    string useCpuPmeValue = sourcePlatform.getPropertyValue(sourceOwner, OpenCLUseCpuPme());
    string disablePmeStreamValue = sourcePlatform.getPropertyValue(sourceOwner, OpenCLDisablePmeStream());

    // Reuse the thread count recorded in the original context's platform data.
    PlatformData* sourceData = reinterpret_cast<PlatformData*>(originalContext.getPlatformData());
    int numThreads = sourceData->threads.getNumThreads();

    // Passing the original context (rather than NULL) marks this PlatformData as linked to it.
    PlatformData* linkedData = new PlatformData(context.getSystem(), platformIndexValue, deviceIndexValue, precisionValue,
            useCpuPmeValue, disablePmeStreamValue, numThreads, &originalContext);
    context.setPlatformData(linkedData);
}
void OpenCLPlatform::contextDestroyed(ContextImpl& context) const { void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
...@@ -188,7 +202,7 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const { ...@@ -188,7 +202,7 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
} }
OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& platformPropValue, const string& deviceIndexProperty, OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& platformPropValue, const string& deviceIndexProperty,
const string& precisionProperty, const string& cpuPmeProperty, const string& pmeStreamProperty, int numThreads) : const string& precisionProperty, const string& cpuPmeProperty, const string& pmeStreamProperty, int numThreads, ContextImpl* originalContext) :
removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) { removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) {
int platformIndex = -1; int platformIndex = -1;
if (platformPropValue.length() > 0) if (platformPropValue.length() > 0)
...@@ -200,16 +214,19 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p ...@@ -200,16 +214,19 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
searchPos = nextPos+1; searchPos = nextPos+1;
} }
devices.push_back(deviceIndexProperty.substr(searchPos)); devices.push_back(deviceIndexProperty.substr(searchPos));
PlatformData* originalData = NULL;
if (originalContext != NULL)
originalData = reinterpret_cast<PlatformData*>(originalContext->getPlatformData());
try { try {
for (int i = 0; i < (int) devices.size(); i++) { for (int i = 0; i < (int) devices.size(); i++) {
if (devices[i].length() > 0) { if (devices[i].length() > 0) {
int deviceIndex; int deviceIndex;
stringstream(devices[i]) >> deviceIndex; stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this)); contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this, (originalData == NULL ? NULL : originalData->contexts[i])));
} }
} }
if (contexts.size() == 0) if (contexts.size() == 0)
contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this)); contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this, (originalData == NULL ? NULL : originalData->contexts[0])));
} }
catch (...) { catch (...) {
// If an exception was thrown, do our best to clean up memory. // If an exception was thrown, do our best to clean up memory.
......
/**
 * Copy the positions and velocities to the inner context.
 *
 * Element i of the outer arrays is written to slot innerInvAtomOrder[atomOrder[i]]
 * of the inner arrays. The position correction array is copied only when
 * USE_MIXED_PRECISION is defined.
 */
__kernel void copyState(__global real4* posq, __global real4* posqCorrection, __global mixed4* velm, __global int* restrict atomOrder,
        __global real4* innerPosq, __global real4* innerPosqCorrection, __global mixed4* innerVelm, __global int* restrict innerInvAtomOrder,
        int numAtoms) {
    int src = get_global_id(0);
    while (src < numAtoms) {
        // Translate the outer slot into the matching slot of the inner arrays.
        const int dst = innerInvAtomOrder[atomOrder[src]];
        innerPosq[dst] = posq[src];
        innerVelm[dst] = velm[src];
#ifdef USE_MIXED_PRECISION
        innerPosqCorrection[dst] = posqCorrection[src];
#endif
        src += get_global_size(0);
    }
}
/**
 * Copy the forces back to the main context.
 *
 * Element i of innerForces is written to slot invAtomOrder[innerAtomOrder[i]]
 * of forces.
 */
__kernel void copyForces(__global real4* forces, __global int* restrict invAtomOrder, __global real4* innerForces,
        __global int* restrict innerAtomOrder, int numAtoms) {
    int src = get_global_id(0);
    while (src < numAtoms) {
        const int mapped = innerAtomOrder[src];
        forces[invAtomOrder[mapped]] = innerForces[src];
        src += get_global_size(0);
    }
}
/**
 * Add all the forces from the CVs.
 *
 * PARAMETER_ARGUMENTS and ADD_FORCES are macros that must be defined before this
 * source is compiled; they supply the remaining kernel arguments and the
 * statements that update the accumulated force.
 * NOTE(review): ADD_FORCES presumably refers to the locals `f` and `i` by name,
 * so they should not be renamed without checking the host code that defines it.
 */
__kernel void addForces(__global real4* forces, int numAtoms
PARAMETER_ARGUMENTS) {
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
// Load the current force, let the macro modify it, then store it back.
real4 f = forces[i];
ADD_FORCES
forces[i] = f;
}
}
\ No newline at end of file
/** /**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude. * Compute the difference between two vectors, optionally taking periodic boundary conditions into account
*/
real4 delta(real4 vec1, real4 vec2) {
    // Component-wise difference; the w slot is then overwritten with the
    // squared length of the xyz part.
    real4 diff = vec1-vec2;
    diff.w = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z;
    return diff;
}
/**
* Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude. * and setting the fourth component to the squared magnitude.
*/ */
real4 deltaPeriodic(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) { real4 delta(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0); real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(result) APPLY_PERIODIC_TO_DELTA(result)
...@@ -81,6 +72,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global ...@@ -81,6 +72,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += get_local_size(0)) { for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += get_local_size(0)) {
// Load the next block of acceptors into local memory. // Load the next block of acceptors into local memory.
barrier(CLK_LOCAL_MEM_FENCE);
int blockSize = min((int) get_local_size(0), NUM_ACCEPTORS-acceptorStart); int blockSize = min((int) get_local_size(0), NUM_ACCEPTORS-acceptorStart);
if (get_local_id(0) < blockSize) { if (get_local_id(0) < blockSize) {
int4 atoms2 = acceptorAtoms[acceptorStart+get_local_id(0)]; int4 atoms2 = acceptorAtoms[acceptorStart+get_local_id(0)];
...@@ -91,8 +83,8 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global ...@@ -91,8 +83,8 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (donorIndex < NUM_DONORS) { if (donorIndex < NUM_DONORS) {
for (int index = 0; index < blockSize; index++) { for (int index = 0; index < blockSize; index++) {
#ifdef USE_EXCLUSIONS
int acceptorIndex = acceptorStart+index; int acceptorIndex = acceptorStart+index;
#ifdef USE_EXCLUSIONS
if (acceptorIndex == exclusionIndices.x || acceptorIndex == exclusionIndices.y || acceptorIndex == exclusionIndices.z || acceptorIndex == exclusionIndices.w) if (acceptorIndex == exclusionIndices.x || acceptorIndex == exclusionIndices.y || acceptorIndex == exclusionIndices.z || acceptorIndex == exclusionIndices.w)
continue; continue;
#endif #endif
...@@ -101,7 +93,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global ...@@ -101,7 +93,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
real4 a1 = posBuffer[3*index]; real4 a1 = posBuffer[3*index];
real4 a2 = posBuffer[3*index+1]; real4 a2 = posBuffer[3*index+1];
real4 a3 = posBuffer[3*index+2]; real4 a3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ); real4 deltaD1A1 = delta(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) { if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif #endif
...@@ -169,6 +161,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo ...@@ -169,6 +161,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += get_local_size(0)) { for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += get_local_size(0)) {
// Load the next block of donors into local memory. // Load the next block of donors into local memory.
barrier(CLK_LOCAL_MEM_FENCE);
int blockSize = min((int) get_local_size(0), NUM_DONORS-donorStart); int blockSize = min((int) get_local_size(0), NUM_DONORS-donorStart);
if (get_local_id(0) < blockSize) { if (get_local_id(0) < blockSize) {
int4 atoms2 = donorAtoms[donorStart+get_local_id(0)]; int4 atoms2 = donorAtoms[donorStart+get_local_id(0)];
...@@ -179,8 +172,8 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo ...@@ -179,8 +172,8 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (acceptorIndex < NUM_ACCEPTORS) { if (acceptorIndex < NUM_ACCEPTORS) {
for (int index = 0; index < blockSize; index++) { for (int index = 0; index < blockSize; index++) {
#ifdef USE_EXCLUSIONS
int donorIndex = donorStart+index; int donorIndex = donorStart+index;
#ifdef USE_EXCLUSIONS
if (donorIndex == exclusionIndices.x || donorIndex == exclusionIndices.y || donorIndex == exclusionIndices.z || donorIndex == exclusionIndices.w) if (donorIndex == exclusionIndices.x || donorIndex == exclusionIndices.y || donorIndex == exclusionIndices.z || donorIndex == exclusionIndices.w)
continue; continue;
#endif #endif
...@@ -189,7 +182,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo ...@@ -189,7 +182,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
real4 d1 = posBuffer[3*index]; real4 d1 = posBuffer[3*index];
real4 d2 = posBuffer[3*index+1]; real4 d2 = posBuffer[3*index+1];
real4 d3 = posBuffer[3*index+2]; real4 d3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ); real4 deltaD1A1 = delta(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) { if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif #endif
......
...@@ -56,6 +56,11 @@ inline real4 computeCross(real4 vec1, real4 vec2) { ...@@ -56,6 +56,11 @@ inline real4 computeCross(real4 vec1, real4 vec2) {
* Determine whether a particular interaction is in the list of exclusions. * Determine whether a particular interaction is in the list of exclusions.
*/ */
inline bool isInteractionExcluded(int atom1, int atom2, __global const int* restrict exclusions, __global const int* restrict exclusionStartIndex) { inline bool isInteractionExcluded(int atom1, int atom2, __global const int* restrict exclusions, __global const int* restrict exclusionStartIndex) {
if (atom1 > atom2) {
int temp = atom1;
atom1 = atom2;
atom2 = temp;
}
int first = exclusionStartIndex[atom1]; int first = exclusionStartIndex[atom1];
int last = exclusionStartIndex[atom1+1]; int last = exclusionStartIndex[atom1+1];
for (int i = last-1; i >= first; i--) { for (int i = last-1; i >= first; i--) {
......
...@@ -97,6 +97,24 @@ __kernel void reduceForces(__global const long* restrict longBuffer, __global re ...@@ -97,6 +97,24 @@ __kernel void reduceForces(__global const long* restrict longBuffer, __global re
} }
#endif #endif
/**
 * Sum the energy buffer.
 *
 * Each work-item accumulates the entries of energyBuffer at its local id plus
 * multiples of the local size, stores its partial sum in tempBuffer, and the
 * partial sums are then combined pairwise. The work-item with local id 0 writes
 * the total to *result.
 * NOTE(review): only local ids are used, so this looks intended to be launched
 * as a single work-group with workGroupSize equal to the local size -- confirm
 * against the host code.
 */
__kernel void reduceEnergy(__global const mixed* restrict energyBuffer, __global mixed* restrict result, int bufferSize, int workGroupSize, __local mixed* tempBuffer) {
    const unsigned int lane = get_local_id(0);

    // Per-work-item partial sum over a strided slice of the buffer.
    mixed partial = 0;
    for (unsigned int pos = lane; pos < bufferSize; pos += get_local_size(0))
        partial += energyBuffer[pos];
    tempBuffer[lane] = partial;

    // Pairwise combination: at each pass the distance between combined slots doubles.
    int offset = 1;
    while (offset < workGroupSize) {
        // Make the previous pass's writes to local memory visible before reading them.
        barrier(CLK_LOCAL_MEM_FENCE);
        if (lane%(offset*2) == 0 && lane+offset < workGroupSize)
            tempBuffer[lane] += tempBuffer[lane+offset];
        offset *= 2;
    }
    if (lane == 0)
        result[0] = tempBuffer[0];
}
/** /**
* This is called to determine the accuracy of various native functions. * This is called to determine the accuracy of various native functions.
*/ */
......
...@@ -33,7 +33,9 @@ __kernel void computeVirtualSites(__global real4* restrict posq, ...@@ -33,7 +33,9 @@ __kernel void computeVirtualSites(__global real4* restrict posq,
__global const int4* restrict avg2Atoms, __global const real2* restrict avg2Weights, __global const int4* restrict avg2Atoms, __global const real2* restrict avg2Weights,
__global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights, __global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights,
__global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights, __global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights,
__global const int4* restrict localCoordsAtoms, __global const real* restrict localCoordsParams) { __global const int* restrict localCoordsIndex, __global const int* restrict localCoordsAtoms,
__global const real* restrict localCoordsWeights, __global const real4* restrict localCoordsPos,
__global const int* restrict localCoordsStartIndex) {
#ifndef USE_MIXED_PRECISION #ifndef USE_MIXED_PRECISION
__global real4* posqCorrection = 0; __global real4* posqCorrection = 0;
#endif #endif
...@@ -81,30 +83,30 @@ __kernel void computeVirtualSites(__global real4* restrict posq, ...@@ -81,30 +83,30 @@ __kernel void computeVirtualSites(__global real4* restrict posq,
// Local coordinates sites. // Local coordinates sites.
for (int index = get_global_id(0); index < NUM_LOCAL_COORDS; index += get_global_size(0)) { for (int index = get_global_id(0); index < NUM_LOCAL_COORDS; index += get_global_size(0)) {
int4 atoms = localCoordsAtoms[index]; int siteAtomIndex = localCoordsIndex[index];
__global const real* params = &localCoordsParams[12*index]; int start = localCoordsStartIndex[index];
mixed4 pos = loadPos(posq, posqCorrection, atoms.x); int end = localCoordsStartIndex[index+1];
mixed4 pos1_4 = loadPos(posq, posqCorrection, atoms.y); mixed3 origin = 0, xdir = 0, ydir = 0;
mixed4 pos2_4 = loadPos(posq, posqCorrection, atoms.z); for (int j = start; j < end; j++) {
mixed4 pos3_4 = loadPos(posq, posqCorrection, atoms.w); mixed3 pos = loadPos(posq, posqCorrection, localCoordsAtoms[j]).xyz;
mixed4 pos1 = (mixed4) (pos1_4.x, pos1_4.y, pos1_4.z, 0); origin += pos*localCoordsWeights[3*j];
mixed4 pos2 = (mixed4) (pos2_4.x, pos2_4.y, pos2_4.z, 0); xdir += pos*localCoordsWeights[3*j+1];
mixed4 pos3 = (mixed4) (pos3_4.x, pos3_4.y, pos3_4.z, 0); ydir += pos*localCoordsWeights[3*j+2];
mixed4 originWeights = (mixed4) (params[0], params[1], params[2], 0); }
mixed4 xWeights = (mixed4) (params[3], params[4], params[5], 0); mixed3 zdir = cross(xdir, ydir);
mixed4 yWeights = (mixed4) (params[6], params[7], params[8], 0); mixed normXdir = sqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
mixed4 localPosition = (mixed4) (params[9], params[10], params[11], 0); mixed normZdir = sqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
mixed4 origin = pos1*originWeights.x + pos2*originWeights.y + pos3*originWeights.z; mixed invNormXdir = (normXdir > 0 ? 1/normXdir : 0);
mixed4 xdir = pos1*xWeights.x + pos2*xWeights.y + pos3*xWeights.z; mixed invNormZdir = (normZdir > 0 ? 1/normZdir : 0);
mixed4 ydir = pos1*yWeights.x + pos2*yWeights.y + pos3*yWeights.z; xdir *= invNormXdir;
mixed4 zdir = cross(xdir, ydir); zdir *= invNormZdir;
xdir *= rsqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
zdir *= rsqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
ydir = cross(zdir, xdir); ydir = cross(zdir, xdir);
mixed3 localPosition = convert_mixed4(localCoordsPos[index]).xyz;
mixed4 pos = loadPos(posq, posqCorrection, siteAtomIndex);
pos.x = origin.x + xdir.x*localPosition.x + ydir.x*localPosition.y + zdir.x*localPosition.z; pos.x = origin.x + xdir.x*localPosition.x + ydir.x*localPosition.y + zdir.x*localPosition.z;
pos.y = origin.y + xdir.y*localPosition.x + ydir.y*localPosition.y + zdir.y*localPosition.z; pos.y = origin.y + xdir.y*localPosition.x + ydir.y*localPosition.y + zdir.y*localPosition.z;
pos.z = origin.z + xdir.z*localPosition.x + ydir.z*localPosition.y + zdir.z*localPosition.z; pos.z = origin.z + xdir.z*localPosition.x + ydir.z*localPosition.y + zdir.z*localPosition.z;
storePos(posq, posqCorrection, atoms.x, pos); storePos(posq, posqCorrection, siteAtomIndex, pos);
} }
} }
...@@ -174,7 +176,9 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea ...@@ -174,7 +176,9 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
__global const int4* restrict avg2Atoms, __global const real2* restrict avg2Weights, __global const int4* restrict avg2Atoms, __global const real2* restrict avg2Weights,
__global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights, __global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights,
__global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights, __global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights,
__global const int4* restrict localCoordsAtoms, __global const real* restrict localCoordsParams) { __global const int* restrict localCoordsIndex, __global const int* restrict localCoordsAtoms,
__global const real* restrict localCoordsWeights, __global const real4* restrict localCoordsPos,
__global const int* restrict localCoordsStartIndex) {
#ifndef USE_MIXED_PRECISION #ifndef USE_MIXED_PRECISION
__global real4* posqCorrection = 0; __global real4* posqCorrection = 0;
#endif #endif
...@@ -225,86 +229,54 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea ...@@ -225,86 +229,54 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
// Local coordinates sites. // Local coordinates sites.
for (int index = get_global_id(0); index < NUM_LOCAL_COORDS; index += get_global_size(0)) { for (int index = get_global_id(0); index < NUM_LOCAL_COORDS; index += get_global_size(0)) {
int4 atoms = localCoordsAtoms[index]; int siteAtomIndex = localCoordsIndex[index];
__global const real* params = &localCoordsParams[12*index]; int start = localCoordsStartIndex[index];
mixed4 pos = loadPos(posq, posqCorrection, atoms.x); int end = localCoordsStartIndex[index+1];
mixed4 pos1_4 = loadPos(posq, posqCorrection, atoms.y); mixed3 origin = 0, xdir = 0, ydir = 0;
mixed4 pos2_4 = loadPos(posq, posqCorrection, atoms.z); for (int j = start; j < end; j++) {
mixed4 pos3_4 = loadPos(posq, posqCorrection, atoms.w); mixed3 pos = loadPos(posq, posqCorrection, localCoordsAtoms[j]).xyz;
mixed4 pos1 = (mixed4) (pos1_4.x, pos1_4.y, pos1_4.z, 0); origin += pos*localCoordsWeights[3*j];
mixed4 pos2 = (mixed4) (pos2_4.x, pos2_4.y, pos2_4.z, 0); xdir += pos*localCoordsWeights[3*j+1];
mixed4 pos3 = (mixed4) (pos3_4.x, pos3_4.y, pos3_4.z, 0); ydir += pos*localCoordsWeights[3*j+2];
mixed4 originWeights = (mixed4) (params[0], params[1], params[2], 0); }
mixed4 wx = (mixed4) (params[3], params[4], params[5], 0); mixed3 zdir = cross(xdir, ydir);
mixed4 wy = (mixed4) (params[6], params[7], params[8], 0); mixed normXdir = sqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
mixed4 localPosition = (mixed4) (params[9], params[10], params[11], 0); mixed normZdir = sqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
mixed4 origin = pos1*originWeights.x + pos2*originWeights.y + pos3*originWeights.z; mixed invNormXdir = (normXdir > 0 ? 1/normXdir : 0);
mixed4 xdir = pos1*wx.x + pos2*wx.y + pos3*wx.z; mixed invNormZdir = (normZdir > 0 ? 1/normZdir : 0);
mixed4 ydir = pos1*wy.x + pos2*wy.y + pos3*wy.z; mixed3 dx = xdir*invNormXdir;
mixed4 zdir = cross(xdir, ydir); mixed3 dz = zdir*invNormZdir;
mixed invNormXdir = rsqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z); mixed3 dy = cross(dz, dx);
mixed invNormZdir = rsqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z); mixed3 localPosition = convert_mixed4(localCoordsPos[index]).xyz;
mixed4 dx = xdir*invNormXdir;
mixed4 dz = zdir*invNormZdir;
mixed4 dy = cross(dz, dx);
// The derivatives for this case are very complicated. They were computed with SymPy then simplified by hand. // The derivatives for this case are very complicated. They were computed with SymPy then simplified by hand.
mixed t11 = (wx.x*ydir.x-wy.x*xdir.x)*invNormZdir; real4 f = force[siteAtomIndex];
mixed t12 = (wx.x*ydir.y-wy.x*xdir.y)*invNormZdir; mixed3 fp1 = localPosition*f.x;
mixed t13 = (wx.x*ydir.z-wy.x*xdir.z)*invNormZdir; mixed3 fp2 = localPosition*f.y;
mixed t21 = (wx.y*ydir.x-wy.y*xdir.x)*invNormZdir; mixed3 fp3 = localPosition*f.z;
mixed t22 = (wx.y*ydir.y-wy.y*xdir.y)*invNormZdir; for (int j = start; j < end; j++) {
mixed t23 = (wx.y*ydir.z-wy.y*xdir.z)*invNormZdir; real originWeight = localCoordsWeights[3*j];
mixed t31 = (wx.z*ydir.x-wy.z*xdir.x)*invNormZdir; real wx = localCoordsWeights[3*j+1];
mixed t32 = (wx.z*ydir.y-wy.z*xdir.y)*invNormZdir; real wy = localCoordsWeights[3*j+2];
mixed t33 = (wx.z*ydir.z-wy.z*xdir.z)*invNormZdir; mixed wxScaled = wx*invNormXdir;
mixed sx1 = t13*dz.y-t12*dz.z; mixed t1 = (wx*ydir.x-wy*xdir.x)*invNormZdir;
mixed sy1 = t11*dz.z-t13*dz.x; mixed t2 = (wx*ydir.y-wy*xdir.y)*invNormZdir;
mixed sz1 = t12*dz.x-t11*dz.y; mixed t3 = (wx*ydir.z-wy*xdir.z)*invNormZdir;
mixed sx2 = t23*dz.y-t22*dz.z; mixed sx = t3*dz.y-t2*dz.z;
mixed sy2 = t21*dz.z-t23*dz.x; mixed sy = t1*dz.z-t3*dz.x;
mixed sz2 = t22*dz.x-t21*dz.y; mixed sz = t2*dz.x-t1*dz.y;
mixed sx3 = t33*dz.y-t32*dz.z; real4 fresult = 0;
mixed sy3 = t31*dz.z-t33*dz.x; fresult.x += fp1.x*wxScaled*(1-dx.x*dx.x) + fp1.z*(dz.x*sx ) + fp1.y*((-dx.x*dy.x )*wxScaled + dy.x*sx - dx.y*t2 - dx.z*t3) + f.x*originWeight;
mixed sz3 = t32*dz.x-t31*dz.y; fresult.y += fp1.x*wxScaled*( -dx.x*dx.y) + fp1.z*(dz.x*sy+t3) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled + dy.x*sy + dx.y*t1);
mixed4 wxScaled = wx*invNormXdir; fresult.z += fp1.x*wxScaled*( -dx.x*dx.z) + fp1.z*(dz.x*sz-t2) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled + dy.x*sz + dx.z*t1);
real4 f = force[atoms.x]; fresult.x += fp2.x*wxScaled*( -dx.y*dx.x) + fp2.z*(dz.y*sx-t3) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled - dy.y*sx - dx.x*t2);
real4 f1 = 0; fresult.y += fp2.x*wxScaled*(1-dx.y*dx.y) + fp2.z*(dz.y*sy ) - fp2.y*(( dx.y*dy.y )*wxScaled - dy.y*sy + dx.x*t1 + dx.z*t3) + f.y*originWeight;
real4 f2 = 0; fresult.z += fp2.x*wxScaled*( -dx.y*dx.z) + fp2.z*(dz.y*sz+t1) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled - dy.y*sz - dx.z*t2);
real4 f3 = 0; fresult.x += fp3.x*wxScaled*( -dx.z*dx.x) + fp3.z*(dz.z*sx+t2) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled + dy.z*sx + dx.x*t3);
mixed4 fp1 = localPosition*f.x; fresult.y += fp3.x*wxScaled*( -dx.z*dx.y) + fp3.z*(dz.z*sy-t1) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled + dy.z*sy + dx.y*t3);
mixed4 fp2 = localPosition*f.y; fresult.z += fp3.x*wxScaled*(1-dx.z*dx.z) + fp3.z*(dz.z*sz ) + fp3.y*((-dx.z*dy.z )*wxScaled + dy.z*sz - dx.x*t1 - dx.y*t2) + f.z*originWeight;
mixed4 fp3 = localPosition*f.z; ADD_FORCE(localCoordsAtoms[j], fresult);
f1.x += fp1.x*wxScaled.x*(1-dx.x*dx.x) + fp1.z*(dz.x*sx1 ) + fp1.y*((-dx.x*dy.x )*wxScaled.x + dy.x*sx1 - dx.y*t12 - dx.z*t13) + f.x*originWeights.x; }
f1.y += fp1.x*wxScaled.x*( -dx.x*dx.y) + fp1.z*(dz.x*sy1+t13) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled.x + dy.x*sy1 + dx.y*t11);
f1.z += fp1.x*wxScaled.x*( -dx.x*dx.z) + fp1.z*(dz.x*sz1-t12) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled.x + dy.x*sz1 + dx.z*t11);
f2.x += fp1.x*wxScaled.y*(1-dx.x*dx.x) + fp1.z*(dz.x*sx2 ) + fp1.y*((-dx.x*dy.x )*wxScaled.y + dy.x*sx2 - dx.y*t22 - dx.z*t23) + f.x*originWeights.y;
f2.y += fp1.x*wxScaled.y*( -dx.x*dx.y) + fp1.z*(dz.x*sy2+t23) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled.y + dy.x*sy2 + dx.y*t21);
f2.z += fp1.x*wxScaled.y*( -dx.x*dx.z) + fp1.z*(dz.x*sz2-t22) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled.y + dy.x*sz2 + dx.z*t21);
f3.x += fp1.x*wxScaled.z*(1-dx.x*dx.x) + fp1.z*(dz.x*sx3 ) + fp1.y*((-dx.x*dy.x )*wxScaled.z + dy.x*sx3 - dx.y*t32 - dx.z*t33) + f.x*originWeights.z;
f3.y += fp1.x*wxScaled.z*( -dx.x*dx.y) + fp1.z*(dz.x*sy3+t33) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled.z + dy.x*sy3 + dx.y*t31);
f3.z += fp1.x*wxScaled.z*( -dx.x*dx.z) + fp1.z*(dz.x*sz3-t32) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled.z + dy.x*sz3 + dx.z*t31);
f1.x += fp2.x*wxScaled.x*( -dx.y*dx.x) + fp2.z*(dz.y*sx1-t13) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled.x - dy.y*sx1 - dx.x*t12);
f1.y += fp2.x*wxScaled.x*(1-dx.y*dx.y) + fp2.z*(dz.y*sy1 ) - fp2.y*(( dx.y*dy.y )*wxScaled.x - dy.y*sy1 + dx.x*t11 + dx.z*t13) + f.y*originWeights.x;
f1.z += fp2.x*wxScaled.x*( -dx.y*dx.z) + fp2.z*(dz.y*sz1+t11) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled.x - dy.y*sz1 - dx.z*t12);
f2.x += fp2.x*wxScaled.y*( -dx.y*dx.x) + fp2.z*(dz.y*sx2-t23) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled.y - dy.y*sx2 - dx.x*t22);
f2.y += fp2.x*wxScaled.y*(1-dx.y*dx.y) + fp2.z*(dz.y*sy2 ) - fp2.y*(( dx.y*dy.y )*wxScaled.y - dy.y*sy2 + dx.x*t21 + dx.z*t23) + f.y*originWeights.y;
f2.z += fp2.x*wxScaled.y*( -dx.y*dx.z) + fp2.z*(dz.y*sz2+t21) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled.y - dy.y*sz2 - dx.z*t22);
f3.x += fp2.x*wxScaled.z*( -dx.y*dx.x) + fp2.z*(dz.y*sx3-t33) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled.z - dy.y*sx3 - dx.x*t32);
f3.y += fp2.x*wxScaled.z*(1-dx.y*dx.y) + fp2.z*(dz.y*sy3 ) - fp2.y*(( dx.y*dy.y )*wxScaled.z - dy.y*sy3 + dx.x*t31 + dx.z*t33) + f.y*originWeights.z;
f3.z += fp2.x*wxScaled.z*( -dx.y*dx.z) + fp2.z*(dz.y*sz3+t31) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled.z - dy.y*sz3 - dx.z*t32);
f1.x += fp3.x*wxScaled.x*( -dx.z*dx.x) + fp3.z*(dz.z*sx1+t12) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled.x + dy.z*sx1 + dx.x*t13);
f1.y += fp3.x*wxScaled.x*( -dx.z*dx.y) + fp3.z*(dz.z*sy1-t11) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled.x + dy.z*sy1 + dx.y*t13);
f1.z += fp3.x*wxScaled.x*(1-dx.z*dx.z) + fp3.z*(dz.z*sz1 ) + fp3.y*((-dx.z*dy.z )*wxScaled.x + dy.z*sz1 - dx.x*t11 - dx.y*t12) + f.z*originWeights.x;
f2.x += fp3.x*wxScaled.y*( -dx.z*dx.x) + fp3.z*(dz.z*sx2+t22) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled.y + dy.z*sx2 + dx.x*t23);
f2.y += fp3.x*wxScaled.y*( -dx.z*dx.y) + fp3.z*(dz.z*sy2-t21) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled.y + dy.z*sy2 + dx.y*t23);
f2.z += fp3.x*wxScaled.y*(1-dx.z*dx.z) + fp3.z*(dz.z*sz2 ) + fp3.y*((-dx.z*dy.z )*wxScaled.y + dy.z*sz2 - dx.x*t21 - dx.y*t22) + f.z*originWeights.y;
f3.x += fp3.x*wxScaled.z*( -dx.z*dx.x) + fp3.z*(dz.z*sx3+t32) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled.z + dy.z*sx3 + dx.x*t33);
f3.y += fp3.x*wxScaled.z*( -dx.z*dx.y) + fp3.z*(dz.z*sy3-t31) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled.z + dy.z*sy3 + dx.y*t33);
f3.z += fp3.x*wxScaled.z*(1-dx.z*dx.z) + fp3.z*(dz.z*sz3 ) + fp3.y*((-dx.z*dy.z )*wxScaled.z + dy.z*sz3 - dx.x*t31 - dx.y*t32) + f.z*originWeights.z;
ADD_FORCE(atoms.y, f1);
ADD_FORCE(atoms.z, f2);
ADD_FORCE(atoms.w, f3);
} }
} }
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestCustomCVForce.h"
// Hook for tests that apply only to this platform. It is intentionally empty:
// no OpenCL-specific CustomCVForce tests are defined in this file.
// NOTE(review): presumably invoked by the shared driver in TestCustomCVForce.h — confirm.
void runPlatformTests() {
}
...@@ -54,7 +54,7 @@ template <class Real2> ...@@ -54,7 +54,7 @@ template <class Real2>
void testTransform(bool realToComplex, int xsize, int ysize, int zsize) { void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
System system; System system;
system.addParticle(0.0); system.addParticle(0.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1); OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1, NULL);
OpenCLContext& context = *platformData.contexts[0]; OpenCLContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
......
...@@ -54,7 +54,7 @@ void testGaussian() { ...@@ -54,7 +54,7 @@ void testGaussian() {
System system; System system;
for (int i = 0; i < numAtoms; i++) for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0); system.addParticle(1.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1); OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1, NULL);
OpenCLContext& context = *platformData.contexts[0]; OpenCLContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0); context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
...@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) { ...@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System system; System system;
system.addParticle(0.0); system.addParticle(0.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1); OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1, NULL);
OpenCLContext& context = *platformData.contexts[0]; OpenCLContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
OpenCLArray data(context, array.size(), sizeof(float), "sortData"); OpenCLArray data(context, array.size(), sizeof(float), "sortData");
......
/* Portions copyright (c) 2017 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __ReferenceCustomCVForce_H__
#define __ReferenceCustomCVForce_H__
#include "openmm/CustomCVForce.h"
#include "openmm/internal/ContextImpl.h"
#include "lepton/ExpressionProgram.h"
#include <map>
#include <string>
#include <vector>
namespace OpenMM {
/**
 * Helper used by ReferenceCalcCustomCVForceKernel to evaluate a CustomCVForce.
 * Only the interface is declared here; the implementation is not part of this header.
 */
class ReferenceCustomCVForce {
private:
// Lepton program for the energy.
// NOTE(review): presumably compiled from the force's energy function in the constructor — confirm.
Lepton::ExpressionProgram energyExpression;
// Names associated with the derivative programs below.
// NOTE(review): presumably collective variable names and parameter-derivative names, in matching order — confirm.
std::vector<std::string> variableNames, paramDerivNames;
// NOTE(review): presumably one derivative program per entry of variableNames — confirm.
std::vector<Lepton::ExpressionProgram> variableDerivExpressions;
// NOTE(review): presumably one derivative program per entry of paramDerivNames — confirm.
std::vector<Lepton::ExpressionProgram> paramDerivExpressions;
public:
/**
 * Constructor.
 *
 * @param force the CustomCVForce this object will evaluate
 */
ReferenceCustomCVForce(const OpenMM::CustomCVForce& force);
/**
 * Destructor.
 */
~ReferenceCustomCVForce();
/**
 * Calculate the interaction.
 *
 * @param innerContext the context created by the force for evaluating collective variables
 * @param atomCoordinates atom coordinates
 * @param globalParameters the values of global parameters
 * @param forces the forces are added to this
 * @param totalEnergy the energy is added to this; the Reference kernel passes NULL
 *                    when the energy is not requested
 * @param energyParamDerivs parameter derivatives are added to this
 */
void calculateIxn(ContextImpl& innerContext, std::vector<OpenMM::Vec3>& atomCoordinates,
const std::map<std::string, double>& globalParameters,
std::vector<OpenMM::Vec3>& forces, double* totalEnergy, std::map<std::string, double>& energyParamDerivs) const;
};
} // namespace OpenMM
#endif // __ReferenceCustomCVForce_H__
...@@ -45,6 +45,7 @@ class ReferenceObc; ...@@ -45,6 +45,7 @@ class ReferenceObc;
class ReferenceAndersenThermostat; class ReferenceAndersenThermostat;
class ReferenceCustomCentroidBondIxn; class ReferenceCustomCentroidBondIxn;
class ReferenceCustomCompoundBondIxn; class ReferenceCustomCompoundBondIxn;
class ReferenceCustomCVForce;
class ReferenceCustomHbondIxn; class ReferenceCustomHbondIxn;
class ReferenceCustomManyParticleIxn; class ReferenceCustomManyParticleIxn;
class ReferenceGayBerneForce; class ReferenceGayBerneForce;
...@@ -1006,6 +1007,44 @@ private: ...@@ -1006,6 +1007,44 @@ private:
ReferenceGayBerneForce* ixn; ReferenceGayBerneForce* ixn;
}; };
/**
 * This kernel is invoked by CustomCVForce to calculate the forces acting on the system and the energy of the system.
 * It owns a ReferenceCustomCVForce, which is allocated in initialize() and released by the destructor.
 */
class ReferenceCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
public:
/**
 * Create the kernel. ixn starts out as NULL and is only allocated by initialize().
 *
 * @param name      the name of the kernel, forwarded to CalcCustomCVForceKernel
 * @param platform  the Platform, forwarded to CalcCustomCVForceKernel
 */
ReferenceCalcCustomCVForceKernel(std::string name, const Platform& platform) : CalcCustomCVForceKernel(name, platform), ixn(NULL) {
}
/**
 * Delete the ReferenceCustomCVForce created by initialize(), if any.
 */
~ReferenceCalcCustomCVForceKernel();
/**
 * Initialize the kernel. Records the force's global parameter names and energy
 * parameter derivative names, and creates the ReferenceCustomCVForce.
 *
 * @param system       the System this kernel will be applied to
 * @param force        the CustomCVForce this kernel will be used for
 * @param innerContext the context created by the CustomCVForce for computing collective variables
 */
void initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext);
/**
 * Execute the kernel to calculate the forces and/or energy. The state of the outer
 * context is copied to the inner context (see copyState()) before the calculation.
 *
 * @param context        the context in which to execute this kernel
 * @param innerContext   the context created by the CustomCVForce for computing collective variables
 * @param includeForces  true if forces should be calculated
 * @param includeEnergy  true if the energy should be calculated
 * @return the potential energy due to the force (0 if includeEnergy is false)
 */
double execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy);
/**
 * Copy state information to the inner context: positions, velocities, periodic box
 * vectors, time, and the value of every parameter defined in the inner context.
 *
 * @param context        the context in which to execute this kernel
 * @param innerContext   the context created by the CustomCVForce for computing collective variables
 */
void copyState(ContextImpl& context, ContextImpl& innerContext);
private:
// Owned; NULL until initialize() is called.
ReferenceCustomCVForce* ixn;
// Names collected from the CustomCVForce in initialize().
std::vector<std::string> globalParameterNames, energyParamDerivNames;
};
/** /**
* This kernel is invoked by VerletIntegrator to take one time step. * This kernel is invoked by VerletIntegrator to take one time step.
*/ */
......
...@@ -78,6 +78,8 @@ KernelImpl* ReferenceKernelFactory::createKernelImpl(std::string name, const Pla ...@@ -78,6 +78,8 @@ KernelImpl* ReferenceKernelFactory::createKernelImpl(std::string name, const Pla
return new ReferenceCalcCustomCentroidBondForceKernel(name, platform); return new ReferenceCalcCustomCentroidBondForceKernel(name, platform);
if (name == CalcCustomCompoundBondForceKernel::Name()) if (name == CalcCustomCompoundBondForceKernel::Name())
return new ReferenceCalcCustomCompoundBondForceKernel(name, platform); return new ReferenceCalcCustomCompoundBondForceKernel(name, platform);
if (name == CalcCustomCVForceKernel::Name())
return new ReferenceCalcCustomCVForceKernel(name, platform);
if (name == CalcCustomManyParticleForceKernel::Name()) if (name == CalcCustomManyParticleForceKernel::Name())
return new ReferenceCalcCustomManyParticleForceKernel(name, platform); return new ReferenceCalcCustomManyParticleForceKernel(name, platform);
if (name == CalcGayBerneForceKernel::Name()) if (name == CalcGayBerneForceKernel::Name())
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include "ReferenceCustomBondIxn.h" #include "ReferenceCustomBondIxn.h"
#include "ReferenceCustomCentroidBondIxn.h" #include "ReferenceCustomCentroidBondIxn.h"
#include "ReferenceCustomCompoundBondIxn.h" #include "ReferenceCustomCompoundBondIxn.h"
#include "ReferenceCustomCVForce.h"
#include "ReferenceCustomDynamics.h" #include "ReferenceCustomDynamics.h"
#include "ReferenceCustomExternalIxn.h" #include "ReferenceCustomExternalIxn.h"
#include "ReferenceCustomGBIxn.h" #include "ReferenceCustomGBIxn.h"
...@@ -1473,7 +1474,8 @@ double ReferenceCalcCustomGBForceKernel::execute(ContextImpl& context, bool incl ...@@ -1473,7 +1474,8 @@ double ReferenceCalcCustomGBForceKernel::execute(ContextImpl& context, bool incl
if (periodic) if (periodic)
ixn.setPeriodic(extractBoxVectors(context)); ixn.setPeriodic(extractBoxVectors(context));
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
computeNeighborListVoxelHash(*neighborList, numParticles, posData, exclusions, extractBoxVectors(context), periodic, nonbondedCutoff, 0.0); vector<set<int> > empty(context.getSystem().getNumParticles()); // Don't omit exclusions from the neighbor list
computeNeighborListVoxelHash(*neighborList, numParticles, posData, empty, extractBoxVectors(context), periodic, nonbondedCutoff, 0.0);
ixn.setUseCutoff(nonbondedCutoff, *neighborList); ixn.setUseCutoff(nonbondedCutoff, *neighborList);
} }
map<string, double> globalParameters; map<string, double> globalParameters;
...@@ -2015,6 +2017,44 @@ void ReferenceCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& cont ...@@ -2015,6 +2017,44 @@ void ReferenceCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& cont
ixn = new ReferenceGayBerneForce(force); ixn = new ReferenceGayBerneForce(force);
} }
/**
 * Release the ReferenceCustomCVForce owned by this kernel.
 */
ReferenceCalcCustomCVForceKernel::~ReferenceCalcCustomCVForceKernel() {
    // ixn stays NULL if initialize() never ran; deleting a null pointer is a no-op.
    delete ixn;
}
/**
 * Record the names of the force's global parameters and energy parameter
 * derivatives, then build the object that performs the actual calculation.
 * The system and innerContext arguments are not used here.
 */
void ReferenceCalcCustomCVForceKernel::initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext) {
    const int globalCount = force.getNumGlobalParameters();
    for (int index = 0; index < globalCount; ++index)
        globalParameterNames.push_back(force.getGlobalParameterName(index));

    const int derivCount = force.getNumEnergyParameterDerivatives();
    for (int index = 0; index < derivCount; ++index)
        energyParamDerivNames.push_back(force.getEnergyParameterDerivativeName(index));

    ixn = new ReferenceCustomCVForce(force);
}
/**
 * Synchronize the inner context with the outer one, then let the
 * ReferenceCustomCVForce add its forces, energy and parameter derivatives.
 * includeForces is not consulted: the force array is always handed to calculateIxn().
 *
 * @return the energy computed by calculateIxn(), or 0 if includeEnergy is false
 */
double ReferenceCalcCustomCVForceKernel::execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy) {
    copyState(context, innerContext);
    vector<Vec3>& positions = extractPositions(context);
    vector<Vec3>& forces = extractForces(context);
    double energy = 0;

    // Look up the current value of every global parameter the force declared.
    map<string, double> parameterValues;
    for (const string& parameterName : globalParameterNames)
        parameterValues[parameterName] = context.getParameter(parameterName);

    map<string, double>& derivatives = extractEnergyParameterDerivatives(context);

    // A NULL energy pointer tells calculateIxn() that the energy is not wanted.
    double* energyOut = NULL;
    if (includeEnergy)
        energyOut = &energy;
    ixn->calculateIxn(innerContext, positions, parameterValues, forces, energyOut, derivatives);
    return energy;
}
/**
 * Mirror the outer context's state into the inner context: positions,
 * velocities, periodic box vectors, time, and the value of every parameter
 * that the inner context defines.
 */
void ReferenceCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& innerContext) {
    extractPositions(innerContext) = extractPositions(context);
    extractVelocities(innerContext) = extractVelocities(context);

    Vec3 boxA, boxB, boxC;
    context.getPeriodicBoxVectors(boxA, boxB, boxC);
    innerContext.setPeriodicBoxVectors(boxA, boxB, boxC);
    innerContext.setTime(context.getTime());

    // Iterate over a copy of the inner parameter map, since setParameter() is
    // called on the inner context while looping.
    map<string, double> snapshot = innerContext.getParameters();
    for (map<string, double>::iterator entry = snapshot.begin(); entry != snapshot.end(); ++entry)
        innerContext.setParameter(entry->first, context.getParameter(entry->first));
}
ReferenceIntegrateVerletStepKernel::~ReferenceIntegrateVerletStepKernel() { ReferenceIntegrateVerletStepKernel::~ReferenceIntegrateVerletStepKernel() {
if (dynamics) if (dynamics)
delete dynamics; delete dynamics;
......
...@@ -64,6 +64,7 @@ ReferencePlatform::ReferencePlatform() { ...@@ -64,6 +64,7 @@ ReferencePlatform::ReferencePlatform() {
registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory); registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory); registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory); registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCVForceKernel::Name(), factory);
registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory); registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
registerKernelFactory(CalcGayBerneForceKernel::Name(), factory); registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory); registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment