/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "OpenCLBondedUtilities.h"
#include "OpenCLExpressionUtilities.h"
#include "openmm/OpenMMException.h"
#include "OpenCLNonbondedUtilities.h"
#include
using namespace OpenMM;
using namespace std;
OpenCLBondedUtilities::OpenCLBondedUtilities(OpenCLContext& context) : context(context), numForceBuffers(0), maxBonds(0), hasInitializedKernels(false) {
}
OpenCLBondedUtilities::~OpenCLBondedUtilities() {
for (int i = 0; i < (int) atomIndices.size(); i++)
delete atomIndices[i];
for (int i = 0; i < (int) bufferIndices.size(); i++)
delete bufferIndices[i];
}
void OpenCLBondedUtilities::addInteraction(const vector >& atoms, const string& source, int group) {
if (atoms.size() > 0) {
forceAtoms.push_back(atoms);
forceSource.push_back(source);
forceGroup.push_back(group);
int width = 1;
while (width < atoms[0].size())
width *= 2;
indexWidth.push_back(width);
}
}
std::string OpenCLBondedUtilities::addArgument(cl::Memory& data, const string& type) {
arguments.push_back(&data);
argTypes.push_back(type);
return "customArg"+OpenCLExpressionUtilities::intToString(arguments.size());
}
void OpenCLBondedUtilities::initialize(const System& system) {
int numForces = forceAtoms.size();
if (numForces == 0)
return;
// Build the lists of atom indicse and buffer indices.
vector > bufferVec(numForces);
vector > bufferCounter(numForces, vector(system.getNumParticles(), 0));
vector numBuffers(numForces, 0);
for (int i = 0; i < numForces; i++) {
int numBonds = forceAtoms[i].size();
int numAtoms = forceAtoms[i][0].size();
int width = indexWidth[i];
vector indexVec(width*numBonds);
for (int bond = 0; bond < numBonds; bond++) {
for (int atom = 0; atom < numAtoms; atom++)
indexVec[bond*width+atom] = forceAtoms[i][bond][atom];
}
OpenCLArray* indices = new OpenCLArray(context, indexVec.size(), "bondedIndices");
indices->upload(indexVec);
atomIndices.push_back(indices);
bufferVec[i].resize(width*numBonds, 0);
for (int bond = 0; bond < numBonds; bond++) {
for (int atom = 0; atom < numAtoms; atom++)
bufferVec[i][bond*width+atom] = bufferCounter[i][forceAtoms[i][bond][atom]]++;
}
for (int j = 0; j < (int) bufferCounter[i].size(); j++)
numBuffers[i] = max(numBuffers[i], bufferCounter[i][j]);
}
// Figure out how many force buffers will be required.
for (int i = 0; i < numForces; i++)
numForceBuffers = max(numForceBuffers, numBuffers[i]);
int bufferLimit = max(numForceBuffers, (int) context.getPlatformData().contexts.size());
if (context.getNonbondedUtilities().getHasInteractions())
bufferLimit = max(bufferLimit, context.getNonbondedUtilities().getNumForceBuffers());
// For efficiency, we want to merge multiple forces into a single kernel - but only if that
// won't increase the number of force buffers. Figure out sets of forces that can be merged.
vector unmerged(numForces);
for (int i = 0; i < numForces; i++)
unmerged[i] = i;
for (int i = 0; i < numForces; i++)
for (int j = i-1; j >= 0; j--) {
if (numBuffers[unmerged[j]] <= numBuffers[unmerged[j+1]])
break;
int temp = unmerged[j+1];
unmerged[j+1] = unmerged[j];
unmerged[j] = temp;
}
while (unmerged.size() > 0) {
int sum = numBuffers[unmerged.back()];
int i;
for (i = 0; i < unmerged.size()-1; i++) {
if (sum+numBuffers[unmerged[i]] > bufferLimit)
break;
sum += numBuffers[unmerged[i]];
}
forceSets.push_back(vector());
for (int j = 0; j < i; j++)
forceSets.back().push_back(unmerged[j]);
forceSets.back().push_back(unmerged.back());
for (int j = 0; j < i; j++)
unmerged.erase(unmerged.begin());
unmerged.pop_back();
}
// Update the buffer indices based on merged sets.
bufferIndices.resize(numForces);
for (int i = 0; i < (int) forceSets.size(); i++)
for (int j = 0; j < forceSets[i].size(); j++) {
int force = forceSets[i][j];
int numBonds = forceAtoms[force].size();
int numAtoms = forceAtoms[force][0].size();
int width = indexWidth[force];
for (int k = 0; k < j; k++)
for (int bond = 0; bond < numBonds; bond++)
for (int atom = 0; atom < numAtoms; atom++)
bufferVec[force][bond*width+atom] += bufferCounter[forceSets[i][k]][forceAtoms[force][bond][atom]];
OpenCLArray* buffers = new OpenCLArray(context, bufferVec[force].size(), "bondedBufferIndices");
buffers->upload(bufferVec[force]);
bufferIndices[force] = buffers;
}
// Create the kernels.
for (vector >::const_iterator iter = forceSets.begin(); iter != forceSets.end(); ++iter) {
const vector& set = *iter;
int setSize = set.size();
stringstream s;
s<<"__kernel void computeBondedForces(__global float4* restrict forceBuffers, __global float* restrict energyBuffer, __global const float4* restrict posq, int groups";
for (int i = 0; i < setSize; i++) {
int force = set[i];
string indexType = "uint"+(indexWidth[force] == 1 ? "" : OpenCLExpressionUtilities::intToString(indexWidth[force]));
s<<", __global const "< defines;
defines["PADDED_NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getPaddedNumAtoms());
cl::Program program = context.createProgram(s.str(), defines);
kernels.push_back(cl::Kernel(program, "computeBondedForces"));
}
forceAtoms.clear();
forceSource.clear();
}
string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, int numAtoms, int group, const string& computeForce) {
maxBonds = max(maxBonds, numBonds);
int width = 1;
while (width < numAtoms)
width *= 2;
string suffix1[] = {""};
string suffix4[] = {".x", ".y", ".z", ".w"};
string suffix16[] = {".s0", ".s1", ".s2", ".s3", ".s4", ".s5", ".s6", ".s7",
".s8", ".s9", ".s10", ".s11", ".s12", ".s13", ".s14", ".s15"};
string* suffix;
if (width == 1)
suffix = suffix1;
else if (width <= 4)
suffix = suffix4;
else
suffix = suffix16;
string indexType = "uint"+(width == 1 ? "" : OpenCLExpressionUtilities::intToString(width));
stringstream s;
s<<"if ((groups&"<<(1<(index++, context.getForceBuffers().getDeviceBuffer());
kernel.setArg(index++, context.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(index++, context.getPosq().getDeviceBuffer());
index++;
for (int j = 0; j < (int) forceSets[i].size(); j++) {
kernel.setArg(index++, atomIndices[forceSets[i][j]]->getDeviceBuffer());
kernel.setArg(index++, bufferIndices[forceSets[i][j]]->getDeviceBuffer());
}
for (int j = 0; j < (int) arguments.size(); j++)
kernel.setArg(index++, *arguments[j]);
}
}
for (int i = 0; i < (int) kernels.size(); i++) {
kernels[i].setArg(3, groups);
context.executeKernel(kernels[i], maxBonds);
}
}