/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2011-2016 Stanford University and the Authors. * * Authors: Peter Eastman * * Contributors: * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as published * * by the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU Lesser General Public License for more details. * * * * You should have received a copy of the GNU Lesser General Public License * * along with this program. If not, see . * * -------------------------------------------------------------------------- */ #include "OpenCLBondedUtilities.h" #include "OpenCLExpressionUtilities.h" #include "openmm/OpenMMException.h" #include "OpenCLNonbondedUtilities.h" #include using namespace OpenMM; using namespace std; OpenCLBondedUtilities::OpenCLBondedUtilities(OpenCLContext& context) : context(context), numForceBuffers(0), maxBonds(0), allGroups(0), hasInitializedKernels(false) { } OpenCLBondedUtilities::~OpenCLBondedUtilities() { for (int i = 0; i < (int) atomIndices.size(); i++) delete atomIndices[i]; for (int i = 0; i < (int) bufferIndices.size(); i++) delete bufferIndices[i]; } void OpenCLBondedUtilities::addInteraction(const vector >& atoms, const string& source, int group) { if (atoms.size() > 0) { forceAtoms.push_back(atoms); forceSource.push_back(source); forceGroup.push_back(group); allGroups |= 1< > bufferVec(numForces); vector > bufferCounter(numForces, vector(system.getNumParticles(), 0)); vector numBuffers(numForces, 0); for (int i = 0; i < numForces; i++) { int numBonds = forceAtoms[i].size(); int numAtoms = forceAtoms[i][0].size(); int width = indexWidth[i]; vector indexVec(width*numBonds); for (int bond = 0; bond < numBonds; bond++) { for (int atom = 0; atom < numAtoms; atom++) indexVec[bond*width+atom] = forceAtoms[i][bond][atom]; } OpenCLArray* indices = OpenCLArray::create(context, indexVec.size(), "bondedIndices"); indices->upload(indexVec); atomIndices.push_back(indices); bufferVec[i].resize(width*numBonds, 0); for (int bond = 0; bond < numBonds; bond++) { for (int atom = 0; atom < numAtoms; atom++) bufferVec[i][bond*width+atom] = bufferCounter[i][forceAtoms[i][bond][atom]]++; } for (int j = 0; j < (int) bufferCounter[i].size(); j++) numBuffers[i] = max(numBuffers[i], bufferCounter[i][j]); } // For efficiency, we want to merge multiple forces into a single kernel - but only if that // won't increase the number of force buffers. if (context.getSupports64BitGlobalAtomics()) { // Put all the forces in the same set. numForceBuffers = 1; forceSets.push_back(vector()); for (int i = 0; i < numForces; i++) forceSets[0].push_back(i); } else { // Figure out how many force buffers will be required. for (int i = 0; i < numForces; i++) numForceBuffers = max(numForceBuffers, numBuffers[i]); int bufferLimit = max(numForceBuffers, (int) context.getPlatformData().contexts.size()); if (context.getNonbondedUtilities().getHasInteractions()) bufferLimit = max(bufferLimit, context.getNonbondedUtilities().getNumForceBuffers()); // Figure out sets of forces that can be merged. vector unmerged(numForces); for (int i = 0; i < numForces; i++) unmerged[i] = i; for (int i = 0; i < numForces; i++) for (int j = i-1; j >= 0; j--) { if (numBuffers[unmerged[j]] <= numBuffers[unmerged[j+1]]) break; int temp = unmerged[j+1]; unmerged[j+1] = unmerged[j]; unmerged[j] = temp; } while (unmerged.size() > 0) { int sum = numBuffers[unmerged.back()]; int i; for (i = 0; i < (int) unmerged.size()-1; i++) { if (sum+numBuffers[unmerged[i]] > bufferLimit) break; sum += numBuffers[unmerged[i]]; } forceSets.push_back(vector()); for (int j = 0; j < i; j++) forceSets.back().push_back(unmerged[j]); forceSets.back().push_back(unmerged.back()); for (int j = 0; j < i; j++) unmerged.erase(unmerged.begin()); unmerged.pop_back(); } } // Update the buffer indices based on merged sets. bufferIndices.resize(numForces); for (int i = 0; i < (int) forceSets.size(); i++) for (int j = 0; j < (int) forceSets[i].size(); j++) { int force = forceSets[i][j]; int numBonds = forceAtoms[force].size(); int numAtoms = forceAtoms[force][0].size(); int width = indexWidth[force]; for (int k = 0; k < j; k++) for (int bond = 0; bond < numBonds; bond++) for (int atom = 0; atom < numAtoms; atom++) bufferVec[force][bond*width+atom] += bufferCounter[forceSets[i][k]][forceAtoms[force][bond][atom]]; OpenCLArray* buffers = OpenCLArray::create(context, bufferVec[force].size(), "bondedBufferIndices"); buffers->upload(bufferVec[force]); bufferIndices[force] = buffers; } // Create the kernels. for (auto& set : forceSets) { int setSize = set.size(); stringstream s; s<<"#ifdef SUPPORTS_64_BIT_ATOMICS\n"; s<<"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"; s<<"#endif\n"; for (int i = 0; i < (int) prefixCode.size(); i++) s< 0) s<<", __global mixed* restrict energyParamDerivs"; s<<") {\n"; s<<"mixed energy = 0;\n"; for (int i = 0; i < energyParameterDerivatives.size(); i++) s<<"mixed energyParamDeriv"<& allParamDerivNames = context.getEnergyParamDerivNames(); int numDerivs = allParamDerivNames.size(); for (int i = 0; i < energyParameterDerivatives.size(); i++) for (int index = 0; index < numDerivs; index++) if (allParamDerivNames[index] == energyParameterDerivatives[i]) s<<"energyParamDerivs[get_global_id(0)*"< defines; defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms()); cl::Program program = context.createProgram(s.str(), defines); kernels.push_back(cl::Kernel(program, "computeBondedForces")); } forceAtoms.clear(); forceSource.clear(); } string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, int numAtoms, int group, const string& computeForce) { maxBonds = max(maxBonds, numBonds); int width = 1; while (width < numAtoms) width *= 2; string suffix1[] = {""}; string suffix4[] = {".x", ".y", ".z", ".w"}; string suffix16[] = {".s0", ".s1", ".s2", ".s3", ".s4", ".s5", ".s6", ".s7", ".s8", ".s9", ".s10", ".s11", ".s12", ".s13", ".s14", ".s15"}; string* suffix; if (width == 1) suffix = suffix1; else if (width <= 4) suffix = suffix4; else suffix = suffix16; string indexType = "uint"+(width == 1 ? "" : context.intToString(width)); stringstream s; s<<"if ((groups&"<<(1<(index++, context.getLongForceBuffer().getDeviceBuffer()); else kernel.setArg(index++, context.getForceBuffers().getDeviceBuffer()); kernel.setArg(index++, context.getEnergyBuffer().getDeviceBuffer()); kernel.setArg(index++, context.getPosq().getDeviceBuffer()); index += 6; for (int j = 0; j < (int) forceSets[i].size(); j++) { kernel.setArg(index++, atomIndices[forceSets[i][j]]->getDeviceBuffer()); kernel.setArg(index++, bufferIndices[forceSets[i][j]]->getDeviceBuffer()); } for (int j = 0; j < (int) arguments.size(); j++) kernel.setArg(index++, *arguments[j]); if (energyParameterDerivatives.size() > 0) kernel.setArg(index++, context.getEnergyParamDerivBuffer().getDeviceBuffer()); } } for (int i = 0; i < (int) kernels.size(); i++) { cl::Kernel& kernel = kernels[i]; kernel.setArg(3, groups); if (context.getUseDoublePrecision()) { kernel.setArg(4, context.getPeriodicBoxSizeDouble()); kernel.setArg(5, context.getInvPeriodicBoxSizeDouble()); kernel.setArg(6, context.getPeriodicBoxVecXDouble()); kernel.setArg(7, context.getPeriodicBoxVecYDouble()); kernel.setArg(8, context.getPeriodicBoxVecZDouble()); } else { kernel.setArg(4, context.getPeriodicBoxSize()); kernel.setArg(5, context.getInvPeriodicBoxSize()); kernel.setArg(6, context.getPeriodicBoxVecX()); kernel.setArg(7, context.getPeriodicBoxVecY()); kernel.setArg(8, context.getPeriodicBoxVecZ()); } context.executeKernel(kernels[i], maxBonds); } }