/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
* * -------------------------------------------------------------------------- */ #include "OpenCLContext.h" #include "OpenCLArray.h" #include "openmm/Platform.h" #include #include using namespace OpenMM; using namespace std; OpenCLContext::OpenCLContext(int numParticles, int platformIndex, int deviceIndex) { // TODO Select the platform and device correctly context = cl::Context(CL_DEVICE_TYPE_CPU); device = context.getInfo()[0]; queue = cl::CommandQueue(context, device); numAtoms = numParticles; paddedNumAtoms = TileSize*((numParticles+TileSize-1)/TileSize); numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize; numTiles = numAtomBlocks*(numAtomBlocks+1)/2; numThreadBlocks = 8*device.getInfo(); forceBufferPerWarp = true; numForceBuffers = numThreadBlocks*ThreadBlockSize/TileSize; if (numForceBuffers >= numAtomBlocks) { // For small systems, it is more efficient to have one force buffer per block of 32 atoms instead of one per warp. forceBufferPerWarp = false; numForceBuffers = numAtomBlocks; } posq = new OpenCLArray(*this, paddedNumAtoms, "posq", true); velm = new OpenCLArray(*this, paddedNumAtoms, "velm", true); forceBuffers = new OpenCLArray(*this, paddedNumAtoms*numForceBuffers, "forceBuffers", false); force = new OpenCLArray(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force", true); atomIndex = new OpenCLArray(*this, paddedNumAtoms, "atomIndex", true); for (int i = 0; i < paddedNumAtoms; ++i) atomIndex->set(i, i); atomIndex->upload(); // Create utility kernels that are used in multiple places. 
utilities = createProgram(loadSourceFromFile("utilities.cl")); clearBufferKernel = cl::Kernel(utilities, "clearBuffer"); } OpenCLContext::~OpenCLContext() { delete posq; delete velm; delete force; delete atomIndex; } string OpenCLContext::loadSourceFromFile(const string& filename) const { ifstream file((Platform::getDefaultPluginsDirectory()+"/opencl/"+filename).c_str()); if (!file.is_open()) throw OpenMMException("Unable to load kernel: "+filename); string kernel; string line; while (!file.eof()) { getline(file, line); kernel += line; kernel += '\n'; } file.close(); return kernel; } cl::Program OpenCLContext::createProgram(const std::string source) { cl::Program::Sources sources(1, make_pair(source.c_str(), source.size())); cl::Program program(context, sources); try { program.build(vector(1, device)); } catch (cl::Error err) { throw OpenMMException("Error compiling kernel: "+program.getBuildInfo(device)); } return program; } void OpenCLContext::clearBuffer(OpenCLArray& array) { clearBufferKernel.setArg(0, array.getDeviceBuffer()); clearBufferKernel.setArg(1, array.getSize()); queue.enqueueNDRangeKernel(clearBufferKernel, cl::NullRange, cl::NDRange(numThreadBlocks*ThreadBlockSize), cl::NDRange(ThreadBlockSize)); } void OpenCLContext::clearBuffer(OpenCLArray& array) { clearBufferKernel.setArg(0, array.getDeviceBuffer()); clearBufferKernel.setArg(1, array.getSize()*4); queue.enqueueNDRangeKernel(clearBufferKernel, cl::NullRange, cl::NDRange(numThreadBlocks*ThreadBlockSize), cl::NDRange(ThreadBlockSize)); }