/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009-2020 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "OpenCLIntegrationUtilities.h"
#include "OpenCLContext.h"

using namespace OpenMM;
using namespace std;

namespace {

/**
 * Set the constraint tolerance argument of a kernel, passing it as a double
 * when the context runs in double or mixed precision and as a float otherwise.
 *
 * @param kernel            the kernel whose argument should be set
 * @param index             the index of the tolerance argument
 * @param tol               the constraint tolerance
 * @param useHighPrecision  true if the context uses double or mixed precision
 */
void setToleranceArg(ComputeKernel& kernel, int index, double tol, bool useHighPrecision) {
    if (useHighPrecision)
        kernel->setArg(index, tol);
    else
        kernel->setArg(index, static_cast<float>(tol));
}

} // anonymous namespace

/**
 * Create the OpenCL specific integration utilities.  In addition to the work
 * done by the IntegrationUtilities constructor, this allocates the one element
 * host-side buffer used to report CCMA convergence and selects which
 * convergence readback mechanism applyConstraintsImpl() will use.
 *
 * @param context  the OpenCL context the utilities belong to
 * @param system   the System being simulated
 */
OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, const System& system) : IntegrationUtilities(context, system) {
    ccmaConvergedHostBuffer.initialize<cl_int>(context, 1, "CcmaConvergedHostBuffer", CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR);

    // Different communication mechanisms give optimal performance on AMD and on NVIDIA.
    // The direct (mapped host buffer) mechanism is used only when the vendor
    // string starts with AMD's name.

    const string amdVendor = "Advanced Micro Devices, Inc.";
    const string vendor = context.getDevice().getInfo<CL_DEVICE_VENDOR>();
    ccmaUseDirectBuffer = (vendor.compare(0, amdVendor.size(), amdVendor) == 0);
}

/**
 * Get the array which contains position deltas, as an OpenCLArray.
 */
OpenCLArray& OpenCLIntegrationUtilities::getPosDelta() {
    return dynamic_cast<OpenCLContext&>(context).unwrap(posDelta);
}

/**
 * Get the array which contains random values, as an OpenCLArray.
 */
OpenCLArray& OpenCLIntegrationUtilities::getRandom() {
    return dynamic_cast<OpenCLContext&>(context).unwrap(random);
}

/**
 * Get the array which contains the current step size, as an OpenCLArray.
 */
OpenCLArray& OpenCLIntegrationUtilities::getStepSize() {
    return dynamic_cast<OpenCLContext&>(context).unwrap(stepSize);
}

/**
 * Apply all constraints (SETTLE, SHAKE and CCMA) that have been set up.
 *
 * Small CCMA problems (at most 1024 constraints) are solved by a single kernel.
 * Larger ones are solved iteratively with separate kernels, checking for
 * convergence every few iterations.  The iteration stops after at most 150
 * passes; no error is raised here if convergence was not reported by then.
 *
 * @param constrainVelocities  if true, the velocity versions of the kernels are
 *                             used; otherwise the position versions are used
 * @param tol                  the constraint tolerance
 */
void OpenCLIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, double tol) {
    ComputeKernel settleKernel, shakeKernel, ccmaForceKernel;
    if (constrainVelocities) {
        settleKernel = settleVelKernel;
        shakeKernel = shakeVelKernel;
        ccmaForceKernel = ccmaVelForceKernel;
    }
    else {
        settleKernel = settlePosKernel;
        shakeKernel = shakePosKernel;
        ccmaForceKernel = ccmaPosForceKernel;
    }
    const bool useHighPrecision = (context.getUseDoublePrecision() || context.getUseMixedPrecision());
    if (settleAtoms.isInitialized()) {
        setToleranceArg(settleKernel, 1, tol, useHighPrecision);
        settleKernel->execute(settleAtoms.getSize());
    }
    if (shakeAtoms.isInitialized()) {
        setToleranceArg(shakeKernel, 1, tol, useHighPrecision);
        shakeKernel->execute(shakeAtoms.getSize());
    }
    if (ccmaConstraintAtoms.isInitialized()) {
        if (ccmaConstraintAtoms.getSize() <= 1024) {
            // Use the version of CCMA that runs in a single kernel with one workgroup.

            ccmaFullKernel->setArg(0, static_cast<int>(constrainVelocities));
            setToleranceArg(ccmaFullKernel, 14, tol, useHighPrecision);
            ccmaFullKernel->execute(128, 128);
        }
        else {
            // Use the version of CCMA that uses multiple kernels.

            ccmaForceKernel->setArg(6, ccmaConvergedHostBuffer);
            setToleranceArg(ccmaForceKernel, 7, tol, useHighPrecision);
            ccmaDirectionsKernel->execute(ccmaConstraintAtoms.getSize());
            const int checkInterval = 4;
            OpenCLContext& cl = dynamic_cast<OpenCLContext&>(context);
            cl::CommandQueue queue = cl.getQueue();
            int* converged = static_cast<int*>(context.getPinnedBuffer());

            // Clear the host-side convergence flag before the first iteration.

            int* ccmaConvergedHostMemory = static_cast<int*>(queue.enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_int)));
            ccmaConvergedHostMemory[0] = 0;
            queue.enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
            ccmaUpdateKernel->setArg(4, constrainVelocities ? context.getVelm() : posDelta);
            for (int i = 0; i < 150; i++) {
                ccmaForceKernel->setArg(8, i);
                ccmaForceKernel->execute(ccmaConstraintAtoms.getSize());
                const bool checkConvergence = ((i+1)%checkInterval == 0);
                cl::Event event;

                // When not using the direct buffer, start a non-blocking download of
                // the convergence flags now; it is waited on after the remaining
                // kernels of this iteration have been enqueued.

                if (checkConvergence && !ccmaUseDirectBuffer)
                    queue.enqueueReadBuffer(cl.unwrap(ccmaConverged).getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(int), converged, nullptr, &event);
                ccmaMultiplyKernel->setArg(5, i);
                ccmaMultiplyKernel->execute(ccmaConstraintAtoms.getSize());
                ccmaUpdateKernel->setArg(9, i);
                ccmaUpdateKernel->execute(context.getNumAtoms());
                if (checkConvergence) {
                    if (ccmaUseDirectBuffer) {
                        // Map the host buffer without blocking, flush the queue, then
                        // poll the event until the map has completed.

                        ccmaConvergedHostMemory = static_cast<int*>(queue.enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_FALSE, CL_MAP_READ, 0, sizeof(cl_int), nullptr, &event));
                        queue.flush();
                        while (event.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() != CL_COMPLETE)
                            ;
                        converged[i%2] = ccmaConvergedHostMemory[0];
                        queue.enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
                    }
                    else
                        event.wait();

                    // NOTE(review): the flag slot alternates with the iteration parity;
                    // presumably this matches how the kernels write ccmaConverged - confirm.

                    if (converged[i%2])
                        break;
                }
            }
        }
    }
}

/**
 * Distribute forces from virtual sites to the atoms they are based on.  This
 * does nothing if there are no virtual sites.  Otherwise it runs the virtual
 * site force kernel on the long force buffer and then the kernel that saves
 * the result into the context's force buffers.
 */
void OpenCLIntegrationUtilities::distributeForcesFromVirtualSites() {
    if (numVsites > 0) {
        vsiteForceKernel->setArg(2, context.getLongForceBuffer());
        vsiteForceKernel->execute(numVsites);
        vsiteSaveForcesKernel->setArg(0, context.getLongForceBuffer());
        vsiteSaveForcesKernel->setArg(1, context.getForceBuffers());
        vsiteSaveForcesKernel->execute(context.getNumAtoms());
    }
}