Commit ad7acc66 authored by peastman's avatar peastman
Browse files

OpenCL implementation of BAOABLangevinIntegrator

parent 6aad09c3
......@@ -1383,6 +1383,47 @@ private:
cl::Kernel kernel1, kernel2;
};
/**
* This kernel is invoked by LangevinIntegrator to take one time step.
*/
class OpenCLIntegrateBAOABStepKernel : public IntegrateBAOABStepKernel {
public:
OpenCLIntegrateBAOABStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateBAOABStepKernel(name, platform), cl(cl),
hasInitializedKernels(false) {
}
/**
* Initialize the kernel, setting up the particle masses.
*
* @param system the System this kernel will be applied to
* @param integrator the BAOABLangevinIntegrator this kernel will be used for
*/
void initialize(const System& system, const BAOABLangevinIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the BAOABLangevinIntegrator this kernel is being used for
* @param forcesAreValid if the context has been modified since the last time step, this will be
* false to show that cached forces are invalid and must be recalculated.
* On exit, this should specify whether the cached forces are valid at the
* end of the step.
*/
void execute(ContextImpl& context, const BAOABLangevinIntegrator& integrator, bool& forcesAreValid);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the BAOABLangevinIntegrator this kernel is being used for
*/
double computeKineticEnergy(ContextImpl& context, const BAOABLangevinIntegrator& integrator);
private:
OpenCLContext& cl;
double prevTemp, prevFriction, prevStepSize;
bool hasInitializedKernels;
OpenCLArray params, oldDelta;
cl::Kernel kernel1, kernel2, kernel3;
};
/**
* This kernel is invoked by BrownianIntegrator to take one time step.
*/
......
......@@ -118,6 +118,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLIntegrateVerletStepKernel(name, platform, cl);
if (name == IntegrateLangevinStepKernel::Name())
return new OpenCLIntegrateLangevinStepKernel(name, platform, cl);
if (name == IntegrateBAOABStepKernel::Name())
return new OpenCLIntegrateBAOABStepKernel(name, platform, cl);
if (name == IntegrateBrownianStepKernel::Name())
return new OpenCLIntegrateBrownianStepKernel(name, platform, cl);
if (name == IntegrateVariableVerletStepKernel::Name())
......
......@@ -7473,6 +7473,107 @@ double OpenCLIntegrateLangevinStepKernel::computeKineticEnergy(ContextImpl& cont
return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
void OpenCLIntegrateBAOABStepKernel::initialize(const System& system, const BAOABLangevinIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
map<string, string> defines;
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
cl::Program program = cl.createProgram(OpenCLKernelSources::baoab, defines, "");
kernel1 = cl::Kernel(program, "integrateBAOABPart1");
kernel2 = cl::Kernel(program, "integrateBAOABPart2");
kernel3 = cl::Kernel(program, "integrateBAOABPart3");
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
params.initialize<cl_double>(cl, 3, "baoabParams");
oldDelta.initialize<mm_double4>(cl, cl.getPaddedNumAtoms(), "oldDelta");
}
else {
params.initialize<cl_float>(cl, 3, "baoabParams");
oldDelta.initialize<mm_float4>(cl, cl.getPaddedNumAtoms(), "oldDelta");
}
prevStepSize = -1.0;
}
void OpenCLIntegrateBAOABStepKernel::execute(ContextImpl& context, const BAOABLangevinIntegrator& integrator, bool& forcesAreValid) {
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
int numAtoms = cl.getNumAtoms();
if (!hasInitializedKernels) {
hasInitializedKernels = true;
kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, oldDelta.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kernel2, 1);
kernel2.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(3, integration.getPosDelta().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(4, oldDelta.getDeviceBuffer());
kernel2.setArg<cl::Buffer>(5, params.getDeviceBuffer());
kernel2.setArg<cl::Buffer>(6, integration.getStepSize().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(7, integration.getRandom().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kernel3, 1);
kernel3.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(3, cl.getForce().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(4, integration.getPosDelta().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(5, oldDelta.getDeviceBuffer());
kernel3.setArg<cl::Buffer>(6, integration.getStepSize().getDeviceBuffer());
}
if (!forcesAreValid) {
context.calcForcesAndEnergy(true, false);
forcesAreValid = true;
}
double temperature = integrator.getTemperature();
double friction = integrator.getFriction();
double stepSize = integrator.getStepSize();
cl.getIntegrationUtilities().setNextStepSize(stepSize);
if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
// Calculate the integration parameters.
double kT = BOLTZ*temperature;
double vscale = exp(-stepSize*friction);
double fscale = (friction == 0 ? stepSize : (1-vscale)/friction);
double noisescale = sqrt(kT*(1-vscale*vscale));
vector<cl_double> p(params.getSize());
p[0] = vscale;
p[1] = fscale;
p[2] = noisescale;
params.upload(p, true, true);
prevTemp = temperature;
prevFriction = friction;
prevStepSize = stepSize;
}
// Perform the integrator.
kernel2.setArg<cl_uint>(8, integration.prepareRandomNumbers(cl.getPaddedNumAtoms()));
cl.executeKernel(kernel1, numAtoms);
integration.applyConstraints(integrator.getConstraintTolerance());
cl.executeKernel(kernel2, numAtoms);
integration.applyConstraints(integrator.getConstraintTolerance());
context.calcForcesAndEnergy(true, false);
cl.executeKernel(kernel3, numAtoms);
integration.applyVelocityConstraints(integrator.getConstraintTolerance());
integration.computeVirtualSites();
// Update the time and step count.
cl.setTime(cl.getTime()+stepSize);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
// Reduce UI lag.
#ifdef WIN32
cl.getQueue().flush();
#endif
}
double OpenCLIntegrateBAOABStepKernel::computeKineticEnergy(ContextImpl& context, const BAOABLangevinIntegrator& integrator) {
return cl.getIntegrationUtilities().computeKineticEnergy(0.0);
}
OpenCLIntegrateBrownianStepKernel::~OpenCLIntegrateBrownianStepKernel() {
}
......
......@@ -88,6 +88,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
registerKernelFactory(IntegrateBAOABStepKernel::Name(), factory);
registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
......
enum {VelScale, ForceScale, NoiseScale, MaxParams};
/**
* Perform the first step of BAOAB integration.
*/
__kernel void integrateBAOABPart1(__global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta,
__global mixed4* restrict oldDelta, __global const mixed2* restrict dt) {
mixed halfdt = 0.5*dt[0].y;
for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
velocity.x += halfdt*velocity.w*force[index].x;
velocity.y += halfdt*velocity.w*force[index].y;
velocity.z += halfdt*velocity.w*force[index].z;
velm[index] = velocity;
mixed4 delta = halfdt*velocity;
posDelta[index] = delta;
oldDelta[index] = delta;
}
}
}
/**
* Perform the second step of BAOAB integration.
*/
__kernel void integrateBAOABPart2(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm, __global mixed4* restrict posDelta,
__global mixed4* restrict oldDelta, __global const mixed* restrict paramBuffer, __global const mixed2* restrict dt, __global const float4* restrict random, unsigned int randomIndex) {
mixed vscale = paramBuffer[VelScale];
mixed fscale = paramBuffer[ForceScale];
mixed noisescale = paramBuffer[NoiseScale];
mixed halfdt = 0.5*dt[0].y;
mixed invHalfdt = 1/halfdt;
int index = get_global_id(0);
randomIndex += index;
while (index < NUM_ATOMS) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
mixed4 delta = posDelta[index];
mixed sqrtInvMass = sqrt(velocity.w);
velocity.xyz += (delta.xyz-oldDelta[index].xyz)*invHalfdt;
velocity.x = vscale*velocity.x + noisescale*sqrtInvMass*random[randomIndex].x;
velocity.y = vscale*velocity.y + noisescale*sqrtInvMass*random[randomIndex].y;
velocity.z = vscale*velocity.z + noisescale*sqrtInvMass*random[randomIndex].z;
velm[index] = velocity;
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[index];
#endif
pos.xyz += delta.xyz;
#ifdef USE_MIXED_PRECISION
posq[index] = convert_real4(pos);
posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
posq[index] = pos;
#endif
delta = halfdt*velocity;
posDelta[index] = delta;
oldDelta[index] = delta;
}
randomIndex += get_global_size(0);
index += get_global_size(0);
}
}
/**
* Perform the third step of BAOAB integration.
*/
__kernel void integrateBAOABPart3(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm,
__global const real4* restrict force, __global mixed4* restrict posDelta, __global mixed4* restrict oldDelta, __global const mixed2* restrict dt) {
mixed halfdt = 0.5*dt[0].y;
mixed invHalfdt = 1/halfdt;
for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
mixed4 delta = posDelta[index];
velocity.x += (delta.x-oldDelta[index].x)*invHalfdt + halfdt*velocity.w*force[index].x;
velocity.y += (delta.y-oldDelta[index].y)*invHalfdt + halfdt*velocity.w*force[index].y;
velocity.z += (delta.z-oldDelta[index].z)*invHalfdt + halfdt*velocity.w*force[index].z;
velm[index] = velocity;
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[index];
#endif
pos.xyz += delta.xyz;
#ifdef USE_MIXED_PRECISION
posq[index] = convert_real4(pos);
posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
posq[index] = pos;
#endif
}
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestBAOABLangevinIntegrator.h"
void runPlatformTests() {
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment