Commit a4bf00ee authored by Peter Eastman's avatar Peter Eastman
Browse files

CUDA implementation of BAOABLangevinIntegrator

parent ad7acc66
......@@ -1401,6 +1401,45 @@ private:
CUfunction kernel1, kernel2;
};
/**
* This kernel is invoked by BAOABLangevinIntegrator to take one time step.
*/
class CudaIntegrateBAOABStepKernel : public IntegrateBAOABStepKernel {
public:
CudaIntegrateBAOABStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateBAOABStepKernel(name, platform), cu(cu) {
}
/**
* Initialize the kernel, setting up the particle masses.
*
* @param system the System this kernel will be applied to
* @param integrator the BAOABLangevinIntegrator this kernel will be used for
*/
void initialize(const System& system, const BAOABLangevinIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the BAOABLangevinIntegrator this kernel is being used for
* @param forcesAreValid if the context has been modified since the last time step, this will be
* false to show that cached forces are invalid and must be recalculated.
* On exit, this should specify whether the cached forces are valid at the
* end of the step.
*/
void execute(ContextImpl& context, const BAOABLangevinIntegrator& integrator, bool& forcesAreValid);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the BAOABLangevinIntegrator this kernel is being used for
*/
double computeKineticEnergy(ContextImpl& context, const BAOABLangevinIntegrator& integrator);
private:
CudaContext& cu;
double prevTemp, prevFriction, prevStepSize;
CudaArray params, oldDelta;
CUfunction kernel1, kernel2, kernel3;
};
/**
* This kernel is invoked by BrownianIntegrator to take one time step.
*/
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -120,6 +120,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return new CudaIntegrateVerletStepKernel(name, platform, cu);
if (name == IntegrateLangevinStepKernel::Name())
return new CudaIntegrateLangevinStepKernel(name, platform, cu);
if (name == IntegrateBAOABStepKernel::Name())
return new CudaIntegrateBAOABStepKernel(name, platform, cu);
if (name == IntegrateBrownianStepKernel::Name())
return new CudaIntegrateBrownianStepKernel(name, platform, cu);
if (name == IntegrateVariableVerletStepKernel::Name())
......
......@@ -7143,6 +7143,90 @@ double CudaIntegrateLangevinStepKernel::computeKineticEnergy(ContextImpl& contex
return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
void CudaIntegrateBAOABStepKernel::initialize(const System& system, const BAOABLangevinIntegrator& integrator) {
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
map<string, string> defines;
CUmodule module = cu.createModule(CudaKernelSources::baoab, defines, "");
kernel1 = cu.getKernel(module, "integrateBAOABPart1");
kernel2 = cu.getKernel(module, "integrateBAOABPart2");
kernel3 = cu.getKernel(module, "integrateBAOABPart3");
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
params.initialize<double>(cu, 3, "baoabParams");
oldDelta.initialize<double4>(cu, cu.getPaddedNumAtoms(), "oldDelta");
}
else {
params.initialize<float>(cu, 3, "baoabParams");
oldDelta.initialize<float4>(cu, cu.getPaddedNumAtoms(), "oldDelta");
}
prevStepSize = -1.0;
}
void CudaIntegrateBAOABStepKernel::execute(ContextImpl& context, const BAOABLangevinIntegrator& integrator, bool& forcesAreValid) {
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms();
int paddedNumAtoms = cu.getPaddedNumAtoms();
if (!forcesAreValid) {
context.calcForcesAndEnergy(true, false);
forcesAreValid = true;
}
double temperature = integrator.getTemperature();
double friction = integrator.getFriction();
double stepSize = integrator.getStepSize();
cu.getIntegrationUtilities().setNextStepSize(stepSize);
if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
// Calculate the integration parameters.
double kT = BOLTZ*temperature;
double vscale = exp(-stepSize*friction);
double noisescale = sqrt(kT*(1-vscale*vscale));
vector<double> p(params.getSize());
p[0] = vscale;
p[1] = noisescale;
params.upload(p, true);
prevTemp = temperature;
prevFriction = friction;
prevStepSize = stepSize;
}
// Perform the integrator.
int randomIndex = integration.prepareRandomNumbers(cu.getPaddedNumAtoms());
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
void* args1[] = {&numAtoms, &paddedNumAtoms, &cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
&oldDelta.getDevicePointer(), &integration.getStepSize().getDevicePointer()};
cu.executeKernel(kernel1, args1, numAtoms, 128);
integration.applyConstraints(integrator.getConstraintTolerance());
void* args2[] = {&numAtoms, &cu.getPosq().getDevicePointer(), &posCorrection, &cu.getVelm().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
&oldDelta.getDevicePointer(), &params.getDevicePointer(), &integration.getStepSize().getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex};
cu.executeKernel(kernel2, args2, numAtoms, 128);
integration.applyConstraints(integrator.getConstraintTolerance());
context.calcForcesAndEnergy(true, false);
void* args3[] = {&numAtoms, &paddedNumAtoms, &cu.getPosq().getDevicePointer(), &posCorrection, &cu.getVelm().getDevicePointer(),
&cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
&oldDelta.getDevicePointer(), &integration.getStepSize().getDevicePointer()};
cu.executeKernel(kernel3, args3, numAtoms, 128);
integration.applyVelocityConstraints(integrator.getConstraintTolerance());
integration.computeVirtualSites();
// Update the time and step count.
cu.setTime(cu.getTime()+stepSize);
cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
// Reduce UI lag.
#ifdef WIN32
cu.getQueue().flush();
#endif
}
double CudaIntegrateBAOABStepKernel::computeKineticEnergy(ContextImpl& context, const BAOABLangevinIntegrator& integrator) {
return cu.getIntegrationUtilities().computeKineticEnergy(0.0);
}
void CudaIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) {
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -97,6 +97,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
registerKernelFactory(IntegrateBAOABStepKernel::Name(), factory);
registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
......
enum {VelScale, NoiseScale};
/**
* Perform the first step of BAOAB integration.
*/
extern "C" __global__ void integrateBAOABPart1(int numAtoms, int paddedNumAtoms, mixed4* __restrict__ velm, const long long* __restrict__ force, mixed4* __restrict__ posDelta,
mixed4* __restrict__ oldDelta, const mixed2* __restrict__ dt) {
mixed halfdt = 0.5*dt[0].y;
mixed fscale = halfdt/(mixed) 0x100000000;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
velocity.x += fscale*velocity.w*force[index];
velocity.y += fscale*velocity.w*force[index+paddedNumAtoms];
velocity.z += fscale*velocity.w*force[index+paddedNumAtoms*2];
velm[index] = velocity;
mixed4 delta = make_mixed4(halfdt*velocity.x, halfdt*velocity.y, halfdt*velocity.z, 0);
posDelta[index] = delta;
oldDelta[index] = delta;
}
}
}
/**
* Perform the second step of BAOAB integration.
*/
extern "C" __global__ void integrateBAOABPart2(int numAtoms, real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, mixed4* __restrict__ posDelta,
mixed4* __restrict__ oldDelta, const mixed* __restrict__ paramBuffer, const mixed2* __restrict__ dt, const float4* __restrict__ random, unsigned int randomIndex) {
mixed vscale = paramBuffer[VelScale];
mixed noisescale = paramBuffer[NoiseScale];
mixed halfdt = 0.5*dt[0].y;
mixed invHalfdt = 1/halfdt;
int index = blockIdx.x*blockDim.x+threadIdx.x;
randomIndex += index;
while (index < numAtoms) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
mixed4 delta = posDelta[index];
mixed sqrtInvMass = SQRT(velocity.w);
velocity.x += (delta.x-oldDelta[index].x)*invHalfdt;
velocity.y += (delta.y-oldDelta[index].y)*invHalfdt;
velocity.z += (delta.z-oldDelta[index].z)*invHalfdt;
velocity.x = vscale*velocity.x + noisescale*sqrtInvMass*random[randomIndex].x;
velocity.y = vscale*velocity.y + noisescale*sqrtInvMass*random[randomIndex].y;
velocity.z = vscale*velocity.z + noisescale*sqrtInvMass*random[randomIndex].z;
velm[index] = velocity;
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[index];
#endif
pos.x += delta.x;
pos.y += delta.y;
pos.z += delta.z;
#ifdef USE_MIXED_PRECISION
posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
posq[index] = pos;
#endif
delta = make_mixed4(halfdt*velocity.x, halfdt*velocity.y, halfdt*velocity.z, 0);
posDelta[index] = delta;
oldDelta[index] = delta;
}
randomIndex += blockDim.x*gridDim.x;
index += blockDim.x*gridDim.x;
}
}
/**
* Perform the third step of BAOAB integration.
*/
extern "C" __global__ void integrateBAOABPart3(int numAtoms, int paddedNumAtoms, real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ velm,
const long long* __restrict__ force, mixed4* __restrict__ posDelta, mixed4* __restrict__ oldDelta, const mixed2* __restrict__ dt) {
mixed halfdt = 0.5*dt[0].y;
mixed invHalfdt = 1/halfdt;
mixed fscale = halfdt/(mixed) 0x100000000;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
mixed4 delta = posDelta[index];
velocity.x += (delta.x-oldDelta[index].x)*invHalfdt + fscale*velocity.w*force[index];
velocity.y += (delta.y-oldDelta[index].y)*invHalfdt + fscale*velocity.w*force[index+paddedNumAtoms];
velocity.z += (delta.z-oldDelta[index].z)*invHalfdt + fscale*velocity.w*force[index+paddedNumAtoms*2];
velm[index] = velocity;
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[index];
#endif
pos.x += delta.x;
pos.y += delta.y;
pos.z += delta.z;
#ifdef USE_MIXED_PRECISION
posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
posq[index] = pos;
#endif
}
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestBAOABLangevinIntegrator.h"
void runPlatformTests() {
}
......@@ -1384,7 +1384,7 @@ private:
};
/**
* This kernel is invoked by LangevinIntegrator to take one time step.
* This kernel is invoked by BAOABLangevinIntegrator to take one time step.
*/
class OpenCLIntegrateBAOABStepKernel : public IntegrateBAOABStepKernel {
public:
......
......@@ -7484,11 +7484,11 @@ void OpenCLIntegrateBAOABStepKernel::initialize(const System& system, const BAOA
kernel2 = cl::Kernel(program, "integrateBAOABPart2");
kernel3 = cl::Kernel(program, "integrateBAOABPart3");
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
params.initialize<cl_double>(cl, 3, "baoabParams");
params.initialize<cl_double>(cl, 2, "baoabParams");
oldDelta.initialize<mm_double4>(cl, cl.getPaddedNumAtoms(), "oldDelta");
}
else {
params.initialize<cl_float>(cl, 3, "baoabParams");
params.initialize<cl_float>(cl, 2, "baoabParams");
oldDelta.initialize<mm_float4>(cl, cl.getPaddedNumAtoms(), "oldDelta");
}
prevStepSize = -1.0;
......@@ -7533,12 +7533,10 @@ void OpenCLIntegrateBAOABStepKernel::execute(ContextImpl& context, const BAOABLa
double kT = BOLTZ*temperature;
double vscale = exp(-stepSize*friction);
double fscale = (friction == 0 ? stepSize : (1-vscale)/friction);
double noisescale = sqrt(kT*(1-vscale*vscale));
vector<cl_double> p(params.getSize());
p[0] = vscale;
p[1] = fscale;
p[2] = noisescale;
p[1] = noisescale;
params.upload(p, true, true);
prevTemp = temperature;
prevFriction = friction;
......
enum {VelScale, ForceScale, NoiseScale, MaxParams};
enum {VelScale, NoiseScale};
/**
* Perform the first step of BAOAB integration.
......@@ -28,7 +28,6 @@ __kernel void integrateBAOABPart1(__global mixed4* restrict velm, __global const
__kernel void integrateBAOABPart2(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm, __global mixed4* restrict posDelta,
__global mixed4* restrict oldDelta, __global const mixed* restrict paramBuffer, __global const mixed2* restrict dt, __global const float4* restrict random, unsigned int randomIndex) {
mixed vscale = paramBuffer[VelScale];
mixed fscale = paramBuffer[ForceScale];
mixed noisescale = paramBuffer[NoiseScale];
mixed halfdt = 0.5*dt[0].y;
mixed invHalfdt = 1/halfdt;
......@@ -38,7 +37,7 @@ __kernel void integrateBAOABPart2(__global real4* restrict posq, __global real4*
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
mixed4 delta = posDelta[index];
mixed sqrtInvMass = sqrt(velocity.w);
mixed sqrtInvMass = SQRT(velocity.w);
velocity.xyz += (delta.xyz-oldDelta[index].xyz)*invHalfdt;
velocity.x = vscale*velocity.x + noisescale*sqrtInvMass*random[randomIndex].x;
velocity.y = vscale*velocity.y + noisescale*sqrtInvMass*random[randomIndex].y;
......
......@@ -1172,7 +1172,7 @@ private:
};
/**
* This kernel is invoked by LangevinIntegrator to take one time step.
* This kernel is invoked by BAOABLangevinIntegrator to take one time step.
*/
class ReferenceIntegrateBAOABStepKernel : public IntegrateBAOABStepKernel {
public:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment