Unverified Commit 6ed75b19 authored by Andy Simmonett's avatar Andy Simmonett
Browse files

Add Cuda velocity verlet implementation, correct virtual site computation

parent 63a9e6c2
......@@ -1366,6 +1366,40 @@ private:
CUfunction kernel1, kernel2;
};
/*
* This kernel is invoked by VelocityVerletIntegrator to take one time step.
*/
class CudaIntegrateVelocityVerletStepKernel : public IntegrateVelocityVerletStepKernel {
public:
CudaIntegrateVelocityVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
IntegrateVelocityVerletStepKernel(name, platform), cu(cu) { }
~CudaIntegrateVelocityVerletStepKernel() {}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the VelocityVerletIntegrator this kernel will be used for
*/
void initialize(const System& system, const VelocityVerletIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
*/
void execute(ContextImpl& context, const VelocityVerletIntegrator& integrator);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the VelocityVerletIntegrator this kernel is being used for
*/
double computeKineticEnergy(ContextImpl& context, const VelocityVerletIntegrator& integrator);
private:
CudaContext& cu;
CUfunction kernel1, kernel2, kernel3;
};
/**
* This kernel is invoked by LangevinIntegrator to take one time step.
*/
......@@ -1677,6 +1711,60 @@ private:
CUfunction kernel;
};
/**
* This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
* and update the associated particle velocities.
*/
class CudaNoseHooverChainKernel : public NoseHooverChainKernel {
public:
CudaNoseHooverChainKernel(std::string name, const Platform& platform, CudaContext& cu) : NoseHooverChainKernel(name, platform), cu(cu) {
}
~CudaNoseHooverChainKernel() {}
/**
* Initialize the kernel.
*/
virtual void initialize();
/**
* Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnergy the kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @return the velocity scale factor to apply to the particles associated with this heat bath.
*/
virtual double propagateChain(ContextImpl& context, const NoseHooverChain &nhc, double kineticEnergy, double timeStep);
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @return the total heat bath energy.
*/
virtual double computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc);
/**
* Execute the kernel that computes the kinetic energy for a subset of atoms,
* or the relative kinetic energy of Drude particles with respect to their parent atoms
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
*/
virtual double computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain);
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactor the multiplicative factor by which velocities are scaled.
*/
virtual void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, double scaleFactor);
private:
CudaContext& cu;
CUfunction kernel;
};
/**
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
*/
......
......@@ -130,6 +130,10 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return new CudaIntegrateCustomStepKernel(name, platform, cu);
if (name == ApplyAndersenThermostatKernel::Name())
return new CudaApplyAndersenThermostatKernel(name, platform, cu);
if (name == NoseHooverChainKernel::Name())
return new CudaNoseHooverChainKernel(name, platform, cu);
if (name == IntegrateVelocityVerletStepKernel::Name())
return new CudaIntegrateVelocityVerletStepKernel(name, platform, cu);
if (name == ApplyMonteCarloBarostatKernel::Name())
return new CudaApplyMonteCarloBarostatKernel(name, platform, cu);
if (name == RemoveCMMotionKernel::Name())
......
......@@ -37,6 +37,7 @@
#include "openmm/internal/CustomManyParticleForceImpl.h"
#include "openmm/internal/CustomNonbondedForceImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/internal/NoseHooverChainImpl.h"
#include "openmm/internal/OSRngSeed.h"
#include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h"
......@@ -7075,6 +7076,67 @@ double CudaIntegrateVerletStepKernel::computeKineticEnergy(ContextImpl& context,
return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
void CudaIntegrateVelocityVerletStepKernel::initialize(const System& system, const VelocityVerletIntegrator& integrator) {
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
map<string, string> defines;
CUmodule module = cu.createModule(CudaKernelSources::velocityVerlet, defines, "");
kernel1 = cu.getKernel(module, "integrateVelocityVerletPart1");
kernel2 = cu.getKernel(module, "integrateVelocityVerletPart2");
kernel3 = cu.getKernel(module, "integrateVelocityVerletPart3");
}
void CudaIntegrateVelocityVerletStepKernel::execute(ContextImpl& context, const VelocityVerletIntegrator& integrator) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms();
int paddedNumAtoms = cu.getPaddedNumAtoms();
double dt = integrator.getStepSize();
cu.getIntegrationUtilities().setNextStepSize(dt);
//// Call the first integration kernel.
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
void* args1[] = {&numAtoms, &paddedNumAtoms, &cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
cu.executeKernel(kernel1, args1, numAtoms, 128);
//// Apply constraints.
integration.applyConstraints(integrator.getConstraintTolerance());
//// Call the second integration kernel.
void* args2[] = {&numAtoms, &cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
&cu.getVelm().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
cu.executeKernel(kernel2, args2, numAtoms, 128);
integration.computeVirtualSites();
//// Update forces
context.calcForcesAndEnergy(true, false);
//// Call the third integration kernel.
void* args3[] = {&numAtoms, &paddedNumAtoms, &cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
cu.executeKernel(kernel3, args3, numAtoms, 128);
// TODO: Figure out if this is really needed. The constraint velocities are accounted for
// in a finite difference sense in the step 3 kernel, when the velocities are updated.
integration.applyVelocityConstraints(integrator.getConstraintTolerance());
//// Update the time and step count.
cu.setTime(cu.getTime()+dt);
cu.setStepCount(cu.getStepCount()+1);
cu.reorderAtoms();
}
double CudaIntegrateVelocityVerletStepKernel::computeKineticEnergy(ContextImpl& context, const VelocityVerletIntegrator& integrator) {
return cu.getIntegrationUtilities().computeKineticEnergy(0);
}
void CudaIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
......@@ -8287,6 +8349,24 @@ void CudaApplyAndersenThermostatKernel::execute(ContextImpl& context) {
cu.executeKernel(kernel, args, cu.getNumAtoms());
}
void CudaNoseHooverChainKernel::initialize() {
}
double CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, double kineticEnergy, double timeStep) {
return 1;
}
double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) {
return 1;
}
double CudaNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain) {
return 1;
}
void CudaNoseHooverChainKernel::scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, double scaleFactor) {
}
void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
cu.setAsCurrent();
savedPositions.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4), "savedPositions");
......
......@@ -96,12 +96,14 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateVelocityVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
registerKernelFactory(IntegrateCustomStepKernel::Name(), factory);
registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
registerKernelFactory(NoseHooverChainKernel::Name(), factory);
registerKernelFactory(ApplyMonteCarloBarostatKernel::Name(), factory);
registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
platformProperties.push_back(CudaDeviceIndex());
......
/**
* Perform the first step of Velocity Verlet integration.
*/
extern "C" __global__ void integrateVelocityVerletPart1(int numAtoms, int paddedNumAtoms, const mixed2* __restrict__ dt, const real4* __restrict__ posq,
const real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const long long* __restrict__ force, mixed4* __restrict__ posDelta) {
const mixed2 stepSize = dt[0];
const mixed dtPos = stepSize.y;
const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
const mixed scale = 0.5f*dtVel/(mixed) 0x100000000;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[index];
#endif
velocity.x += scale*force[index]*velocity.w;
velocity.y += scale*force[index+paddedNumAtoms]*velocity.w;
velocity.z += scale*force[index+paddedNumAtoms*2]*velocity.w;
pos.x = velocity.x*dtPos;
pos.y = velocity.y*dtPos;
pos.z = velocity.z*dtPos;
posDelta[index] = pos;
velm[index] = velocity;
}
}
}
/**
* Perform the second step of Velocity Verlet integration.
*/
extern "C" __global__ void integrateVelocityVerletPart2(int numAtoms, mixed2* __restrict__ dt, real4* __restrict__ posq,
real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const mixed4* __restrict__ posDelta) {
mixed2 stepSize = dt[0];
int index = blockIdx.x*blockDim.x+threadIdx.x;
if (index == 0)
dt[0].x = stepSize.y;
for (; index < numAtoms; index += blockDim.x*gridDim.x) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[index];
#endif
mixed4 delta = posDelta[index];
pos.x += delta.x;
pos.y += delta.y;
pos.z += delta.z;
#ifdef USE_MIXED_PRECISION
posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
posq[index] = pos;
#endif
}
}
}
/**
* Perform the third step of Velocity Verlet integration.
*/
extern "C" __global__ void integrateVelocityVerletPart3(int numAtoms, int paddedNumAtoms, mixed2* __restrict__ dt, real4* __restrict__ posq,
real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed4* __restrict__ posDelta) {
mixed2 stepSize = dt[0];
#if __CUDA_ARCH__ >= 130
double oneOverDt = 1.0/stepSize.y;
#else
float oneOverDt = 1.0f/stepSize.y;
float correction = (1.0f-oneOverDt*stepSize.y)/stepSize.y;
#endif
const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
const mixed scale = 0.5f*dtVel/(mixed) 0x100000000;
int index = blockIdx.x*blockDim.x+threadIdx.x;
if (index == 0)
dt[0].x = stepSize.y;
for (; index < numAtoms; index += blockDim.x*gridDim.x) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
mixed4 deltaXconstrained = posDelta[index];
velocity.x += scale*force[index]*velocity.w + (deltaXconstrained.x - velocity.x*stepSize.y)*oneOverDt;
velocity.y += scale*force[index+paddedNumAtoms]*velocity.w + (deltaXconstrained.y - velocity.y*stepSize.y)*oneOverDt;
velocity.z += scale*force[index+paddedNumAtoms*2]*velocity.w + (deltaXconstrained.z - velocity.z*stepSize.y)*oneOverDt;
#if __CUDA_ARCH__ < 130
velocity.x += (deltaXconstrained.x - velocity.x*stepSize.y)*correction;
velocity.y += (deltaXconstrained.y - velocity.y*stepSize.y)*correction;
velocity.z += (deltaXconstrained.z - velocity.z*stepSize.y)*correction;
#endif
velm[index] = velocity;
}
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Andreas Krämer and Andrew C. Simmmonett *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestVelocityVerletIntegrator.h"
void runPlatformTests() {
}
......@@ -109,6 +109,8 @@ void ReferenceVelocityVerletDynamics::update(OpenMM::ContextImpl &context, const
if (referenceConstraintAlgorithm)
referenceConstraintAlgorithm->apply(xPrime, atomCoordinates, inverseMasses, tolerance);
ReferenceVirtualSites::computePositions(system, atomCoordinates);
context.calcForcesAndEnergy(true, false);
for (int i = 0; i < numberOfAtoms; ++i) {
......@@ -129,6 +131,5 @@ void ReferenceVelocityVerletDynamics::update(OpenMM::ContextImpl &context, const
if (referenceConstraintAlgorithm)
referenceConstraintAlgorithm->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
ReferenceVirtualSites::computePositions(system, atomCoordinates);
incrementTimeStep();
}
......@@ -30,7 +30,7 @@
* -------------------------------------------------------------------------- */
#include "ReferenceTests.h"
#include "TestVerletIntegrator.h"
#include "TestVelocityVerletIntegrator.h"
void runPlatformTests() {
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment