Commit 8f5c9840 authored by peastman's avatar peastman
Browse files

Created CUDA implementation of DrudeSCFIntegrator

parent 7cbb8a01
......@@ -43,6 +43,7 @@ extern "C" void registerKernelFactories() {
CudaDrudeKernelFactory* factory = new CudaDrudeKernelFactory();
platform.registerKernelFactory(CalcDrudeForceKernel::Name(), factory);
platform.registerKernelFactory(IntegrateDrudeLangevinStepKernel::Name(), factory);
platform.registerKernelFactory(IntegrateDrudeSCFStepKernel::Name(), factory);
}
catch (std::exception ex) {
// Ignore
......@@ -65,5 +66,7 @@ KernelImpl* CudaDrudeKernelFactory::createKernelImpl(std::string name, const Pla
return new CudaCalcDrudeForceKernel(name, platform, cu);
if (name == IntegrateDrudeLangevinStepKernel::Name())
return new CudaIntegrateDrudeLangevinStepKernel(name, platform, cu);
if (name == IntegrateDrudeSCFStepKernel::Name())
return new CudaIntegrateDrudeSCFStepKernel(name, platform, cu);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
}
......@@ -311,3 +311,212 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
double CudaIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
class CudaIntegrateDrudeSCFStepKernel::ReorderListener : public CudaContext::ReorderListener {
public:
ReorderListener(CudaContext& cu, const vector<int>& drudeParticles, vector<int>& reorderedDrudeParticles) :
cu(cu), drudeParticles(drudeParticles), reorderedDrudeParticles(reorderedDrudeParticles) {
}
void execute() {
const vector<int>& order = cu.getAtomIndex();
int numParticles = order.size();
vector<int> inverseOrder(numParticles);
for (int i = 0; i < numParticles; i++)
inverseOrder[order[i]] = i;
int numDrudeParticles = drudeParticles.size();
for (int i = 0; i < numDrudeParticles; i++)
reorderedDrudeParticles[i] = inverseOrder[drudeParticles[i]];
}
private:
CudaContext& cu;
const vector<int>& drudeParticles;
vector<int>& reorderedDrudeParticles;
};
CudaIntegrateDrudeSCFStepKernel::~CudaIntegrateDrudeSCFStepKernel() {
}
void CudaIntegrateDrudeSCFStepKernel::initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force) {
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
// Identify Drude particles.
for (int i = 0; i < force.getNumParticles(); i++) {
int p, p1, p2, p3, p4;
double charge, polarizability, aniso12, aniso34;
force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
drudeParticles.push_back(p);
reorderedDrudeParticles.push_back(p);
}
cu.addReorderListener(new ReorderListener(cu, drudeParticles, reorderedDrudeParticles));
// Initialize the energy minimizer.
minimizerPos = lbfgs_malloc(drudeParticles.size()*3);
if (minimizerPos == NULL)
throw OpenMMException("DrudeSCFIntegrator: Failed to allocate memory");
lbfgs_parameter_init(&minimizerParams);
minimizerParams.linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;
// Create the kernels.
map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
CUmodule module = cu.createModule(CudaKernelSources::verlet, defines, "");
kernel1 = cu.getKernel(module, "integrateVerletPart1");
kernel2 = cu.getKernel(module, "integrateVerletPart2");
prevStepSize = -1.0;
}
void CudaIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
cu.setAsCurrent();
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
int numAtoms = cu.getNumAtoms();
double dt = integrator.getStepSize();
if (dt != prevStepSize) {
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
vector<double2> stepSizeVec(1);
stepSizeVec[0] = make_double2(dt, dt);
cu.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
}
else {
vector<float2> stepSizeVec(1);
stepSizeVec[0] = make_float2((float) dt, (float) dt);
cu.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
}
prevStepSize = dt;
}
// Call the first integration kernel.
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
void* args1[] = {&cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
cu.executeKernel(kernel1, args1, numAtoms);
// Apply constraints.
integration.applyConstraints(integrator.getConstraintTolerance());
// Call the second integration kernel.
void* args2[] = {&cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
&cu.getVelm().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
cu.executeKernel(kernel2, args2, numAtoms);
// Update the positions of virtual sites and Drude particles.
integration.computeVirtualSites();
minimize(context, integrator.getMinimizationErrorTolerance());
// Update the time and step count.
cu.setTime(cu.getTime()+dt);
cu.setStepCount(cu.getStepCount()+1);
}
double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
struct MinimizerData {
ContextImpl& context;
CudaContext& cu;
vector<int>& reorderedDrudeParticles;
MinimizerData(ContextImpl& context, CudaContext& cu, vector<int>& reorderedDrudeParticles) : context(context), cu(cu), reorderedDrudeParticles(reorderedDrudeParticles) {}
};
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
ContextImpl& context = data->context;
CudaContext& cu = data->cu;
vector<int>& reorderedDrudeParticles = data->reorderedDrudeParticles;
int numDrudeParticles = reorderedDrudeParticles.size();
// Set the particle positions.
cu.getPosq().download(cu.getPinnedBuffer());
double4 periodicBoxSize = cu.getPeriodicBoxSize();
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
double4& p = posq[reorderedDrudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]];
p.x = x[3*i]+offset.x*periodicBoxSize.x;
p.y = x[3*i+1]+offset.y*periodicBoxSize.y;
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
}
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
float4& p = posq[reorderedDrudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]];
p.x = x[3*i]+offset.x*periodicBoxSize.x;
p.y = x[3*i+1]+offset.y*periodicBoxSize.y;
p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
}
}
cu.getPosq().upload(cu.getPinnedBuffer());
// Compute the forces and energy for this configuration.
double energy = context.calcForcesAndEnergy(true, true);
long long* force = (long long*) cu.getPinnedBuffer();
cu.getForce().download(force);
double forceScale = -1.0/0x100000000;
int paddedNumAtoms = cu.getPaddedNumAtoms();
for (int i = 0; i < numDrudeParticles; ++i) {
int index = reorderedDrudeParticles[i];
g[3*i] = forceScale*force[index];
g[3*i+1] = forceScale*force[index+paddedNumAtoms];
g[3*i+2] = forceScale*force[index+paddedNumAtoms*2];
}
return energy;
}
void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
// Record the initial positions.
int numDrudeParticles = reorderedDrudeParticles.size();
cu.getPosq().download(cu.getPinnedBuffer());
double4 periodicBoxSize = cu.getPeriodicBoxSize();
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
double4 p = posq[reorderedDrudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]];
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
}
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
float4 p = posq[reorderedDrudeParticles[i]];
int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]];
minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x;
minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y;
minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
}
minimizerParams.xtol = 1e-7;
}
// Determine a normalization constant for scaling the tolerance.
double norm = 0.0;
for (int i = 0; i < 3*numDrudeParticles; i++)
norm += minimizerPos[i]*minimizerPos[i];
norm /= numDrudeParticles;
norm = (norm < 1 ? 1 : sqrt(norm));
minimizerParams.epsilon = tolerance/norm;
// Perform the minimization.
lbfgsfloatval_t fx;
MinimizerData data(context, cu, reorderedDrudeParticles);
lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
}
\ No newline at end of file
......@@ -35,6 +35,7 @@
#include "openmm/DrudeKernels.h"
#include "CudaContext.h"
#include "CudaArray.h"
#include "lbfgs.h"
namespace OpenMM {
......@@ -115,6 +116,49 @@ private:
CUfunction kernel1, kernel2;
};
/**
* This kernel is invoked by DrudeSCFIntegrator to take one time step
*/
class CudaIntegrateDrudeSCFStepKernel : public IntegrateDrudeSCFStepKernel {
public:
CudaIntegrateDrudeSCFStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
IntegrateDrudeSCFStepKernel(name, platform), cu(cu) {
}
~CudaIntegrateDrudeSCFStepKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the DrudeSCFIntegrator this kernel will be used for
* @param force the DrudeForce to get particle parameters from
*/
void initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the DrudeSCFIntegrator this kernel is being used for
*/
void execute(ContextImpl& context, const DrudeSCFIntegrator& integrator);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the DrudeSCFIntegrator this kernel is being used for
*/
double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
private:
class ReorderListener;
void minimize(ContextImpl& context, double tolerance);
CudaContext& cu;
double prevStepSize;
std::vector<int> drudeParticles;
std::vector<int> reorderedDrudeParticles;
lbfgsfloatval_t *minimizerPos;
lbfgs_parameter_t minimizerParams;
CUfunction kernel1, kernel2;
};
} // namespace OpenMM
#endif /*CUDA_DRUDE_KERNELS_H_*/
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of DrudeSCFIntegrator.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "openmm/NonbondedForce.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/VirtualSite.h"
#include "openmm/DrudeForce.h"
#include "openmm/DrudeSCFIntegrator.h"
#include "SimTKUtilities/SimTKOpenMMUtilities.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
extern "C" OPENMM_EXPORT void registerDrudeCudaKernelFactories();
void testWater() {
// Create a box of SWM4-NDP water molecules. This involves constraints, virtual sites,
// and Drude particles.
const int gridSize = 4;
const int numMolecules = gridSize*gridSize*gridSize;
const double spacing = 0.6;
const double boxSize = spacing*(gridSize+1);
System system;
NonbondedForce* nonbonded = new NonbondedForce();
DrudeForce* drude = new DrudeForce();
system.addForce(nonbonded);
system.addForce(drude);
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
nonbonded->setCutoffDistance(1.0);
for (int i = 0; i < numMolecules; i++) {
int startIndex = system.getNumParticles();
system.addParticle(15.6); // O
system.addParticle(0.4); // D
system.addParticle(1.0); // H1
system.addParticle(1.0); // H2
system.addParticle(0.0); // M
nonbonded->addParticle(1.71636, 0.318395, 0.21094*4.184);
nonbonded->addParticle(-1.71636, 1, 0);
nonbonded->addParticle(0.55733, 1, 0);
nonbonded->addParticle(0.55733, 1, 0);
nonbonded->addParticle(-1.11466, 1, 0);
for (int j = 0; j < 5; j++)
for (int k = 0; k < j; k++)
nonbonded->addException(startIndex+j, startIndex+k, 0, 1, 0);
system.addConstraint(startIndex, startIndex+2, 0.09572);
system.addConstraint(startIndex, startIndex+3, 0.09572);
system.addConstraint(startIndex+2, startIndex+3, 0.15139);
system.setVirtualSite(startIndex+4, new ThreeParticleAverageSite(startIndex, startIndex+2, startIndex+3, 0.786646558, 0.106676721, 0.106676721));
drude->addParticle(startIndex+1, startIndex, -1, -1, -1, -1.71636, 1.71636*1.71636/(100000*4.184), 1, 1);
}
vector<Vec3> positions;
for (int i = 0; i < gridSize; i++)
for (int j = 0; j < gridSize; j++)
for (int k = 0; k < gridSize; k++) {
Vec3 pos(i*spacing, j*spacing, k*spacing);
positions.push_back(pos);
positions.push_back(pos);
positions.push_back(pos+Vec3(0.09572, 0, 0));
positions.push_back(pos+Vec3(-0.023999, 0.092663, 0));
positions.push_back(pos);
}
// Simulate it and check energy conservation and the total force on the Drude particles.
DrudeSCFIntegrator integ(0.0005);
Platform& platform = Platform::getPlatformByName("CUDA");
Context context(system, integ, platform);
context.setPositions(positions);
context.applyConstraints(1e-5);
context.setVelocitiesToTemperature(300.0);
State state = context.getState(State::Energy);
double initialEnergy;
int numSteps = 1000;
double maxNorm = (platform.getPropertyValue(context, "CudaPrecision") == "double" ? 1.0 : 5.0);
for (int i = 0; i < numSteps; i++) {
integ.step(1);
state = context.getState(State::Energy | State::Forces);
if (i == 0)
initialEnergy = state.getPotentialEnergy()+state.getKineticEnergy();
else
ASSERT_EQUAL_TOL(initialEnergy, state.getPotentialEnergy()+state.getKineticEnergy(), 0.01);
const vector<Vec3>& force = state.getForces();
double norm = 0.0;
for (int j = 1; j < (int) force.size(); j += 5)
norm += sqrt(force[j].dot(force[j]));
norm = (norm/numMolecules);
ASSERT(norm < maxNorm);
}
}
int main(int argc, char* argv[]) {
try {
registerDrudeCudaKernelFactories();
if (argc > 1)
Platform::getPlatformByName("CUDA").setPropertyDefaultValue("CudaPrecision", string(argv[1]));
testWater();
}
catch(const std::exception& e) {
std::cout << "exception: " << e.what() << std::endl;
std::cout << "FAIL - ERROR. Test failed." << std::endl;
return 1;
}
std::cout << "Done" << std::endl;
return 0;
}
......@@ -418,8 +418,9 @@ void ReferenceIntegrateDrudeSCFStepKernel::initialize(const System& system, cons
if (minimizerPos == NULL)
throw OpenMMException("DrudeSCFIntegrator: Failed to allocate memory");
lbfgs_parameter_init(&minimizerParams);
minimizerParams.max_iterations = 0;
minimizerParams.linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;
if (sizeof(RealOpenMM) < 8)
minimizerParams.xtol = 1e-7;
}
void ReferenceIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
......
......@@ -106,12 +106,15 @@ void testWater() {
context.applyConstraints(1e-5);
context.setVelocitiesToTemperature(300.0);
State state = context.getState(State::Energy);
double initialEnergy = state.getPotentialEnergy()+state.getKineticEnergy();
double initialEnergy;
int numSteps = 1000;
for (int i = 0; i < numSteps; i++) {
integ.step(1);
state = context.getState(State::Energy | State::Forces);
ASSERT_EQUAL_TOL(initialEnergy, state.getPotentialEnergy()+state.getKineticEnergy(), 0.01);
if (i == 0)
initialEnergy = state.getPotentialEnergy()+state.getKineticEnergy();
else
ASSERT_EQUAL_TOL(initialEnergy, state.getPotentialEnergy()+state.getKineticEnergy(), 0.01);
const vector<Vec3>& force = state.getForces();
double norm = 0.0;
for (int j = 1; j < (int) force.size(); j += 5)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment