Unverified Commit d80ddda3 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Faster algorithm for DrudeSCFIntegrator (#4354)

* Faster algorithm for DrudeSCFIntegrator

* Minor code simplification

* Reduced default error tolerance

* Fixed errors on NVIDIA
parent 4c107329
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. *
* Portions copyright (c) 2008-2023 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -46,7 +46,7 @@ DrudeSCFIntegrator::DrudeSCFIntegrator(double stepSize) : DrudeIntegrator(stepSi
{
setDrudeTemperature(0.0); // This is only used to initialize velocities for this integrator
setStepSize(stepSize);
setMinimizationErrorTolerance(0.1);
setMinimizationErrorTolerance(1.0);
setConstraintTolerance(1e-5);
setMaxDrudeDistance(0.0);
}
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2020 Stanford University and the Authors. *
* Portions copyright (c) 2013-2023 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -375,36 +375,43 @@ double CommonIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl&
}
CommonIntegrateDrudeSCFStepKernel::~CommonIntegrateDrudeSCFStepKernel() {
if (minimizerPos != NULL)
lbfgs_free(minimizerPos);
}
void CommonIntegrateDrudeSCFStepKernel::initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force) {
cc.initializeContexts();
ContextSelector selector(cc);
// Identify Drude particles.
for (int i = 0; i < force.getNumParticles(); i++) {
int numDrude = force.getNumParticles();
drudeParams.initialize<mm_float4>(cc, numDrude, "drudeParams");
drudeIndices.initialize<int>(cc, numDrude, "drudeIndices");
drudeParents.initialize<mm_int4>(cc, numDrude, "drudeParents");
vector<mm_float4> paramVec(numDrude);
vector<mm_int4> parentVec(numDrude);
drudeIndexVec.resize(numDrude);
for (int i = 0; i < numDrude; i++) {
int p, p1, p2, p3, p4;
double charge, polarizability, aniso12, aniso34;
force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
drudeParticles.push_back(p);
double a1 = (p2 == -1 ? 1 : aniso12);
double a2 = (p3 == -1 || p4 == -1 ? 1 : aniso34);
double a3 = 3-a1-a2;
double k3 = ONE_4PI_EPS0*charge*charge/(polarizability*a3);
double k1 = ONE_4PI_EPS0*charge*charge/(polarizability*a1) - k3;
double k2 = ONE_4PI_EPS0*charge*charge/(polarizability*a2) - k3;
paramVec[i] = mm_float4((float) k1, (float) k2, (float) k3, 0.0f);
drudeIndexVec[i] = p;
parentVec[i] = mm_int4(p1, p2, p3, p4);
}
// Initialize the energy minimizer.
minimizerPos = lbfgs_malloc(drudeParticles.size()*3);
if (minimizerPos == NULL)
throw OpenMMException("DrudeSCFIntegrator: Failed to allocate memory");
lbfgs_parameter_init(&minimizerParams);
minimizerParams.linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;
drudeParams.upload(paramVec);
drudeIndices.upload(drudeIndexVec);
drudeParents.upload(parentVec);
// Create the kernels.
ComputeProgram program = cc.compileProgram(CommonKernelSources::verlet);
kernel1 = program->createKernel("integrateVerletPart1");
kernel2 = program->createKernel("integrateVerletPart2");
program = cc.compileProgram(CommonDrudeKernelSources::drudeSCF);
minimizeKernel = program->createKernel("minimizeDrudePositions");
prevStepSize = -1.0;
}
......@@ -431,6 +438,14 @@ void CommonIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const Drud
kernel2->addArg(integration.getPosDelta());
if (cc.getUseMixedPrecision())
kernel2->addArg(cc.getPosqCorrection());
minimizeKernel->addArg((int) drudeParams.getSize());
minimizeKernel->addArg(cc.getPaddedNumAtoms());
minimizeKernel->addArg();
minimizeKernel->addArg(cc.getPosq());
minimizeKernel->addArg(cc.getLongForceBuffer());
minimizeKernel->addArg(drudeParams);
minimizeKernel->addArg(drudeIndices);
minimizeKernel->addArg(drudeParents);
}
if (dt != prevStepSize) {
if (cc.getUseDoublePrecision() || cc.getUseMixedPrecision()) {
......@@ -480,96 +495,24 @@ double CommonIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& cont
return cc.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
struct MinimizerData {
ContextImpl& context;
ComputeContext& cc;
vector<int>& drudeParticles;
MinimizerData(ContextImpl& context, ComputeContext& cc, vector<int>& drudeParticles) : context(context), cc(cc), drudeParticles(drudeParticles) {}
};
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
ContextImpl& context = data->context;
ComputeContext& cc = data->cc;
vector<int>& drudeParticles = data->drudeParticles;
int numDrudeParticles = drudeParticles.size();
// Set the particle positions.
cc.getPosq().download(cc.getPinnedBuffer());
if (cc.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cc.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
mm_double4& p = posq[drudeParticles[i]];
p.x = x[3*i];
p.y = x[3*i+1];
p.z = x[3*i+2];
}
}
else {
mm_float4* posq = (mm_float4*) cc.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
mm_float4& p = posq[drudeParticles[i]];
p.x = x[3*i];
p.y = x[3*i+1];
p.z = x[3*i+2];
}
}
cc.getPosq().upload(cc.getPinnedBuffer());
// Compute the forces and energy for this configuration.
double energy = context.calcForcesAndEnergy(true, true, context.getIntegrator().getIntegrationForceGroups());
long long* force = (long long*) cc.getPinnedBuffer();
cc.getLongForceBuffer().download(force);
double forceScale = -1.0/0x100000000;
int paddedNumAtoms = cc.getPaddedNumAtoms();
for (int i = 0; i < numDrudeParticles; ++i) {
int index = drudeParticles[i];
g[3*i] = forceScale*force[index];
g[3*i+1] = forceScale*force[index+paddedNumAtoms];
g[3*i+2] = forceScale*force[index+paddedNumAtoms*2];
}
return energy;
}
void CommonIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
// Record the initial positions.
int numDrudeParticles = drudeParticles.size();
cc.getPosq().download(cc.getPinnedBuffer());
if (cc.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cc.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
mm_double4 p = posq[drudeParticles[i]];
minimizerPos[3*i] = p.x;
minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+2] = p.z;
}
}
else {
mm_float4* posq = (mm_float4*) cc.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
mm_float4 p = posq[drudeParticles[i]];
minimizerPos[3*i] = p.x;
minimizerPos[3*i+1] = p.y;
minimizerPos[3*i+2] = p.z;
minimizeKernel->setArg(2, (float) tolerance);
long long* forces = (long long*) cc.getPinnedBuffer();
double scale = 1/(double) 0x100000000;
double lastForce = 0;
int numDrude = drudeParams.getSize();
int paddedNumAtoms = cc.getPaddedNumAtoms();
for (int iteration = 0; iteration < 50; iteration++) {
context.calcForcesAndEnergy(true, false, context.getIntegrator().getIntegrationForceGroups());
minimizeKernel->execute(drudeParams.getSize());
cc.getLongForceBuffer().download(forces);
double totalForce = 0;
for (int i : drudeIndexVec) {
Vec3 f(scale*forces[i], scale*forces[i+paddedNumAtoms], scale*forces[i+paddedNumAtoms*2]);
totalForce += f.dot(f);
}
minimizerParams.xtol = 1e-7;
if (sqrt(totalForce/(3*numDrude)) < tolerance || (iteration > 0 && totalForce > 0.9*lastForce))
break;
lastForce = totalForce;
}
// Determine a normalization constant for scaling the tolerance.
double norm = 0.0;
for (int i = 0; i < 3*numDrudeParticles; i++)
norm += minimizerPos[i]*minimizerPos[i];
norm /= numDrudeParticles;
norm = (norm < 1 ? 1 : sqrt(norm));
minimizerParams.epsilon = tolerance/norm;
// Perform the minimization.
lbfgsfloatval_t fx;
MinimizerData data(context, cc, drudeParticles);
lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
}
\ No newline at end of file
}
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2019 Stanford University and the Authors. *
* Portions copyright (c) 2013-2023 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -35,7 +35,6 @@
#include "openmm/DrudeKernels.h"
#include "openmm/common/ComputeContext.h"
#include "openmm/common/ComputeArray.h"
#include "lbfgs.h"
namespace OpenMM {
......@@ -121,7 +120,7 @@ private:
class CommonIntegrateDrudeSCFStepKernel : public IntegrateDrudeSCFStepKernel {
public:
CommonIntegrateDrudeSCFStepKernel(const std::string& name, const Platform& platform, ComputeContext& cc) :
IntegrateDrudeSCFStepKernel(name, platform), cc(cc), minimizerPos(NULL), hasInitializedKernels(false) {
IntegrateDrudeSCFStepKernel(name, platform), cc(cc), hasInitializedKernels(false) {
}
~CommonIntegrateDrudeSCFStepKernel();
/**
......@@ -151,10 +150,9 @@ private:
ComputeContext& cc;
double prevStepSize;
bool hasInitializedKernels;
std::vector<int> drudeParticles;
lbfgsfloatval_t *minimizerPos;
lbfgs_parameter_t minimizerParams;
ComputeKernel kernel1, kernel2;
std::vector<int> drudeIndexVec;
ComputeArray drudeParams, drudeIndices, drudeParents;
ComputeKernel kernel1, kernel2, minimizeKernel;
};
} // namespace OpenMM
......
KERNEL void minimizeDrudePositions(int numDrude, int paddedNumAtoms, float tolerance, GLOBAL real4* RESTRICT posq,
GLOBAL const mm_long* RESTRICT force, GLOBAL float4* RESTRICT drudeParams, GLOBAL int* RESTRICT drudeIndex,
GLOBAL int4* RESTRICT drudeParents) {
const real scale = 1/(real) 0x100000000;
for (int i = GLOBAL_ID; i < numDrude; i += GLOBAL_SIZE) {
int index = drudeIndex[i];
int4 parents = drudeParents[i];
float4 params = drudeParams[i];
real3 fscale = make_real3(params.z, params.z, params.z);
if (parents.y != -1) {
real3 dir = trimTo3(posq[parents.x]-posq[parents.y]);
dir *= RSQRT(dot(dir, dir));
fscale += params.x*dir;
}
if (parents.z != -1 && parents.w != -1) {
real3 dir = trimTo3(posq[parents.z]-posq[parents.w]);
dir *= RSQRT(dot(dir, dir));
fscale += params.y*dir;
}
real4 pos = posq[index];
real4 f = make_real4(scale*force[index], scale*force[index+paddedNumAtoms], scale*force[index+paddedNumAtoms*2], 0);
real damping = (SQRT(f.x*f.x + f.y*f.y + f.z*f.z) > 10*tolerance ? 0.5f : 1.0f);
pos.x += damping*f.x/fscale.x;
pos.y += damping*f.y/fscale.y;
pos.z += damping*f.z/fscale.z;
posq[index] = pos;
}
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2022 Stanford University and the Authors. *
* Portions copyright (c) 2011-2023 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -384,18 +384,29 @@ double ReferenceIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextIm
}
ReferenceIntegrateDrudeSCFStepKernel::~ReferenceIntegrateDrudeSCFStepKernel() {
if (minimizerPos != NULL)
lbfgs_free(minimizerPos);
}
void ReferenceIntegrateDrudeSCFStepKernel::initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force) {
// Identify Drude particles.
for (int i = 0; i < force.getNumParticles(); i++) {
int p, p1, p2, p3, p4;
// Record information about the drude particles.
int numDrude = force.getNumParticles();
particle.resize(numDrude);
particle1.resize(numDrude);
particle2.resize(numDrude);
particle3.resize(numDrude);
particle4.resize(numDrude);
k1.resize(numDrude);
k2.resize(numDrude);
k3.resize(numDrude);
for (int i = 0; i < numDrude; i++) {
double charge, polarizability, aniso12, aniso34;
force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
drudeParticles.push_back(p);
force.getParticleParameters(i, particle[i], particle1[i], particle2[i], particle3[i], particle4[i], charge, polarizability, aniso12, aniso34);
double a1 = (particle2[i] == -1 ? 1 : aniso12);
double a2 = (particle3[i] == -1 || particle4[i] == -1 ? 1 : aniso34);
double a3 = 3-a1-a2;
k3[i] = ONE_4PI_EPS0*charge*charge/(polarizability*a3);
k1[i] = ONE_4PI_EPS0*charge*charge/(polarizability*a1) - k3[i];
k2[i] = ONE_4PI_EPS0*charge*charge/(polarizability*a2) - k3[i];
}
// Record particle masses.
......@@ -406,16 +417,6 @@ void ReferenceIntegrateDrudeSCFStepKernel::initialize(const System& system, cons
particleMass.push_back(mass);
particleInvMass.push_back(mass == 0.0 ? 0.0 : 1.0/mass);
}
// Initialize the energy minimizer.
minimizerPos = lbfgs_malloc(drudeParticles.size()*3);
if (minimizerPos == NULL)
throw OpenMMException("DrudeSCFIntegrator: Failed to allocate memory");
lbfgs_parameter_init(&minimizerParams);
minimizerParams.linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;
if (sizeof(double) < 8)
minimizerParams.xtol = 1e-7;
}
void ReferenceIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
......@@ -461,54 +462,40 @@ double ReferenceIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& c
return computeShiftedKineticEnergy(context, particleInvMass, 0.5*integrator.getStepSize());
}
struct MinimizerData {
ContextImpl& context;
vector<int>& drudeParticles;
MinimizerData(ContextImpl& context, vector<int>& drudeParticles) : context(context), drudeParticles(drudeParticles) {}
};
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
ContextImpl& context = data->context;
vector<int>& drudeParticles = data->drudeParticles;
int numDrudeParticles = drudeParticles.size();
// Compute the force and energy for this configuration.
vector<Vec3>& pos = extractPositions(context);
vector<Vec3>& force = extractForces(context);
for (int i = 0; i < numDrudeParticles; i++)
pos[drudeParticles[i]] = Vec3(x[3*i], x[3*i+1], x[3*i+2]);
double energy = context.calcForcesAndEnergy(true, true, context.getIntegrator().getIntegrationForceGroups());
for (int i = 0; i < numDrudeParticles; i++) {
Vec3 f = force[drudeParticles[i]];
g[3*i] = -f[0];
g[3*i+1] = -f[1];
g[3*i+2] = -f[2];
}
return energy;
}
void ReferenceIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
// Record the initial positions and determine a normalization constant for scaling the tolerance.
vector<Vec3>& pos = extractPositions(context);
int numDrudeParticles = drudeParticles.size();
double norm = 0.0;
for (int i = 0; i < numDrudeParticles; i++) {
Vec3 p = pos[drudeParticles[i]];
minimizerPos[3*i] = p[0];
minimizerPos[3*i+1] = p[1];
minimizerPos[3*i+2] = p[2];
norm += p.dot(p);
vector<Vec3>& force = extractForces(context);
int numDrude = particle.size();
double lastForce = 0;
for (int iteration = 0; iteration < 50; iteration++) {
context.calcForcesAndEnergy(true, false, context.getIntegrator().getIntegrationForceGroups());
double totalForce = 0;
for (int i = 0; i < numDrude; i++) {
int p = particle[i];
int p1 = particle1[i];
int p2 = particle2[i];
int p3 = particle3[i];
int p4 = particle4[i];
Vec3 fscale(k3[i], k3[i], k3[i]);
if (p2 != -1) {
Vec3 dir = pos[p1]-pos[p2];
dir /= sqrt(dir.dot(dir));;
fscale += k1[i]*dir;
}
if (p3 != -1 && p4 != -1) {
Vec3 dir = pos[p3]-pos[p4];
dir /= sqrt(dir.dot(dir));;
fscale += k2[i]*dir;
}
Vec3 f = force[p];
double f2 = f.dot(f);
totalForce += f2;
double damping = (sqrt(f2) > 10*tolerance ? 0.5 : 1.0);
for (int i = 0; i < 3; i++)
pos[p][i] += damping*f[i]/fscale[i];
}
if (sqrt(totalForce/(3*numDrude)) < tolerance || (iteration > 0 && totalForce > 0.9*lastForce))
break;
lastForce = totalForce;
}
norm /= numDrudeParticles;
norm = (norm < 1 ? 1 : sqrt(norm));
minimizerParams.epsilon = tolerance/norm;
// Perform the minimization.
lbfgsfloatval_t fx;
MinimizerData data(context, drudeParticles);
lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
}
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Portions copyright (c) 2013-2023 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -35,7 +35,6 @@
#include "ReferencePlatform.h"
#include "openmm/DrudeKernels.h"
#include "openmm/Vec3.h"
#include "lbfgs.h"
#include <utility>
#include <vector>
......@@ -127,7 +126,7 @@ private:
class ReferenceIntegrateDrudeSCFStepKernel : public IntegrateDrudeSCFStepKernel {
public:
ReferenceIntegrateDrudeSCFStepKernel(const std::string& name, const Platform& platform, ReferencePlatform::PlatformData& data) :
IntegrateDrudeSCFStepKernel(name, platform), data(data), minimizerPos(NULL) {
IntegrateDrudeSCFStepKernel(name, platform), data(data) {
}
~ReferenceIntegrateDrudeSCFStepKernel();
/**
......@@ -155,11 +154,10 @@ public:
private:
void minimize(ContextImpl& context, double tolerance);
ReferencePlatform::PlatformData& data;
std::vector<int> drudeParticles;
std::vector<double> particleInvMass;
lbfgsfloatval_t *minimizerPos;
lbfgs_parameter_t minimizerParams;
double maxDrudeDistance;
std::vector<int> particle, particle1, particle2, particle3, particle4;
std::vector<double> k1, k2, k3;
};
} // namespace OpenMM
......
......@@ -100,6 +100,7 @@ void testWater() {
// Simulate it and check energy conservation and the total force on the Drude particles.
DrudeSCFIntegrator integ(0.0005);
integ.setMinimizationErrorTolerance(0.1);
Context context(system, integ, platform);
context.setPositions(positions);
context.applyConstraints(1e-5);
......
......@@ -1050,10 +1050,10 @@ class TestAPIUnits(unittest.TestCase):
integrator = DrudeSCFIntegrator(0.002)
self.assertEqual(integrator.getStepSize(), 0.002*picoseconds)
self.assertAlmostEqualUnit(integrator.getMinimizationErrorTolerance(),
0.1*kilojoule_per_mole/nanometer)
integrator.setMinimizationErrorTolerance(1*kilocalorie_per_mole/angstrom)
1*kilojoule_per_mole/nanometer)
integrator.setMinimizationErrorTolerance(0.1*kilocalorie_per_mole/angstrom)
self.assertAlmostEqualUnit(integrator.getMinimizationErrorTolerance(),
1*kilocalorie_per_mole/angstrom)
0.1*kilocalorie_per_mole/angstrom)
def testDrudeLangevinIntegrator(self):
""" Tests the DrudeLangevinIntegrator API features """
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment