Improve the performance of sending the AmoebaVdwLambda to Cuda using pinned...

Improve the performance of sending the AmoebaVdwLambda to Cuda using pinned host memory; Updated the AmoebaVdwForceProxy to version 3, and added backward compatibility to version 2; updated TestAPIUnits.py to handle the per particle lambda flag

Improve the performance of sending the AmoebaVdwLambda to Cuda using pinned...
Improve the performance of sending the AmoebaVdwLambda to Cuda using pinned host memory; Updated the AmoebaVdwForceProxy to version 3, and added backward compatibility to version 2; updated TestAPIUnits.py to handle the per particle lambda flag
926e7b9a · Michael Schnieders · 9c95f25c · 926e7b9a · 926e7b9a · 926e7b9a
Commit 926e7b9a authored Aug 22, 2019 by Michael Schnieders
5 changed files
--- a/plugins/amoeba/openmmapi/include/openmm/AmoebaVdwForce.h
+++ b/plugins/amoeba/openmmapi/include/openmm/AmoebaVdwForce.h
@@ -50,6 +50,19 @@ namespace OpenMM {
 * particle to another one.  This is typically done for hydrogens to place the interaction site slightly
 * closer to the parent atom.  The fraction is known as the "reduction factor", since it reduces the distance
 * from the parent atom to the interaction site.
+ *
+ * Support is also available for softcore interactions based on setting a per particle alchemical flag and
+ * setting the AmoebaVdwForce to use an "AlchemicalMethod" -- either Decouple or Annihilate.
+ * For Decouple, two alchemical atoms interact normally. For Annihilate, all interactions involving an 
+ * alchemical atom are influenced. The softcore state is specified by setting a single 
+ * Context parameter "AmoebaVdwLambda" between 0.0 and 1.0.
+ *
+ * The softcore functional form can be modified by setting the softcore power (default of 5) and the softcore
+ * alpha (default of 0,7). For more information on the softcore functional form see Eq. 2 from:
+ * Jiao, D.;  Golubkov, P. A.;  Darden, T. A.; Ren, P., 
+ * Calculation of protein-ligand binding free energy by using a polarizable potential.
+ * Proc. Natl. Acad. Sci. U.S.A. 2008, 105 (17), 6290-6295.
+ * https://www.pnas.org/content/105/17/6290.
 */
 class OPENMM_EXPORT_AMOEBA AmoebaVdwForce : public Force {

--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
@@ -2353,32 +2353,40 @@ private:
 };
 CudaCalcAmoebaVdwForceKernel::CudaCalcAmoebaVdwForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) :
-        CalcAmoebaVdwForceKernel(name, platform), cu(cu), system(system), hasInitializedNonbonded(false), nonbonded(NULL) {
+        CalcAmoebaVdwForceKernel(name, platform), cu(cu), system(system), hasInitializedNonbonded(false), nonbonded(NULL), vdwLambdaPinnedBuffer(NULL) {
 }
 CudaCalcAmoebaVdwForceKernel::~CudaCalcAmoebaVdwForceKernel() {
    cu.setAsCurrent();
    if (nonbonded != NULL)
        delete nonbonded;
+    if (vdwLambdaPinnedBuffer != NULL) 
+        cuMemFreeHost(vdwLambdaPinnedBuffer);                              
 }
 void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const AmoebaVdwForce& force) {
    cu.setAsCurrent();
    sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
-    isAlchemical.initialize<float>(cu, cu.getPaddedNumAtoms(), "isAlchemical");
-    vdwLambda.initialize<float>(cu, 1, "vdwLambda");
    bondReductionAtoms.initialize<int>(cu, cu.getPaddedNumAtoms(), "bondReductionAtoms");
    bondReductionFactors.initialize<float>(cu, cu.getPaddedNumAtoms(), "bondReductionFactors");
    tempPosq.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4), "tempPosq");
    tempForces.initialize<long long>(cu, 3*cu.getPaddedNumAtoms(), "tempForces");
    // Record atom parameters.
    vector<float2> sigmaEpsilonVec(cu.getPaddedNumAtoms(), make_float2(0, 1));
    vector<float> isAlchemicalVec(cu.getPaddedNumAtoms(), 0);
    vector<int> bondReductionAtomsVec(cu.getPaddedNumAtoms(), 0);
    vector<float> bondReductionFactorsVec(cu.getPaddedNumAtoms(), 0);
    vector<vector<int> > exclusions(cu.getNumAtoms());
+    // Handle Alchemical parameters.
+    hasAlchemical = force.getAlchemicalMethod() != AmoebaVdwForce::None;
+    if (hasAlchemical) {
+       isAlchemical.initialize<float>(cu, cu.getPaddedNumAtoms(), "isAlchemical");
+       vdwLambda.initialize<float>(cu, 1, "vdwLambda");
+       CHECK_RESULT(cuMemHostAlloc(&vdwLambdaPinnedBuffer, sizeof(float), 0), "Error allocating vdwLambda pinned buffer");
+    }
    for (int i = 0; i < force.getNumParticles(); i++) {
        int ivIndex;
        double sigma, epsilon, reductionFactor;
@@ -2392,7 +2400,6 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
        exclusions[i].push_back(i);
    }
    sigmaEpsilon.upload(sigmaEpsilonVec);
-    isAlchemical.upload(isAlchemicalVec);
    bondReductionAtoms.upload(bondReductionAtomsVec);
    bondReductionFactors.upload(bondReductionFactorsVec);
    if (force.getUseDispersionCorrection())
@@ -2400,19 +2407,21 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
    else
        dispersionCoefficient = 0.0;               
-    // A single lambda parameter to define the alchemical vdW state.
-    vector<float> vdwLambdaVec(1, 0);
-    vdwLambdaVec[0] = 1.0f;
-    vdwLambda.upload(vdwLambdaVec);
    // This force is applied based on modified atom positions, where hydrogens have been moved slightly
    // closer to their parent atoms.  We therefore create a separate CudaNonbondedUtilities just for
    // this force, so it will have its own neighbor list and interaction kernel.
    nonbonded = new CudaNonbondedUtilities(cu);
    nonbonded->addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer()));
+    if (hasAlchemical) {
+       isAlchemical.upload(isAlchemicalVec);
+       ((float*) vdwLambdaPinnedBuffer)[0] = 1.0f;
+       currentVdwLambda = 1.0f;
+       vdwLambda.upload(vdwLambdaPinnedBuffer, false);
       nonbonded->addParameter(CudaNonbondedUtilities::ParameterInfo("isAlchemical", "float", 1, sizeof(float), isAlchemical.getDevicePointer()));
       nonbonded->addArgument(CudaNonbondedUtilities::ParameterInfo("vdwLambda", "float", 1, sizeof(float), vdwLambda.getDevicePointer()));
+    }
    // Create the interaction kernel.
@@ -2431,16 +2440,16 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
        replacements["EPSILON_COMBINING_RULE"] = "1";
    else if (epsilonCombiningRule == "GEOMETRIC")
        replacements["EPSILON_COMBINING_RULE"] = "2";
-    else if (epsilonCombiningRule == "HARMONIC")
+    else if (epsilonCombiningRule =="HARMONIC")
        replacements["EPSILON_COMBINING_RULE"] = "3";
    else if (epsilonCombiningRule == "HHG")
        replacements["EPSILON_COMBINING_RULE"] = "4";
    else
        throw OpenMMException("Illegal combining rule for sigma: "+sigmaCombiningRule);
+    replacements["VDW_ALCHEMICAL_METHOD"] = cu.intToString(force.getAlchemicalMethod()); 
    replacements["VDW_SOFTCORE_POWER"] = cu.intToString(force.getSoftcorePower());
    replacements["VDW_SOFTCORE_ALPHA"] = cu.doubleToString(force.getSoftcoreAlpha()); 
-    replacements["VDW_ALCHEMICAL_METHOD"] = cu.intToString(force.getAlchemicalMethod()); 
    double cutoff = force.getCutoffDistance();
    double taperCutoff = cutoff*0.9;
@@ -2469,10 +2478,15 @@ double CudaCalcAmoebaVdwForceKernel::execute(ContextImpl& context, bool includeF
        nonbonded->initialize(system);
    }
-    // Not sure about passing a Context parameter to the Kernal?
+    if (hasAlchemical) {
-    vector<float> vdwLambdaVec(1, 0);
+       float contextLambda = context.getParameter(AmoebaVdwForce::Lambda());
-    vdwLambdaVec[0] = context.getParameter(AmoebaVdwForce::Lambda());
+       if (contextLambda != currentVdwLambda) {
-    vdwLambda.upload(vdwLambdaVec);
+          // Non-blocking copy of vdwLambda to device memory
+          ((float*) vdwLambdaPinnedBuffer)[0] = contextLambda;
+          vdwLambda.upload(vdwLambdaPinnedBuffer, false);
+          currentVdwLambda = contextLambda;
+       }
+    }
    cu.getPosq().copyTo(tempPosq);
    cu.getForce().copyTo(tempForces);
@@ -2512,7 +2526,7 @@ void CudaCalcAmoebaVdwForceKernel::copyParametersToContext(ContextImpl& context,
        bondReductionFactorsVec[i] = (float) reductionFactor;
    }
    sigmaEpsilon.upload(sigmaEpsilonVec);
-    isAlchemical.upload(isAlchemicalVec);
+    if (hasAlchemical) isAlchemical.upload(isAlchemicalVec);
    bondReductionAtoms.upload(bondReductionAtomsVec);
    bondReductionFactors.upload(bondReductionFactorsVec);
    if (force.getUseDispersionCorrection())

--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
@@ -571,12 +571,22 @@ private:
    CudaContext& cu;
    const System& system;
    bool hasInitializedNonbonded;
+    // True if the AmoebaVdwForce AlchemicalMethod is not None.
+    bool hasAlchemical;
+    // Pinned host memory; allocated if necessary in initialize, and freed in the destructor.
+    void* vdwLambdaPinnedBuffer;
+    // Device memory for the alchemical state.
+    CudaArray vdwLambda;
+    // Only update device memory when lambda changes.
+    float currentVdwLambda;
+    // Per particle alchemical flag.
+    CudaArray isAlchemical;
    double dispersionCoefficient;
    CudaArray sigmaEpsilon;
    CudaArray bondReductionAtoms;
    CudaArray bondReductionFactors;
-    CudaArray isAlchemical;
-    CudaArray vdwLambda;
    CudaArray tempPosq;
    CudaArray tempForces;
    CudaNonbondedUtilities* nonbonded;

--- a/plugins/amoeba/serialization/src/AmoebaVdwForceProxy.cpp
+++ b/plugins/amoeba/serialization/src/AmoebaVdwForceProxy.cpp
@@ -42,7 +42,7 @@ AmoebaVdwForceProxy::AmoebaVdwForceProxy() : SerializationProxy("AmoebaVdwForce"
 }
 void AmoebaVdwForceProxy::serialize(const void* object, SerializationNode& node) const {
-    node.setIntProperty("version", 2);
+    node.setIntProperty("version", 3);
    const AmoebaVdwForce& force = *reinterpret_cast<const AmoebaVdwForce*>(object);
    node.setIntProperty("forceGroup", force.getForceGroup());
@@ -78,7 +78,7 @@ void AmoebaVdwForceProxy::serialize(const void* object, SerializationNode& node)
 void* AmoebaVdwForceProxy::deserialize(const SerializationNode& node) const {
    int version = node.getIntProperty("version");
-    if (version < 1 || version > 2)
+    if (version < 1 || version > 3)
        throw OpenMMException("Unsupported version number");
    AmoebaVdwForce* force = new AmoebaVdwForce();
    try {
@@ -89,14 +89,23 @@ void* AmoebaVdwForceProxy::deserialize(const SerializationNode& node) const {
        force->setCutoffDistance(node.getDoubleProperty("VdwCutoff"));
        force->setNonbondedMethod((AmoebaVdwForce::NonbondedMethod) node.getIntProperty("method"));
+        if (version > 2) {
           force->setAlchemicalMethod((AmoebaVdwForce::AlchemicalMethod) node.getIntProperty("alchemicalMethod"));
           force->setSoftcorePower(node.getDoubleProperty("n"));
           force->setSoftcoreAlpha(node.getDoubleProperty("alpha"));
+        }
        const SerializationNode& particles = node.getChildNode("VdwParticles");
        for (unsigned int ii = 0; ii < particles.getChildren().size(); ii++) {
            const SerializationNode& particle = particles.getChildren()[ii];
-            force->addParticle(particle.getIntProperty("ivIndex"), particle.getDoubleProperty("sigma"), particle.getDoubleProperty("epsilon"), particle.getDoubleProperty("reductionFactor"), particle.getBoolProperty("isAlchemical"));
+            if (version < 3) 
+               force->addParticle(particle.getIntProperty("ivIndex"), particle.getDoubleProperty("sigma"), 
+                                  particle.getDoubleProperty("epsilon"), particle.getDoubleProperty("reductionFactor"));
+            else 
+               force->addParticle(particle.getIntProperty("ivIndex"), particle.getDoubleProperty("sigma"), 
+                                  particle.getDoubleProperty("epsilon"), particle.getDoubleProperty("reductionFactor"), 
+                                  particle.getBoolProperty("isAlchemical"));
            // exclusions

--- a/wrappers/python/tests/TestAPIUnits.py
+++ b/wrappers/python/tests/TestAPIUnits.py
@@ -969,7 +969,7 @@ class TestAPIUnits(unittest.TestCase):
        self.assertEqual(force.getNumParticles(), 3)
-        p, sig, eps, scale = force.getParticleParameters(0)
+        p, sig, eps, scale, alchemical = force.getParticleParameters(0)
        self.assertEqual(p, 0)
        self.assertEqual(sig, 0.1*nanometers)
        self.assertIs(sig.unit, nanometers)
@@ -977,7 +977,7 @@ class TestAPIUnits(unittest.TestCase):
        self.assertIs(eps.unit, kilojoules_per_mole)
        self.assertEqual(scale, 1.0)
-        p, sig, eps, scale = force.getParticleParameters(1)
+        p, sig, eps, scale, alchemical = force.getParticleParameters(1)
        self.assertEqual(p, 1)
        self.assertEqual(sig, 1.0*angstroms)
        self.assertIs(sig.unit, nanometers)
@@ -985,7 +985,7 @@ class TestAPIUnits(unittest.TestCase):
        self.assertIs(eps.unit, kilojoules_per_mole)
        self.assertEqual(scale, 0.5)
-        p, sig, eps, scale = force.getParticleParameters(2)
+        p, sig, eps, scale, alchemical = force.getParticleParameters(2)
        self.assertEqual(p, 1)
        self.assertAlmostEqualUnit(sig, 0.8*angstroms)
        self.assertIs(sig.unit, nanometers)