PME works correctly in CPU platform

e22b8f53 · peastman · 6b10b909 · e22b8f53 · e22b8f53 · e22b8f53
Commit e22b8f53 authored Oct 11, 2013 by peastman
7 changed files
--- a/platforms/cpu/include/CpuKernels.h
+++ b/platforms/cpu/include/CpuKernels.h
@@ -44,7 +44,7 @@ namespace OpenMM {
 */
 class CpuCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
 public:
-    CpuCalcNonbondedForceKernel(std::string name, const Platform& platform) : CalcNonbondedForceKernel(name, platform) {
+    CpuCalcNonbondedForceKernel(std::string name, const Platform& platform) : CalcNonbondedForceKernel(name, platform), bonded14IndexArray(NULL), bonded14ParamArray(NULL) {
    }
    ~CpuCalcNonbondedForceKernel();
    /**
@@ -75,12 +75,12 @@ public:
 private:
    int numParticles, num14;
    int **bonded14IndexArray;
-    float **particleParamArray;
    double **bonded14ParamArray;
-    double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, dispersionCoefficient;
+    double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldSelfEnergy, dispersionCoefficient;
    int kmax[3], gridSize[3];
    bool useSwitchingFunction;
    std::vector<std::set<int> > exclusions;
+    std::vector<std::pair<float, float> > particleParams;
    std::vector<float> posq;
    std::vector<float> forces;
    NonbondedMethod nonbondedMethod;

--- a/platforms/cpu/include/CpuNonbondedForce.h
+++ b/platforms/cpu/include/CpuNonbondedForce.h
@@ -49,30 +49,7 @@ class CpuNonbondedForce {
      int numRx, numRy, numRz;
      int meshDim[3];
      __m128 boxSize, invBoxSize, half;
+      static float TWO_OVER_SQRT_PI;
-      // parameter indices
-      static const int SigIndex = 0;
-      static const int EpsIndex = 1;
-      static const int   QIndex = 2;
-      /**---------------------------------------------------------------------------------------
-         Calculate LJ Coulomb pair ixn between two atoms
-         @param atom1            the index of the first atom
-         @param atom2            the index of the second atom
-         @param atomCoordinates  atom coordinates
-         @param atomParameters   atom parameters (charges, c6, c12, ...)     atomParameters[atomIndex][paramterIndex]
-         @param forces           force array (forces added)
-         @param totalEnergy      total energy
-         --------------------------------------------------------------------------------------- */
-      void calculateOneIxn( int atom1, int atom2, float* atomCoordinates,
-                            float** atomParameters, float* forces,
-                            double* totalEnergy ) const;
   public:
@@ -150,54 +127,86 @@ class CpuNonbondedForce {
         --------------------------------------------------------------------------------------- */
      void setUsePME(float alpha, int meshSize[3]);
+      /**---------------------------------------------------------------------------------------
+         Calculate Ewald ixn
+         @param numberOfAtoms    number of atoms
+         @param posq             atom coordinates and charges
+         @param atomCoordinates  atom coordinates (in format needed by PME)
+         @param atomParameters   atom parameters (sigma/2, 2*sqrt(epsilon))
+         @param exclusions       atom exclusion indices
+                                 exclusions[atomIndex] contains the list of exclusions for that atom
+         @param fixedParameters  non atom parameters (not currently used)
+         @param forces           force array (forces added)
+         @param totalEnergy      total energy
+         --------------------------------------------------------------------------------------- */
+      void calculateReciprocalIxn(int numberOfAtoms, float* posq, std::vector<OpenMM::RealVec>& atomCoordinates,
+                            const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions,
+                            float* fixedParameters, std::vector<OpenMM::RealVec>& forces, float* totalEnergy) const;
      /**---------------------------------------------------------------------------------------
         Calculate LJ Coulomb pair ixn
         @param numberOfAtoms    number of atoms
-         @param atomCoordinates  atom coordinates
+         @param posq             atom coordinates and charges
-         @param atomParameters   atom parameters (charges, c6, c12, ...)     atomParameters[atomIndex][paramterIndex]
+         @param atomParameters   atom parameters (sigma/2, 2*sqrt(epsilon))
         @param exclusions       atom exclusion indices
                                 exclusions[atomIndex] contains the list of exclusions for that atom
         @param fixedParameters  non atom parameters (not currently used)
         @param forces           force array (forces added)
         @param totalEnergy      total energy
-         @param includeDirect      true if direct space interactions should be included
-         @param includeReciprocal  true if reciprocal space interactions should be included
         --------------------------------------------------------------------------------------- */
-      void calculatePairIxn(int numberOfAtoms, float* atomCoordinates,
+      void calculateDirectIxn(int numberOfAtoms, float* posq,
-                            float** atomParameters, std::vector<std::set<int> >& exclusions,
+                            const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions,
-                            float* fixedParameters, float* forces,
+                            float* fixedParameters, float* forces, float* totalEnergy) const;
-                            float* totalEnergy, bool includeDirect, bool includeReciprocal) const;
 private:
      /**---------------------------------------------------------------------------------------
-         Calculate Ewald ixn
+         Calculate LJ Coulomb pair ixn between two atoms
-         @param numberOfAtoms    number of atoms
+         @param atom1            the index of the first atom
-         @param atomCoordinates  atom coordinates
+         @param atom2            the index of the second atom
-         @param atomParameters   atom parameters (charges, c6, c12, ...)     atomParameters[atomIndex][paramterIndex]
+         @param posq             atom coordinates and charges
-         @param exclusions       atom exclusion indices
+         @param atomParameters   atom parameters (sigma/2, 2*sqrt(epsilon))
-                                 exclusions[atomIndex] contains the list of exclusions for that atom
-         @param fixedParameters  non atom parameters (not currently used)
         @param forces           force array (forces added)
         @param totalEnergy      total energy
-         @param includeDirect      true if direct space interactions should be included
-         @param includeReciprocal  true if reciprocal space interactions should be included
         --------------------------------------------------------------------------------------- */
-      void calculateEwaldIxn(int numberOfAtoms, float* atomCoordinates,
+      void calculateOneIxn( int atom1, int atom2, float* posq,
-                            float** atomParameters, std::vector<std::set<int> >& exclusions,
+                            const std::vector<std::pair<float, float> >& atomParameters, float* forces,
-                            float* fixedParameters, float* forces,
+                            double* totalEnergy ) const;
-                            float* totalEnergy, bool includeDirect, bool includeReciprocal) const;
+      /**---------------------------------------------------------------------------------------
+         Calculate LJ Coulomb pair ixn between two atoms
+         @param atom1            the index of the first atom
+         @param atom2            the index of the second atom
+         @param posq             atom coordinates and charges
+         @param atomParameters   atom parameters (sigma/2, 2*sqrt(epsilon))
+         @param forces           force array (forces added)
+         @param totalEnergy      total energy
+         --------------------------------------------------------------------------------------- */
+      void calculateOneEwaldIxn( int atom1, int atom2, float* posq,
+                            const std::vector<std::pair<float, float> >& atomParameters, float* forces,
+                            double* totalEnergy ) const;
      void getDeltaR(const __m128& posI, const __m128& posJ, __m128& deltaR, float& r2, bool periodic) const;
+      static float erfcApprox(float x);
 };
 // ---------------------------------------------------------------------------------------

--- a/platforms/cpu/src/CpuKernels.cpp
+++ b/platforms/cpu/src/CpuKernels.cpp
@@ -63,9 +63,14 @@ static RealVec& extractBoxSize(ContextImpl& context) {
 }
 CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
-    delete bonded14IndexArray; // Do this properly
+    if (bonded14ParamArray != NULL) {
-    delete bonded14ParamArray; // Do this properly
+        for (int i = 0; i < num14; i++) {
-    delete particleParamArray; // Do this properly
+            delete[] bonded14IndexArray[i];
+            delete[] bonded14ParamArray[i];
+        }
+        delete bonded14IndexArray;
+        delete bonded14ParamArray;
+    }
 }
 void CpuCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
@@ -96,16 +101,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
    bonded14ParamArray = new double*[num14];
    for (int i = 0; i < num14; i++)
        bonded14ParamArray[i] = new double[3];
-    particleParamArray = new float*[numParticles];
+    particleParams.resize(numParticles);
-    for (int i = 0; i < numParticles; i++)
+    double sumSquaredCharges = 0.0;
-        particleParamArray[i] = new float[3];
    for (int i = 0; i < numParticles; ++i) {
        double charge, radius, depth;
        force.getParticleParameters(i, charge, radius, depth);
        posq[4*i+3] = (float) charge;
-        particleParamArray[i][0] = (float) (0.5*radius);
+        particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
-        particleParamArray[i][1] = (float) (2.0*sqrt(depth));
+        sumSquaredCharges += charge*charge;
-        particleParamArray[i][2] = (float) (charge);
    }
    for (int i = 0; i < num14; ++i) {
        int particle1, particle2;
@@ -135,6 +138,10 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
        NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2]);
        ewaldAlpha = alpha;
    }
+    if (nonbondedMethod == Ewald || nonbondedMethod == PME)
+        ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI);
+    else
+        ewaldSelfEnergy = 0.0;
    rfDielectric = force.getReactionFieldDielectric();
    if (force.getUseDispersionCorrection())
        dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
@@ -147,7 +154,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
    vector<RealVec>& forceData = extractForces(context);
    RealVec boxSize = extractBoxSize(context);
    float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]};
-    double energy = 0;
+    double energy = ewaldSelfEnergy;
    CpuNonbondedForce clj;
    bool periodic = (nonbondedMethod == CutoffPeriodic);
    bool ewald  = (nonbondedMethod == Ewald);
@@ -186,9 +193,12 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
        clj.setUsePME(ewaldAlpha, gridSize);
    if (useSwitchingFunction)
        clj.setUseSwitchingFunction(switchingDistance);
-    float directEnergy = 0;
+    float nonbondedEnergy = 0;
-    clj.calculatePairIxn(numParticles, &posq[0], particleParamArray, exclusions, 0, &forces[0], includeEnergy ? &directEnergy : NULL, includeDirect, includeReciprocal);
+    if (includeDirect)
-    energy += directEnergy;
+        clj.calculateDirectIxn(numParticles, &posq[0], particleParams, exclusions, 0, &forces[0], includeEnergy ? &nonbondedEnergy : NULL);
+    if (includeReciprocal)
+        clj.calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, 0, forceData, includeEnergy ? &nonbondedEnergy : NULL);
+    energy += nonbondedEnergy;
    for (int i = 0; i < numParticles; i++) {
        forceData[i][0] += forces[4*i];
        forceData[i][1] += forces[4*i+1];
@@ -220,13 +230,18 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
    // Record the values.
+    double sumSquaredCharges = 0.0;
    for (int i = 0; i < numParticles; ++i) {
        double charge, radius, depth;
        force.getParticleParameters(i, charge, radius, depth);
-        particleParamArray[i][0] = (float) (0.5*radius);
+        posq[4*i+3] = (float) charge;
-        particleParamArray[i][1] = (float) (2.0*sqrt(depth));
+        particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
-        particleParamArray[i][2] = (float) (charge);
+        sumSquaredCharges += charge*charge;
    }
+    if (nonbondedMethod == Ewald || nonbondedMethod == PME)
+        ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI);
+    else
+        ewaldSelfEnergy = 0.0;
    for (int i = 0; i < num14; ++i) {
        int particle1, particle2;
        double charge, radius, depth;

--- a/platforms/cpu/src/CpuNonbondedForce.cpp
+++ b/platforms/cpu/src/CpuNonbondedForce.cpp
@@ -23,11 +23,9 @@
 */
 #include <string.h>
-#include <sstream>
 #include <complex>
 #include "SimTKOpenMMCommon.h"
-#include "SimTKOpenMMLog.h"
 #include "SimTKOpenMMUtilities.h"
 #include "CpuNonbondedForce.h"
 #include "ReferenceForce.h"
@@ -39,6 +37,8 @@
 using namespace std;
+float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
 /**---------------------------------------------------------------------------------------
   CpuNonbondedForce constructor
@@ -164,357 +164,192 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
      pme = true;
  }
-/**---------------------------------------------------------------------------------------
+void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, vector<OpenMM::RealVec>& atomCoordinates,
+                                             const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions,
-   Calculate Ewald ixn
+                                             float* fixedParameters, vector<OpenMM::RealVec>& forces, float* totalEnergy) const {
-   @param numberOfAtoms    number of atoms
-   @param atomCoordinates  atom coordinates
-   @param atomParameters   atom parameters                             atomParameters[atomIndex][paramterIndex]
-   @param exclusions       atom exclusion indices
-                           exclusions[atomIndex] contains the list of exclusions for that atom
-   @param fixedParameters  non atom parameters (not currently used)
-   @param forces           force array (forces added)
-   @param totalEnergy      total energy
-   @param includeDirect      true if direct space interactions should be included
-   @param includeReciprocal  true if reciprocal space interactions should be included
-   --------------------------------------------------------------------------------------- */
-void CpuNonbondedForce::calculateEwaldIxn(int numberOfAtoms, float* atomCoordinates,
-                                             float** atomParameters, vector<set<int> >& exclusions,
-                                             float* fixedParameters, float* forces,
-                                             float* totalEnergy, bool includeDirect, bool includeReciprocal) const {
    typedef std::complex<float> d_complex;
    static const float epsilon     =  1.0;
-    static const float one         =  1.0;
-    static const float six         =  6.0;
-    static const float twelve      = 12.0;
    int kmax                            = (ewald ? std::max(numRx, std::max(numRy,numRz)) : 0);
-    float  factorEwald             = -1 / (4*alphaEwald*alphaEwald);
+    float factorEwald              = -1 / (4*alphaEwald*alphaEwald);
-    float SQRT_PI                  = sqrt(PI_M);
    float TWO_PI                   = 2.0 * PI_M;
    float recipCoeff               = (float)(ONE_4PI_EPS0*4*PI_M/(periodicBoxSize[0] * periodicBoxSize[1] * periodicBoxSize[2]) /epsilon);
-    float totalSelfEwaldEnergy     = 0.0;
+    if (pme) {
-    float realSpaceEwaldEnergy     = 0.0;
+        pme_t pmedata;
-    float recipEnergy              = 0.0;
+        RealOpenMM virial[3][3];
-    float vdwEnergy                = 0.0;
+        pme_init(&pmedata, alphaEwald, numberOfAtoms, meshDim, 5, 1);
+        vector<RealOpenMM> charges(numberOfAtoms);
-// **************************************************************************************
+        for (int i = 0; i < numberOfAtoms; i++)
-// SELF ENERGY
+            charges[i] = posq[4*i+3];
-// **************************************************************************************
+        RealOpenMM boxSize[3] = {periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2]};
+        RealOpenMM recipEnergy = 0.0;
-    if (includeReciprocal) {
+        pme_exec(pmedata, atomCoordinates, forces, charges, boxSize, &recipEnergy, virial);
-        for (int atomID = 0; atomID < numberOfAtoms; atomID++){
+        if (totalEnergy)
-            float selfEwaldEnergy       = (float) (ONE_4PI_EPS0*atomCoordinates[4*atomID+3]*atomCoordinates[4*atomID+3] * alphaEwald/SQRT_PI);
+            *totalEnergy += recipEnergy;
-            totalSelfEwaldEnergy            -= selfEwaldEnergy;
-        }
-    }
-    if (totalEnergy){
-        *totalEnergy += totalSelfEwaldEnergy;
-    }
-// **************************************************************************************
-// RECIPROCAL SPACE EWALD ENERGY AND FORCES
-// **************************************************************************************
-    // PME
-  if (pme && includeReciprocal) {
-    pme_t          pmedata; /* abstract handle for PME data */
-    float virial[3][3];
-    pme_init(&pmedata,alphaEwald,numberOfAtoms,meshDim,5,1);
-//    pme_exec(pmedata,atomCoordinates,forces,atomParameters,periodicBoxSize,&recipEnergy,virial);
-    if (totalEnergy)
-       *totalEnergy += recipEnergy;
        pme_destroy(pmedata);
-  }
+    }
    // Ewald method
-  else if (ewald && includeReciprocal) {
+    else if (ewald) {
-    // setup reciprocal box
-    float recipBoxSize[3] = { TWO_PI / periodicBoxSize[0], TWO_PI / periodicBoxSize[1], TWO_PI / periodicBoxSize[2]};
-    // setup K-vectors
-  #define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
-  vector<d_complex> eir(kmax*numberOfAtoms*3);
-  vector<d_complex> tab_xy(numberOfAtoms);
-  vector<d_complex> tab_qxyz(numberOfAtoms);
-  if (kmax < 1) {
-      std::stringstream message;
-      message << " kmax < 1 , Aborting" << std::endl;
-      SimTKOpenMMLog::printError(message);
-  }
-  for (int i = 0; (i < numberOfAtoms); i++) {
-    float* pos = atomCoordinates+4*i;
-    for (int m = 0; (m < 3); m++)
-      EIR(0, i, m) = d_complex(1,0);
-    for (int m=0; (m<3); m++)
+        // setup reciprocal box
-      EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]),
-                               sin(pos[m]*recipBoxSize[m]));
-    for (int j=2; (j<kmax); j++)
-      for (int m=0; (m<3); m++)
-        EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m);
-  }
-    // calculate reciprocal space energy and forces
+        float recipBoxSize[3] = { TWO_PI / periodicBoxSize[0], TWO_PI / periodicBoxSize[1], TWO_PI / periodicBoxSize[2]};
-    int lowry = 0;
-    int lowrz = 1;
-    for (int rx = 0; rx < numRx; rx++) {
+        // setup K-vectors
-      float kx = rx * recipBoxSize[0];
+        #define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
+        vector<d_complex> eir(kmax*numberOfAtoms*3);
+        vector<d_complex> tab_xy(numberOfAtoms);
+        vector<d_complex> tab_qxyz(numberOfAtoms);
-      for (int ry = lowry; ry < numRy; ry++) {
+        for (int i = 0; (i < numberOfAtoms); i++) {
+            float* pos = posq+4*i;
+            for (int m = 0; (m < 3); m++)
+              EIR(0, i, m) = d_complex(1,0);
-        float ky = ry * recipBoxSize[1];
+            for (int m=0; (m<3); m++)
+              EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]),
+                                       sin(pos[m]*recipBoxSize[m]));
-        if (ry >= 0) {
+            for (int j=2; (j<kmax); j++)
-          for (int n = 0; n < numberOfAtoms; n++)
+              for (int m=0; (m<3); m++)
-            tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
+                EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m);
        }
-        else {
+        // calculate reciprocal space energy and forces
-          for (int n = 0; n < numberOfAtoms; n++)
-            tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
+        int lowry = 0;
-        }
+        int lowrz = 1;
-        for (int rz = lowrz; rz < numRz; rz++) {
+        for (int rx = 0; rx < numRx; rx++) {
+            float kx = rx * recipBoxSize[0];
-          if (rz >= 0) {
+            for (int ry = lowry; ry < numRy; ry++) {
-           for (int n = 0; n < numberOfAtoms; n++)
+                float ky = ry * recipBoxSize[1];
-             tab_qxyz[n] = atomParameters[n][QIndex] * (tab_xy[n] * EIR(rz, n, 2));
+                if (ry >= 0) {
-          }
+                    for (int n = 0; n < numberOfAtoms; n++)
+                      tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
-          else {
+                }
-            for (int n = 0; n < numberOfAtoms; n++)
+                else {
-              tab_qxyz[n] = atomParameters[n][QIndex] * (tab_xy[n] * conj(EIR(-rz, n, 2)));
+                    for (int n = 0; n < numberOfAtoms; n++)
-          }
+                      tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
+                }
-          float cs = 0.0f;
+                for (int rz = lowrz; rz < numRz; rz++) {
-          float ss = 0.0f;
+                    if (rz >= 0) {
+                        for (int n = 0; n < numberOfAtoms; n++)
-          for (int n = 0; n < numberOfAtoms; n++) {
+                            tab_qxyz[n] = posq[4*n+3] * (tab_xy[n] * EIR(rz, n, 2));
-            cs += tab_qxyz[n].real();
+                    }
-            ss += tab_qxyz[n].imag();
+                    else {
-          }
+                        for (int n = 0; n < numberOfAtoms; n++)
+                            tab_qxyz[n] = posq[4*n+3] * (tab_xy[n] * conj(EIR(-rz, n, 2)));
-          float kz = rz * recipBoxSize[2];
+                    }
-          float k2 = kx * kx + ky * ky + kz * kz;
+                    float cs = 0.0f;
-          float ak = exp(k2*factorEwald) / k2;
+                    float ss = 0.0f;
-          for (int n = 0; n < numberOfAtoms; n++) {
+                    for (int n = 0; n < numberOfAtoms; n++) {
-            float force = ak * (cs * tab_qxyz[n].imag() - ss * tab_qxyz[n].real());
+                        cs += tab_qxyz[n].real();
-            float* f = forces+4*n;
+                        ss += tab_qxyz[n].imag();
-            f[0] += 2 * recipCoeff * force * kx;
+                    }
-            f[1] += 2 * recipCoeff * force * ky;
-            f[2] += 2 * recipCoeff * force * kz;
+                    float kz = rz * recipBoxSize[2];
-          }
+                    float k2 = kx * kx + ky * ky + kz * kz;
+                    float ak = exp(k2*factorEwald) / k2;
-          if (totalEnergy)
-             *totalEnergy += recipCoeff * ak * (cs * cs + ss * ss);
+                    for (int n = 0; n < numberOfAtoms; n++) {
+                        float force = ak * (cs * tab_qxyz[n].imag() - ss * tab_qxyz[n].real());
-          lowrz = 1 - numRz;
+                        forces[n][0] += 2 * recipCoeff * force * kx;
+                        forces[n][1] += 2 * recipCoeff * force * ky;
+                        forces[n][2] += 2 * recipCoeff * force * kz;
+                    }
+                    if (totalEnergy)
+                        *totalEnergy += recipCoeff * ak * (cs * cs + ss * ss);
+                    lowrz = 1 - numRz;
+                }
+                lowry = 1 - numRy;
+            }
        }
-        lowry = 1 - numRy;
-      }
    }
-  }
+}
-// **************************************************************************************
-// SHORT-RANGE ENERGY AND FORCES
-// **************************************************************************************
-    if (!includeDirect)
-        return;
-    double totalVdwEnergy            = 0.0f;
-    double totalRealSpaceEwaldEnergy = 0.0f;
-    for (int i = 0; i < (int) neighborList->size(); i++) {
-       pair<int, int> pair = (*neighborList)[i];
-       int ii = pair.first;
-       int jj = pair.second;
-       __m128 deltaR;
-       __m128 posI = _mm_loadu_ps(atomCoordinates+4*ii);
-       __m128 posJ = _mm_loadu_ps(atomCoordinates+4*jj);
-       float r2;
-       getDeltaR(posJ, posI, deltaR, r2, true);
-       float r         = sqrtf(r2);
-       float inverseR  = 1/r;
-       float switchValue = 1, switchDeriv = 0;
-       if (useSwitch && r > switchingDistance) {
-           float t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
-           switchValue = 1+t*t*t*(-10+t*(15-t*6));
-           switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance);
-       }
-       float alphaR    = alphaEwald * r;
-       float chargeProd = ONE_4PI_EPS0*atomCoordinates[4*ii+3]*atomCoordinates[4*jj+3];
-       float dEdR      = (float) (chargeProd * inverseR * inverseR * inverseR);
-             dEdR      = (float) (dEdR * (erfc(alphaR) + 2 * alphaR * exp (- alphaR * alphaR) / SQRT_PI));
-       float sig       = atomParameters[ii][SigIndex] +  atomParameters[jj][SigIndex];
-       float sig2      = inverseR*sig;
-                  sig2     *= sig2;
-       float sig6      = sig2*sig2*sig2;
-       float eps       = atomParameters[ii][EpsIndex]*atomParameters[jj][EpsIndex];
-                  dEdR     += switchValue*eps*(twelve*sig6 - six)*sig6*inverseR*inverseR;
-       vdwEnergy = eps*(sig6-one)*sig6;
-       if (useSwitch) {
-           dEdR -= vdwEnergy*switchDeriv*inverseR;
-           vdwEnergy *= switchValue;
-       }
-       // accumulate forces
-       __m128 result = _mm_mul_ps(deltaR, _mm_set1_ps(dEdR));
-       _mm_storeu_ps(forces+4*ii, _mm_add_ps(_mm_loadu_ps(forces+4*ii), result));
-       _mm_storeu_ps(forces+4*jj, _mm_sub_ps(_mm_loadu_ps(forces+4*jj), result));
-       // accumulate energies
-       realSpaceEwaldEnergy        = (float) (chargeProd*inverseR*erfc(alphaR));
-       totalVdwEnergy             += vdwEnergy;
-       totalRealSpaceEwaldEnergy  += realSpaceEwaldEnergy;
-    }
-    if (totalEnergy)
-        *totalEnergy += totalRealSpaceEwaldEnergy + totalVdwEnergy;
-    // Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
-    float totalExclusionEnergy = 0.0f;
-    for (int i = 0; i < numberOfAtoms; i++)
-        for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
-            if (*iter > i) {
-               int ii = i;
-               int jj = *iter;
-               __m128 deltaR;
-               __m128 posI = _mm_loadu_ps(atomCoordinates+4*ii);
-               __m128 posJ = _mm_loadu_ps(atomCoordinates+4*jj);
-               float r2;
-               getDeltaR(posJ, posI, deltaR, r2, false);
-               float r         = sqrtf(r2);
-               float inverseR  = 1/r;
-               float alphaR    = alphaEwald * r;
-               if (erf(alphaR) > 1e-6) {
-                   float chargeProd = ONE_4PI_EPS0*atomCoordinates[4*ii+3]*atomCoordinates[4*jj+3];
-                   float dEdR      = (float) (chargeProd * inverseR * inverseR * inverseR);
-                         dEdR      = (float) (dEdR * (erf(alphaR) - 2 * alphaR * exp (- alphaR * alphaR) / SQRT_PI));
-                   // accumulate forces
-                   __m128 result = _mm_mul_ps(deltaR, _mm_set1_ps(dEdR));
-                   _mm_storeu_ps(forces+4*ii, _mm_add_ps(_mm_loadu_ps(forces+4*ii), result));
-                   _mm_storeu_ps(forces+4*jj, _mm_sub_ps(_mm_loadu_ps(forces+4*jj), result));
-                   // accumulate energies
+void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq,
+                                             const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions,
+                                             float* fixedParameters, float* forces, float* totalEnergy) const {
-                   realSpaceEwaldEnergy = (float) (chargeProd*inverseR*erf(alphaR));
+    double directEnergy = 0;
+    double* energyPtr = (totalEnergy == NULL ? NULL : &directEnergy);
+    if (ewald || pme) {
+        // Compute the interactions from the neighbor list.
+        for (int i = 0; i < (int) neighborList->size(); i++) {
+            pair<int, int> pair = (*neighborList)[i];
+            calculateOneEwaldIxn(pair.first, pair.second, posq, atomParameters, forces, energyPtr);
+        }
-                   totalExclusionEnergy += realSpaceEwaldEnergy;
+        // Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
-               }
+        for (int i = 0; i < numberOfAtoms; i++)
+            for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
+                if (*iter > i) {
+                   int ii = i;
+                   int jj = *iter;
+                   __m128 deltaR;
+                   __m128 posI = _mm_loadu_ps(posq+4*ii);
+                   __m128 posJ = _mm_loadu_ps(posq+4*jj);
+                   float r2;
+                   getDeltaR(posJ, posI, deltaR, r2, false);
+                   float r         = sqrtf(r2);
+                   float inverseR  = 1/r;
+                   float alphaR    = alphaEwald * r;
+                   float erfAlphaR = 1.0f-erfcApprox(alphaR);
+                   if (erfAlphaR > 1e-6) {
+                       float chargeProd = ONE_4PI_EPS0*posq[4*ii+3]*posq[4*jj+3];
+                       float dEdR      = (float) (chargeProd * inverseR * inverseR * inverseR);
+                             dEdR      = (float) (dEdR * (erfAlphaR - TWO_OVER_SQRT_PI * alphaR * exp (- alphaR * alphaR)));
+                       __m128 result = _mm_mul_ps(deltaR, _mm_set1_ps(dEdR));
+                       _mm_storeu_ps(forces+4*ii, _mm_sub_ps(_mm_loadu_ps(forces+4*ii), result));
+                       _mm_storeu_ps(forces+4*jj, _mm_add_ps(_mm_loadu_ps(forces+4*jj), result));
+                       if (energyPtr != NULL)
+                           directEnergy -= chargeProd*inverseR*erfAlphaR;
+                   }
+                }
            }
+    }
+    else if (cutoff) {
+        // Compute the interactions from the neighbor list.
+        for (int i = 0; i < (int) neighborList->size(); i++) {
+            pair<int, int> pair = (*neighborList)[i];
+            calculateOneIxn(pair.first, pair.second, posq, atomParameters, forces, energyPtr);
        }
+    }
+    else {
+        // Loop over all atom pairs
-    if (totalEnergy)
+        for (int ii = 0; ii < numberOfAtoms; ii++){
-        *totalEnergy -= totalExclusionEnergy;
+            for (int jj = ii+1; jj < numberOfAtoms; jj++)
-}
+                if (exclusions[jj].find(ii) == exclusions[jj].end())
+                    calculateOneIxn(ii, jj, posq, atomParameters, forces, energyPtr);
+        }
-/**---------------------------------------------------------------------------------------
+    }
+    if (totalEnergy != NULL)
-   Calculate LJ Coulomb pair ixn
+        *totalEnergy += (float) directEnergy;
-   @param numberOfAtoms    number of atoms
-   @param atomCoordinates  atom coordinates
-   @param atomParameters   atom parameters                             atomParameters[atomIndex][paramterIndex]
-   @param exclusions       atom exclusion indices
-                           exclusions[atomIndex] contains the list of exclusions for that atom
-   @param fixedParameters  non atom parameters (not currently used)
-   @param forces           force array (forces added)
-   @param totalEnergy      total energy
-   @param includeDirect      true if direct space interactions should be included
-   @param includeReciprocal  true if reciprocal space interactions should be included
-   --------------------------------------------------------------------------------------- */
-void CpuNonbondedForce::calculatePairIxn(int numberOfAtoms, float* atomCoordinates,
-                                             float** atomParameters, vector<set<int> >& exclusions,
-                                             float* fixedParameters, float* forces,
-                                             float* totalEnergy, bool includeDirect, bool includeReciprocal) const {
-   if (ewald || pme) {
-       calculateEwaldIxn(numberOfAtoms, atomCoordinates, atomParameters, exclusions, fixedParameters, forces,
-               totalEnergy, includeDirect, includeReciprocal);
-       return;
-   }
-   if (!includeDirect)
-       return;
-   double directEnergy = 0;
-   double* energyPtr = (totalEnergy == NULL ? NULL : &directEnergy);
-   if (cutoff) {
-       for (int i = 0; i < (int) neighborList->size(); i++) {
-           pair<int, int> pair = (*neighborList)[i];
-           calculateOneIxn(pair.first, pair.second, atomCoordinates, atomParameters, forces, energyPtr);
-       }
-   }
-   else {
-       for (int ii = 0; ii < numberOfAtoms; ii++){
-          // loop over atom pairs
-          for (int jj = ii+1; jj < numberOfAtoms; jj++)
-              if (exclusions[jj].find(ii) == exclusions[jj].end())
-                  calculateOneIxn(ii, jj, atomCoordinates, atomParameters, forces, energyPtr);
-       }
-   }
-   if (totalEnergy != NULL)
-       *totalEnergy += (float) directEnergy;
 }
-  /**---------------------------------------------------------------------------------------
+void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* posq,
+                        const vector<pair<float, float> >& atomParameters, float* forces,
-     Calculate LJ Coulomb pair ixn between two atoms
-     @param ii               the index of the first atom
-     @param jj               the index of the second atom
-     @param atomCoordinates  atom coordinates
-     @param atomParameters   atom parameters (charges, c6, c12, ...)     atomParameters[atomIndex][paramterIndex]
-     @param forces           force array (forces added)
-     @param totalEnergy      total energy
-     --------------------------------------------------------------------------------------- */
-void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* atomCoordinates,
-                        float** atomParameters, float* forces,
                        double* totalEnergy) const {
    // get deltaR, R2, and R between 2 atoms
    __m128 deltaR;
-    __m128 posI = _mm_loadu_ps(atomCoordinates+4*ii);
+    __m128 posI = _mm_loadu_ps(posq+4*ii);
-    __m128 posJ = _mm_loadu_ps(atomCoordinates+4*jj);
+    __m128 posJ = _mm_loadu_ps(posq+4*jj);
    float r2;
    getDeltaR(posJ, posI, deltaR, r2, periodic);
    float r = sqrtf(r2);
@@ -525,14 +360,14 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* atomCoordinates,
        switchValue = 1+t*t*t*(-10+t*(15-t*6));
        switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance);
    }
-    float sig       = atomParameters[ii][SigIndex] +  atomParameters[jj][SigIndex];
+    float sig       = atomParameters[ii].first + atomParameters[jj].first;
    float sig2      = inverseR*sig;
          sig2     *= sig2;
    float sig6      = sig2*sig2*sig2;
-    float eps       = atomParameters[ii][EpsIndex]*atomParameters[jj][EpsIndex];
+    float eps       = atomParameters[ii].second*atomParameters[jj].second;
    float dEdR      = switchValue*eps*(12.0f*sig6 - 6.0f)*sig6;
-    float chargeProd = ONE_4PI_EPS0*atomCoordinates[4*ii+3]*atomCoordinates[4*jj+3];
+    float chargeProd = ONE_4PI_EPS0*posq[4*ii+3]*posq[4*jj+3];
    if (cutoff)
        dEdR += (float) (chargeProd*(inverseR-2.0f*krf*r2));
    else
@@ -561,6 +396,54 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* atomCoordinates,
    _mm_storeu_ps(forces+4*jj, _mm_sub_ps(_mm_loadu_ps(forces+4*jj), result));
  }
+void CpuNonbondedForce::calculateOneEwaldIxn(int ii, int jj, float* posq,
+                        const vector<pair<float, float> >& atomParameters, float* forces,
+                        double* totalEnergy) const {
+    __m128 deltaR;
+    __m128 posI = _mm_loadu_ps(posq+4*ii);
+    __m128 posJ = _mm_loadu_ps(posq+4*jj);
+    float r2;
+    getDeltaR(posJ, posI, deltaR, r2, true);
+    float r         = sqrtf(r2);
+    float inverseR  = 1/r;
+    float switchValue = 1, switchDeriv = 0;
+    if (useSwitch && r > switchingDistance) {
+        float t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
+        switchValue = 1+t*t*t*(-10+t*(15-t*6));
+        switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance);
+    }
+    float alphaR    = alphaEwald * r;
+    float erfcAlphaR = erfcApprox(alphaR);
+    float chargeProd = ONE_4PI_EPS0*posq[4*ii+3]*posq[4*jj+3];
+    float dEdR      = (float) (chargeProd * inverseR * inverseR * inverseR);
+          dEdR      = (float) (dEdR * (erfcAlphaR + TWO_OVER_SQRT_PI * alphaR * exp(-alphaR*alphaR)));
+    float sig       = atomParameters[ii].first +  atomParameters[jj].first;
+    float sig2      = inverseR*sig;
+          sig2     *= sig2;
+    float sig6      = sig2*sig2*sig2;
+    float eps       = atomParameters[ii].second*atomParameters[jj].second;
+          dEdR     += switchValue*eps*(12.0f*sig6 - 6.0f)*sig6*inverseR*inverseR;
+    float energy = eps*(sig6-1.0f)*sig6;
+    if (useSwitch) {
+        dEdR -= energy*switchDeriv*inverseR;
+        energy *= switchValue;
+    }
+    // accumulate forces
+    __m128 result = _mm_mul_ps(deltaR, _mm_set1_ps(dEdR));
+    _mm_storeu_ps(forces+4*ii, _mm_add_ps(_mm_loadu_ps(forces+4*ii), result));
+    _mm_storeu_ps(forces+4*jj, _mm_sub_ps(_mm_loadu_ps(forces+4*jj), result));
+    // accumulate energies
+    if (totalEnergy) {
+        energy += (float) (chargeProd*inverseR*erfcAlphaR);
+        *totalEnergy += energy;
+    }
+}
 void CpuNonbondedForce::getDeltaR(const __m128& posI, const __m128& posJ, __m128& deltaR, float& r2, bool periodic) const {
    deltaR = _mm_sub_ps(posJ, posI);
    if (periodic) {
@@ -569,3 +452,15 @@ void CpuNonbondedForce::getDeltaR(const __m128& posI, const __m128& posJ, __m128
    }
    r2 = _mm_cvtss_f32(_mm_dp_ps(deltaR, deltaR, 0x71));
 }
+float CpuNonbondedForce::erfcApprox(float x) {
+    // This approximation for erfc is from Abramowitz and Stegun (1964) p. 299.  They cite the following as
+    // the original source: C. Hastings, Jr., Approximations for Digital Computers (1955).  It has a maximum
+    // error of 3e-7.
+    float t = 1.0f+(0.0705230784f+(0.0422820123f+(0.0092705272f+(0.0001520143f+(0.0002765672f+0.0000430638f*x)*x)*x)*x)*x)*x;
+    t *= t;
+    t *= t;
+    t *= t;
+    return 1.0f/(t*t);
+}
--- a/platforms/reference/include/ReferencePME.h
+++ b/platforms/reference/include/ReferencePME.h
@@ -54,11 +54,11 @@ pme_t;
 */
 int 
 pme_init(pme_t *       ppme,
-         RealOpenMM        ewaldcoeff,
+         RealOpenMM    ewaldcoeff,
         int           natoms,
-         const int           ngrid[3],
+         const int     ngrid[3],
         int           pme_order,
-             RealOpenMM        epsilon_r);
+         RealOpenMM    epsilon_r);
 /*
 * Evaluate reciprocal space PME energy and forces.
@@ -75,9 +75,9 @@ pme_init(pme_t *       ppme,
 */
 int
 pme_exec(pme_t       pme,
-         std::vector<OpenMM::RealVec>&  atomCoordinates,
+         std::vector<OpenMM::RealVec>& atomCoordinates,
-         std::vector<OpenMM::RealVec>&  forces,
+         std::vector<OpenMM::RealVec>& forces,
-         RealOpenMM **  atomParameters,
+         std::vector<RealOpenMM>& charges,
         const RealOpenMM  periodicBoxSize[3],
         RealOpenMM *    energy,
         RealOpenMM      pme_virial[3][3]);

--- a/platforms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
+++ b/platforms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
@@ -233,7 +233,10 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
    pme_init(&pmedata,alphaEwald,numberOfAtoms,meshDim,5,1);
-    pme_exec(pmedata,atomCoordinates,forces,atomParameters,periodicBoxSize,&recipEnergy,virial);
+    vector<RealOpenMM> charges(numberOfAtoms);
+    for (int i = 0; i < numberOfAtoms; i++)
+        charges[i] = atomParameters[i][QIndex];
+    pme_exec(pmedata,atomCoordinates,forces,charges,periodicBoxSize,&recipEnergy,virial);
    if( totalEnergy )
       *totalEnergy += recipEnergy;

--- a/platforms/reference/src/SimTKReference/ReferencePME.cpp
+++ b/platforms/reference/src/SimTKReference/ReferencePME.cpp
@@ -317,10 +317,8 @@ pme_update_bsplines(pme_t    pme)
 static void
-pme_grid_spread_charge(pme_t      pme,
+pme_grid_spread_charge(pme_t      pme, vector<RealOpenMM>& charges)
-                       RealOpenMM **   atomParameters)
 {
-        static const int   QIndex = 2; // atom charges are stored in atomParameters[atomID][2]
    int       order;
    int       i;
    int       ix,iy,iz;
@@ -342,7 +340,7 @@ pme_grid_spread_charge(pme_t      pme,
    for(i=0;i<pme->natoms;i++)
    {
-        q = atomParameters[i][QIndex];
+        q = charges[i];
        /* Grid index for the actual atom position */
        x0index = pme->particleindex[i][0];
@@ -523,10 +521,9 @@ pme_reciprocal_convolution(pme_t     pme,
 static void
 pme_grid_interpolate_force(pme_t      pme,
                           const RealOpenMM     periodicBoxSize[3],
-                           RealOpenMM **   atomParameters,
+                           vector<RealOpenMM>& charges,
                           vector<RealVec>&   forces)
 {
-        static const int   QIndex = 2; // atom charges are stored in atomParameters[atomID][2]
    int       i;
    int       ix,iy,iz;
    int       x0index,y0index,z0index;
@@ -558,7 +555,7 @@ pme_grid_interpolate_force(pme_t      pme,
    {
        fx = fy = fz = 0;
-        q = atomParameters[i][QIndex];
+        q = charges[i];
        /* Grid index for the actual atom position */
        x0index = pme->particleindex[i][0];
@@ -671,7 +668,7 @@ pme_init(pme_t *       ppme,
 int pme_exec(pme_t       pme,
             vector<RealVec>&   atomCoordinates,
             vector<RealVec>&   forces,
-             RealOpenMM **   atomParameters,
+             vector<RealOpenMM>& charges,
             const RealOpenMM      periodicBoxSize[3],
             RealOpenMM *    energy,
             RealOpenMM      pme_virial[3][3])
@@ -692,7 +689,7 @@ int pme_exec(pme_t       pme,
    pme_update_bsplines(pme);
    /* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
-    pme_grid_spread_charge(pme,atomParameters);
+    pme_grid_spread_charge(pme, charges);
    /* do 3d-fft */
    fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid);
@@ -704,7 +701,7 @@ int pme_exec(pme_t       pme,
    fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid);
    /* Get the particle forces from the grid and bsplines in the pme structure */
-    pme_grid_interpolate_force(pme,periodicBoxSize,atomParameters,forces);
+    pme_grid_interpolate_force(pme,periodicBoxSize,charges,forces);
    return 0;
 }