Created HippoNonbondedForce (#2296)

* Created API for HIPPO force field * Beginning of reference implementation of HIPPO * Continuing reference implementation of HIPPO * Continuing reference implementation of HIPPO * Continuing reference implementation of HIPPO * Continuing reference implementation of HIPPO * Continuing reference implementation of HIPPO * Continuing reference implementation of HIPPO * Continuing reference implementation of HIPPO * Completed reference of HIPPO with no cutoff * Beginning cutoffs/PME for reference implementation of HIPPO * Continuing PME for reference implementation of HIPPO * Continuing PME for reference implementation of HIPPO * Continuing PME for reference implementation of HIPPO * Completed reference implementation of HIPPO * Cleanup and optimization to HIPPO reference * Further cleanup to HIPPO * Combined direct space interactions into a single loop * Compute direct space interactions in quasi-internal frame * Beginning of CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Finished CUDA implementation of HIPPO * More features and test cases for HippoNonbondedForce * Serialization and Python API for HippoNonbondedForce * Fixed sign error in computing forces

Created HippoNonbondedForce (#2296)
* Created API for HIPPO force field * Beginning of reference implementation of HIPPO * Continuing reference implementation of HIPPO * Continuing reference implementation of HIPPO * Continuing reference implementation of HIPPO * Continuing reference implementation of HIPPO * Continuing reference implementation of HIPPO * Continuing reference implementation of HIPPO * Continuing reference implementation of HIPPO * Completed reference of HIPPO with no cutoff * Beginning cutoffs/PME for reference implementation of HIPPO * Continuing PME for reference implementation of HIPPO * Continuing PME for reference implementation of HIPPO * Continuing PME for reference implementation of HIPPO * Completed reference implementation of HIPPO * Cleanup and optimization to HIPPO reference * Further cleanup to HIPPO * Combined direct space interactions into a single loop * Compute direct space interactions in quasi-internal frame * Beginning of CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Continuing CUDA implementation of HIPPO * Finished CUDA implementation of HIPPO * More features and test cases for HippoNonbondedForce * Serialization and Python API for HippoNonbondedForce * Fixed sign error in computing forces
1eec1e15 · peastman · GitHub · 0b22cac1 · 1eec1e15 · 1eec1e15
Unverified Commit 1eec1e15 authored Apr 09, 2019 by peastman Committed by GitHub Apr 09, 2019
20 changed files
--- a/openmmapi/include/openmm/Vec3.h
+++ b/openmmapi/include/openmm/Vec3.h
@@ -157,6 +157,10 @@ private:
    double data[3];
 };

+static Vec3 operator*(double lhs, Vec3 rhs) {
+    return Vec3(rhs[0]*lhs, rhs[1]*lhs, rhs[2]*lhs);
+}
+
 template <class CHAR, class TRAITS>
 std::basic_ostream<CHAR,TRAITS>& operator<<(std::basic_ostream<CHAR,TRAITS>& o, const Vec3& v) {
    o<<'['<<v[0]<<", "<<v[1]<<", "<<v[2]<<']';

--- a/platforms/cpu/src/CpuNonbondedForce.cpp
+++ b/platforms/cpu/src/CpuNonbondedForce.cpp
@@ -276,9 +276,9 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
            double recipDispersionEnergy = 0.0;
            pme_exec_dpme(pmedata,atomCoordinates,dpmeforces,charges,periodicBoxVectors,&recipDispersionEnergy);
            for (int i = 0; i < numberOfAtoms; i++){
-                forces[i][0] -= 2.0*dpmeforces[i][0];
-                forces[i][1] -= 2.0*dpmeforces[i][1];
-                forces[i][2] -= 2.0*dpmeforces[i][2];
+                forces[i][0] += dpmeforces[i][0];
+                forces[i][1] += dpmeforces[i][1];
+                forces[i][2] += dpmeforces[i][2];
            }
            if (totalEnergy)
                *totalEnergy += recipDispersionEnergy;

--- a/platforms/cuda/include/CudaKernels.h
+++ b/platforms/cuda/include/CudaKernels.h
@@ -694,7 +694,6 @@ private:
    CudaArray pmeDispersionBsplineModuliX;
    CudaArray pmeDispersionBsplineModuliY;
    CudaArray pmeDispersionBsplineModuliZ;
-    CudaArray pmeAtomRange;
    CudaArray pmeAtomGridIndex;
    CudaArray pmeEnergyBuffer;
    CudaSort* sort;

--- a/platforms/cuda/include/CudaNonbondedUtilities.h
+++ b/platforms/cuda/include/CudaNonbondedUtilities.h
@@ -277,6 +277,11 @@ public:
     * @param groups    the set of force groups
     */
    void createKernelsForGroups(int groups);
+    /**
+     * Set the source code for the main kernel.  This defaults to the content of nonbonded.cu.  It only needs to be
+     * changed in very unusual circumstances.
+     */
+    void setKernelSource(const std::string& source);
 private:
    class KernelSet;
    class BlockSortTrait;
@@ -311,6 +316,7 @@ private:
    double lastCutoff;
    bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
    int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
+    std::string kernelSource;
 };

 /**
@@ -343,9 +349,10 @@ public:
     * @param numComponents  the number of components in the parameter
     * @param size           the size of the parameter in bytes
     * @param memory         the memory containing the parameter values
+     * @param constant       whether the memory should be marked as constant
     */
-    ParameterInfo(const std::string& name, const std::string& componentType, int numComponents, int size, CUdeviceptr memory) :
-            name(name), componentType(componentType), numComponents(numComponents), size(size), memory(memory) {
+    ParameterInfo(const std::string& name, const std::string& componentType, int numComponents, int size, CUdeviceptr memory, bool constant=true) :
+            name(name), componentType(componentType), numComponents(numComponents), size(size), memory(memory), constant(constant) {
        if (numComponents == 1)
            type = componentType;
        else {
@@ -372,12 +379,16 @@ public:
    CUdeviceptr& getMemory() {
        return memory;
    }
+    bool isConstant() const {
+        return constant;
+    }
 private:
    std::string name;
    std::string componentType;
    std::string type;
    int size, numComponents;
    CUdeviceptr memory;
+    bool constant;
 };

 } // namespace OpenMM

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
@@ -1782,6 +1782,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
                    pmeDefines["GRID_SIZE_Z"] = cu.intToString(dispersionGridSizeZ);
                    pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
                    pmeDefines["USE_LJPME"] = "1";
+                    pmeDefines["CHARGE_FROM_SIGEPS"] = "1";
                    double invRCut6 = pow(force.getCutoffDistance(), -6);
                    double dalphaR = dispersionAlpha * force.getCutoffDistance();
                    double dar2 = dalphaR*dalphaR;
@@ -1819,7 +1820,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
                    pmeDispersionBsplineModuliY.initialize(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
                    pmeDispersionBsplineModuliZ.initialize(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
                }
-                pmeAtomRange.initialize<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
                pmeAtomGridIndex.initialize<int2>(cu, numParticles, "pmeAtomGridIndex");
                int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
                pmeEnergyBuffer.initialize(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");

--- a/platforms/cuda/src/CudaNonbondedUtilities.cpp
+++ b/platforms/cuda/src/CudaNonbondedUtilities.cpp
@@ -73,6 +73,7 @@ CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(c
    CHECK_RESULT(cuMemHostAlloc((void**) &pinnedCountBuffer, 2*sizeof(int), CU_MEMHOSTALLOC_PORTABLE));
    numForceThreadBlocks = 4*multiprocessors;
    forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256);
+    setKernelSource(CudaKernelSources::nonbonded);
 }

 CudaNonbondedUtilities::~CudaNonbondedUtilities() {
@@ -510,13 +511,17 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
    replacements["ATOM_PARAMETER_DATA"] = localData.str();
    stringstream args;
    for (int i = 0; i < (int) params.size(); i++) {
-        args << ", const ";
+        args << ", ";
+        if (params[i].isConstant())
+            args << "const ";
        args << params[i].getType();
        args << "* __restrict__ global_";
        args << params[i].getName();
    }
    for (int i = 0; i < (int) arguments.size(); i++) {
-        args << ", const ";
+        args << ", ";
+        if (arguments[i].isConstant())
+            args << "const ";
        args << arguments[i].getType();
        args << "* __restrict__ ";
        args << arguments[i].getName();
@@ -710,7 +715,11 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
    defines["LAST_EXCLUSION_TILE"] = context.intToString(endExclusionIndex);
    if ((localDataSize/4)%2 == 0 && !context.getUseDoublePrecision())
        defines["PARAMETER_SIZE_IS_EVEN"] = "1";
-    CUmodule program = context.createModule(CudaKernelSources::vectorOps+context.replaceStrings(CudaKernelSources::nonbonded, replacements), defines);
+    CUmodule program = context.createModule(CudaKernelSources::vectorOps+context.replaceStrings(kernelSource, replacements), defines);
    CUfunction kernel = context.getKernel(program, "computeNonbonded");
    return kernel;
 }
+
+void CudaNonbondedUtilities::setKernelSource(const string& source) {
+    kernelSource = source;
+}
--- a/platforms/cuda/src/kernels/pme.cu
+++ b/platforms/cuda/src/kernels/pme.cu
@@ -22,7 +22,7 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
 extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real* __restrict__ originalPmeGrid,
        real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
        real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex
-#ifdef USE_LJPME
+#ifdef CHARGE_FROM_SIGEPS
        , const float2* __restrict__ sigmaEpsilon
 #else
        , const real* __restrict__ charges
@@ -50,7 +50,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
        int atom = pmeAtomGridIndex[i].x;
        real4 pos = posq[atom];
-#ifdef USE_LJPME
+#ifdef CHARGE_FROM_SIGEPS
        const float2 sigEps = sigmaEpsilon[atom];
        const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
 #else
@@ -275,7 +275,7 @@ extern "C" __global__
 void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, const real* __restrict__ originalPmeGrid,
        real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
        real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex
-#ifdef USE_LJPME
+#ifdef CHARGE_FROM_SIGEPS
        , const float2* __restrict__ sigmaEpsilon
 #else
        , const real* __restrict__ charges
@@ -352,7 +352,7 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
                }
            }
        }
-#ifdef USE_LJPME
+#ifdef CHARGE_FROM_SIGEPS
        const float2 sigEps = sigmaEpsilon[atom];
        real q = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
 #else

--- a/platforms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
+++ b/platforms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
@@ -252,17 +252,12 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<Vec3>& a
            // Dispersion reciprocal space terms
            pme_init(&pmedata,alphaDispersionEwald,numberOfAtoms,dispersionMeshDim,5,1);

-            std::vector<Vec3> dpmeforces;
-            for (int i = 0; i < numberOfAtoms; i++){
+            std::vector<Vec3> dpmeforces(numberOfAtoms);
+            for (int i = 0; i < numberOfAtoms; i++)
                charges[i] = 8.0*pow(atomParameters[i][SigIndex], 3.0) * atomParameters[i][EpsIndex];
-                dpmeforces.push_back(Vec3());
-            }
            pme_exec_dpme(pmedata,atomCoordinates,dpmeforces,charges,periodicBoxVectors,&recipDispersionEnergy);
-            for (int i = 0; i < numberOfAtoms; i++){
-                forces[i][0] -= 2.0*dpmeforces[i][0];
-                forces[i][1] -= 2.0*dpmeforces[i][1];
-                forces[i][2] -= 2.0*dpmeforces[i][2];
-            }
+            for (int i = 0; i < numberOfAtoms; i++)
+                forces[i] += dpmeforces[i];
            if (totalEnergy)
                *totalEnergy += recipDispersionEnergy;
            pme_destroy(pmedata);

--- a/platforms/reference/src/SimTKReference/ReferencePME.cpp
+++ b/platforms/reference/src/SimTKReference/ReferencePME.cpp
@@ -538,7 +538,7 @@ dpme_reciprocal_convolution(pme_t pme,
    ny = pme->ngrid[1];
    nz = pme->ngrid[2];

-    boxfactor = M_PI*sqrt(M_PI) / (6.0*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2]);
+    boxfactor = -2*M_PI*sqrt(M_PI) / (6.0*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2]);

    esum = 0;

@@ -610,7 +610,7 @@ dpme_reciprocal_convolution(pme_t pme,
        }
    }
    // Remember the C6 energy is attractive, hence the negative sign.
-    *energy = -esum;
+    *energy = 0.5*esum;
 }



--- a/plugins/amoeba/openmmapi/include/OpenMMAmoeba.h
+++ b/plugins/amoeba/openmmapi/include/OpenMMAmoeba.h
@@ -43,5 +43,6 @@
 #include "openmm/AmoebaGeneralizedKirkwoodForce.h"
 #include "openmm/AmoebaVdwForce.h"
 #include "openmm/AmoebaWcaDispersionForce.h"
+#include "openmm/HippoNonbondedForce.h"

 #endif /*AMOEBA_OPENMM_H_*/
--- a/plugins/amoeba/openmmapi/include/openmm/HippoNonbondedForce.h
+++ b/plugins/amoeba/openmmapi/include/openmm/HippoNonbondedForce.h
--- a/plugins/amoeba/openmmapi/include/openmm/amoebaKernels.h
+++ b/plugins/amoeba/openmmapi/include/openmm/amoebaKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2015 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2018 Stanford University and the Authors.      *
 * Authors:                                                                   *
 * Contributors:                                                              *
 *                                                                            *
@@ -495,6 +495,61 @@ public:
    virtual void copyParametersToContext(ContextImpl& context, const AmoebaWcaDispersionForce& force) = 0;
 };

+/**
+ * This kernel is invoked by HippoNonbondedForce to calculate the forces acting on the system and the energy of the system.
+ */
+class CalcHippoNonbondedForceKernel : public KernelImpl {
+public:
+    static std::string Name() {
+        return "CalcHippoNonbondedForce";
+    }
+    CalcHippoNonbondedForceKernel(std::string name, const Platform& platform) : KernelImpl(name, platform) {
+    }
+    /**
+     * Initialize the kernel.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param force      the MultipoleForce this kernel will be used for
+     */
+    virtual void initialize(const System& system, const HippoNonbondedForce& force) = 0;
+    /**
+     * Execute the kernel to calculate the forces and/or energy.
+     *
+     * @param context        the context in which to execute this kernel
+     * @param includeForces  true if forces should be calculated
+     * @param includeEnergy  true if the energy should be calculated
+     * @return the potential energy due to the force
+     */
+    virtual double execute(ContextImpl& context, bool includeForces, bool includeEnergy) = 0;
+    virtual void getLabFramePermanentDipoles(ContextImpl& context, std::vector<Vec3>& dipoles) = 0;
+    virtual void getInducedDipoles(ContextImpl& context, std::vector<Vec3>& dipoles) = 0;
+    /**
+     * Copy changed parameters over to a context.
+     *
+     * @param context    the context to copy parameters to
+     * @param force      the AmoebaMultipoleForce to copy the parameters from
+     */
+    virtual void copyParametersToContext(ContextImpl& context, const HippoNonbondedForce& force) = 0;
+    /**
+     * Get the parameters being used for PME.
+     *
+     * @param alpha   the separation parameter
+     * @param nx      the number of grid points along the X axis
+     * @param ny      the number of grid points along the Y axis
+     * @param nz      the number of grid points along the Z axis
+     */
+    virtual void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const = 0;
+    /**
+     * Get the parameters being used for dispersion PME.
+     *
+     * @param alpha   the separation parameter
+     * @param nx      the number of grid points along the X axis
+     * @param ny      the number of grid points along the Y axis
+     * @param nz      the number of grid points along the Z axis
+     */
+    virtual void getDPMEParameters(double& alpha, int& nx, int& ny, int& nz) const = 0;
+};
+
 } // namespace OpenMM

 #endif /*AMOEBA_OPENMM_KERNELS_H*/
--- a/plugins/amoeba/openmmapi/include/openmm/internal/HippoNonbondedForceImpl.h
+++ b/plugins/amoeba/openmmapi/include/openmm/internal/HippoNonbondedForceImpl.h
+#ifndef OPENMM_HIPPO_NONBONDED_FORCE_IMPL_H_
+#define OPENMM_HIPPO_NONBONDED_FORCE_IMPL_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                OpenMMAmoeba                                *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2018 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/internal/ForceImpl.h"
+#include "openmm/HippoNonbondedForce.h"
+#include "openmm/Kernel.h"
+#include "openmm/Vec3.h"
+#include <string>
+
+namespace OpenMM {
+
+/**
+ * This is the internal implementation of HippoNonbondedForce.
+ */
+
+class OPENMM_EXPORT_AMOEBA HippoNonbondedForceImpl : public ForceImpl {
+public:
+    HippoNonbondedForceImpl(const HippoNonbondedForce& owner);
+    ~HippoNonbondedForceImpl();
+    void initialize(ContextImpl& context);
+    const HippoNonbondedForce& getOwner() const {
+        return owner;
+    }
+    void updateContextState(ContextImpl& context, bool& forcesInvalid) {
+        // This force field doesn't update the state directly.
+    }
+    double calcForcesAndEnergy(ContextImpl& context, bool includeForces, bool includeEnergy, int groups);
+    std::map<std::string, double> getDefaultParameters() {
+        return std::map<std::string, double>(); // This force doesn't define any parameters.
+    }
+    std::vector<std::string> getKernelNames();
+    void getLabFramePermanentDipoles(ContextImpl& context, std::vector<Vec3>& dipoles);
+    void getInducedDipoles(ContextImpl& context, std::vector<Vec3>& dipoles);
+    void updateParametersInContext(ContextImpl& context);
+    void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
+    void getDPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
+
+private:
+    const HippoNonbondedForce& owner;
+    Kernel kernel;
+};
+
+} // namespace OpenMM
+
+#endif /*OPENMM_HIPPO_NONBONDED_FORCE_IMPL_H_*/
--- a/plugins/amoeba/openmmapi/src/HippoNonbondedForce.cpp
+++ b/plugins/amoeba/openmmapi/src/HippoNonbondedForce.cpp
+/* -------------------------------------------------------------------------- *
+ *                                OpenMMAmoeba                                *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2008-2018 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/Force.h"
+#include "openmm/OpenMMException.h"
+#include "openmm/HippoNonbondedForce.h"
+#include "openmm/internal/AssertionUtilities.h"
+#include "openmm/internal/HippoNonbondedForceImpl.h"
+#include <sstream>
+
+using namespace OpenMM;
+using namespace std;
+
+HippoNonbondedForce::HippoNonbondedForce() : nonbondedMethod(NoCutoff), cutoffDistance(1.0), switchingDistance(0.9),
+        ewaldErrorTol(1e-4), alpha(0.0), dalpha(0.0), nx(0), ny(0), nz(0), dnx(0), dny(0), dnz(0) {
+    extrapolationCoefficients = {0.042, 0.635, 0.414};
+}
+
+HippoNonbondedForce::NonbondedMethod HippoNonbondedForce::getNonbondedMethod() const {
+    return nonbondedMethod;
+}
+
+void HippoNonbondedForce::setNonbondedMethod(HippoNonbondedForce::NonbondedMethod method) {
+    if (method < 0 || method > 1)
+        throw OpenMMException("HippoNonbondedForce: Illegal value for nonbonded method");
+    nonbondedMethod = method;
+}
+
+double HippoNonbondedForce::getCutoffDistance() const {
+    return cutoffDistance;
+}
+
+void HippoNonbondedForce::setCutoffDistance(double distance) {
+    cutoffDistance = distance;
+}
+
+double HippoNonbondedForce::getSwitchingDistance() const {
+    return switchingDistance;
+}
+
+void HippoNonbondedForce::setSwitchingDistance(double distance) {
+    switchingDistance = distance;
+}
+
+const std::vector<double> & HippoNonbondedForce::getExtrapolationCoefficients() const {
+    return extrapolationCoefficients;
+}
+
+void HippoNonbondedForce::setExtrapolationCoefficients(const std::vector<double> &coefficients) {
+    extrapolationCoefficients = coefficients;
+}
+
+void HippoNonbondedForce::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
+    alpha = this->alpha;
+    nx = this->nx;
+    ny = this->ny;
+    nz = this->nz;
+}
+
+void HippoNonbondedForce::getDPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
+    alpha = this->dalpha;
+    nx = this->dnx;
+    ny = this->dny;
+    nz = this->dnz;
+}
+
+void HippoNonbondedForce::setPMEParameters(double alpha, int nx, int ny, int nz) {
+    this->alpha = alpha;
+    this->nx = nx;
+    this->ny = ny;
+    this->nz = nz;
+}
+
+void HippoNonbondedForce::setDPMEParameters(double alpha, int nx, int ny, int nz) {
+    this->dalpha = alpha;
+    this->dnx = nx;
+    this->dny = ny;
+    this->dnz = nz;
+}
+
+void HippoNonbondedForce::getPMEParametersInContext(const Context& context, double& alpha, int& nx, int& ny, int& nz) const {
+    dynamic_cast<const HippoNonbondedForceImpl&>(getImplInContext(context)).getPMEParameters(alpha, nx, ny, nz);
+}
+
+void HippoNonbondedForce::getDPMEParametersInContext(const Context& context, double& alpha, int& nx, int& ny, int& nz) const {
+    dynamic_cast<const HippoNonbondedForceImpl&>(getImplInContext(context)).getDPMEParameters(alpha, nx, ny, nz);
+}
+
+double HippoNonbondedForce::getEwaldErrorTolerance() const {
+    return ewaldErrorTol;
+}
+
+void HippoNonbondedForce::setEwaldErrorTolerance(double tol) {
+    ewaldErrorTol = tol;
+}
+
+int HippoNonbondedForce::addParticle(double charge, const std::vector<double>& dipole, const std::vector<double>& quadrupole, double coreCharge,
+                                     double alpha, double epsilon, double damping, double c6, double pauliK, double pauliQ, double pauliAlpha,
+                                     double polarizability, int axisType, int multipoleAtomZ, int multipoleAtomX, int multipoleAtomY) {
+    particles.push_back(ParticleInfo(charge, dipole, quadrupole, coreCharge, alpha, epsilon, damping, c6, pauliK, pauliQ, pauliAlpha,
+                                     polarizability, axisType, multipoleAtomZ, multipoleAtomX, multipoleAtomY));
+    return particles.size()-1;
+}
+
+void HippoNonbondedForce::getParticleParameters(int index, double& charge, std::vector<double>& dipole, std::vector<double>& quadrupole, double& coreCharge,
+                                                double& alpha, double& epsilon, double& damping, double& c6, double& pauliK, double& pauliQ, double& pauliAlpha,
+                                                double& polarizability, int& axisType, int& multipoleAtomZ, int& multipoleAtomX, int& multipoleAtomY) const {
+    charge = particles[index].charge;
+    dipole = particles[index].dipole;
+    quadrupole = particles[index].quadrupole;
+    coreCharge = particles[index].coreCharge;
+    alpha = particles[index].alpha;
+    epsilon = particles[index].epsilon;
+    damping = particles[index].damping;
+    c6 = particles[index].c6;
+    pauliK = particles[index].pauliK;
+    pauliQ = particles[index].pauliQ;
+    pauliAlpha = particles[index].pauliAlpha;
+    polarizability = particles[index].polarizability;
+    axisType = particles[index].axisType;
+    multipoleAtomZ = particles[index].multipoleAtomZ;
+    multipoleAtomX = particles[index].multipoleAtomX;
+    multipoleAtomY = particles[index].multipoleAtomY;
+}
+
+void HippoNonbondedForce::setParticleParameters(int index, double charge, const std::vector<double>& dipole, const std::vector<double>& quadrupole, double coreCharge,
+                                                double alpha, double epsilon, double damping, double c6, double pauliK, double pauliQ, double pauliAlpha,
+                                                double polarizability, int axisType, int multipoleAtomZ, int multipoleAtomX, int multipoleAtomY) {
+    particles[index].charge = charge;
+    particles[index].dipole = dipole;
+    particles[index].quadrupole = quadrupole;
+    particles[index].coreCharge = coreCharge;
+    particles[index].alpha = alpha;
+    particles[index].epsilon = epsilon;
+    particles[index].damping = damping;
+    particles[index].c6 = c6;
+    particles[index].pauliK = pauliK;
+    particles[index].pauliQ = pauliQ;
+    particles[index].pauliAlpha = pauliAlpha;
+    particles[index].polarizability = polarizability;
+    particles[index].axisType = axisType;
+    particles[index].multipoleAtomZ = multipoleAtomZ;
+    particles[index].multipoleAtomX = multipoleAtomX;
+    particles[index].multipoleAtomY = multipoleAtomY;
+}
+
+int HippoNonbondedForce::addException(int particle1, int particle2, double multipoleMultipoleScale, double dipoleMultipoleScale, double dipoleDipoleScale,
+        double dispersionScale, double repulsionScale, bool replace) {
+    map<pair<int, int>, int>::iterator iter = exceptionMap.find(pair<int, int>(particle1, particle2));
+    int newIndex;
+    if (iter == exceptionMap.end())
+        iter = exceptionMap.find(pair<int, int>(particle2, particle1));
+    if (iter != exceptionMap.end()) {
+        if (!replace) {
+            stringstream msg;
+            msg << "HippoNonbondedForce: There is already an exception for particles ";
+            msg << particle1;
+            msg << " and ";
+            msg << particle2;
+            throw OpenMMException(msg.str());
+        }
+        exceptions[iter->second] = ExceptionInfo(particle1, particle2, multipoleMultipoleScale, dipoleMultipoleScale, dipoleDipoleScale, dispersionScale, repulsionScale);
+        newIndex = iter->second;
+        exceptionMap.erase(iter->first);
+    }
+    else {
+        exceptions.push_back(ExceptionInfo(particle1, particle2, multipoleMultipoleScale, dipoleMultipoleScale, dipoleDipoleScale, dispersionScale, repulsionScale));
+        newIndex = exceptions.size()-1;
+    }
+    exceptionMap[pair<int, int>(particle1, particle2)] = newIndex;
+    return newIndex;
+}
+void HippoNonbondedForce::getExceptionParameters(int index, int& particle1, int& particle2, double& multipoleMultipoleScale, double& dipoleMultipoleScale, double& dipoleDipoleScale,
+        double& dispersionScale, double& repulsionScale) const {
+    ASSERT_VALID_INDEX(index, exceptions);
+    particle1 = exceptions[index].particle1;
+    particle2 = exceptions[index].particle2;
+    multipoleMultipoleScale = exceptions[index].multipoleMultipoleScale;
+    dipoleMultipoleScale = exceptions[index].dipoleMultipoleScale;
+    dipoleDipoleScale = exceptions[index].dipoleDipoleScale;
+    dispersionScale = exceptions[index].dispersionScale;
+    repulsionScale = exceptions[index].repulsionScale;
+}
+
+void HippoNonbondedForce::setExceptionParameters(int index, int particle1, int particle2, double multipoleMultipoleScale, double dipoleMultipoleScale, double dipoleDipoleScale,
+        double dispersionScale, double repulsionScale) {
+    ASSERT_VALID_INDEX(index, exceptions);
+    exceptions[index].particle1 = particle1;
+    exceptions[index].particle2 = particle2;
+    exceptions[index].multipoleMultipoleScale = multipoleMultipoleScale;
+    exceptions[index].dipoleMultipoleScale = dipoleMultipoleScale;
+    exceptions[index].dipoleDipoleScale = dipoleDipoleScale;
+    exceptions[index].dispersionScale = dispersionScale;
+    exceptions[index].repulsionScale = repulsionScale;
+}
+
+void HippoNonbondedForce::getInducedDipoles(Context& context, vector<Vec3>& dipoles) {
+    dynamic_cast<HippoNonbondedForceImpl&>(getImplInContext(context)).getInducedDipoles(getContextImpl(context), dipoles);
+}
+
+void HippoNonbondedForce::getLabFramePermanentDipoles(Context& context, vector<Vec3>& dipoles) {
+    dynamic_cast<HippoNonbondedForceImpl&>(getImplInContext(context)).getLabFramePermanentDipoles(getContextImpl(context), dipoles);
+}
+
+ForceImpl* HippoNonbondedForce::createImpl()  const {
+    return new HippoNonbondedForceImpl(*this);
+}
+
+void HippoNonbondedForce::updateParametersInContext(Context& context) {
+    dynamic_cast<HippoNonbondedForceImpl&>(getImplInContext(context)).updateParametersInContext(getContextImpl(context));
+}
--- a/plugins/amoeba/openmmapi/src/HippoNonbondedForceImpl.cpp
+++ b/plugins/amoeba/openmmapi/src/HippoNonbondedForceImpl.cpp
+/* -------------------------------------------------------------------------- *
+ *                               OpenMMAmoeba                                 *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2018 Stanford University and the Authors.           *
+ * Authors: Peter Eastman, Mark Friedrichs                                    *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/internal/ContextImpl.h"
+#include "openmm/internal/HippoNonbondedForceImpl.h"
+#include "openmm/amoebaKernels.h"
+
+using namespace OpenMM;
+using namespace std;
+
+HippoNonbondedForceImpl::HippoNonbondedForceImpl(const HippoNonbondedForce& owner) : owner(owner) {
+}
+
+HippoNonbondedForceImpl::~HippoNonbondedForceImpl() {
+}
+
+void HippoNonbondedForceImpl::initialize(ContextImpl& context) {
+    const System& system = context.getSystem();
+    int numParticles = system.getNumParticles();
+    if (owner.getNumParticles() != numParticles)
+        throw OpenMMException("HippoNonbondedForce must have exactly as many particles as the System it belongs to.");
+
+    // check cutoff < 0.5*boxSize
+
+    if (owner.getNonbondedMethod() == HippoNonbondedForce::PME) {
+        Vec3 boxVectors[3];
+        system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
+        double cutoff = owner.getCutoffDistance();
+        if (cutoff > 0.5*boxVectors[0][0] || cutoff > 0.5*boxVectors[1][1] || cutoff > 0.5*boxVectors[2][2])
+            throw OpenMMException("HippoNonbondedForce: The cutoff distance cannot be greater than half the periodic box size.");
+    }
+
+    double quadrupoleValidationTolerance = 1.0e-05;
+    for (int i = 0; i < numParticles; i++) {
+        int axisType, multipoleAtomZ, multipoleAtomX, multipoleAtomY;
+        double charge, coreCharge, alpha, epsilon, damping, c6, pauliK, pauliQ, pauliAlpha, polarizability;
+        vector<double> dipole, quadrupole;
+        owner.getParticleParameters(i, charge, dipole, quadrupole, coreCharge,
+                               alpha, epsilon, damping, c6, pauliK, pauliQ, pauliAlpha,
+                               polarizability, axisType, multipoleAtomZ, multipoleAtomX, multipoleAtomY);
+
+       // check quadrupole is traceless and symmetric
+
+       double trace = fabs(quadrupole[0] + quadrupole[4] + quadrupole[8]);
+       if (trace > quadrupoleValidationTolerance) {
+             std::stringstream buffer;
+             buffer << "HippoNonbondedForce: qudarupole for particle=" << i;
+             buffer << " has nonzero trace: " << trace << "; AMOEBA plugin assumes traceless quadrupole.";
+             throw OpenMMException(buffer.str());
+       }
+       if (fabs(quadrupole[1] - quadrupole[3]) > quadrupoleValidationTolerance ) {
+             std::stringstream buffer;
+             buffer << "HippoNonbondedForce: XY and YX components of quadrupole for particle=" << i;
+             buffer << "  are not equal: [" << quadrupole[1] << " " << quadrupole[3] << "];";
+             buffer << " AMOEBA plugin assumes symmetric quadrupole tensor.";
+             throw OpenMMException(buffer.str());
+       }
+       if (fabs(quadrupole[2] - quadrupole[6]) > quadrupoleValidationTolerance ) {
+             std::stringstream buffer;
+             buffer << "HippoNonbondedForce: XZ and ZX components of quadrupole for particle=" << i;
+             buffer << "  are not equal: [" << quadrupole[2] << " " << quadrupole[6] << "];";
+             buffer << " AMOEBA plugin assumes symmetric quadrupole tensor.";
+             throw OpenMMException(buffer.str());
+       }
+       if (fabs(quadrupole[5] - quadrupole[7]) > quadrupoleValidationTolerance ) {
+             std::stringstream buffer;
+             buffer << "HippoNonbondedForce: YZ and ZY components of quadrupole for particle=" << i;
+             buffer << "  are not equal: [" << quadrupole[5] << " " << quadrupole[7] << "];";
+             buffer << " AMOEBA plugin assumes symmetric quadrupole tensor.";
+             throw OpenMMException(buffer.str());
+       }
+
+       // only 'Z-then-X', 'Bisector', Z-Bisect, ThreeFold  currently handled
+
+        if (axisType != HippoNonbondedForce::ZThenX     && axisType != HippoNonbondedForce::Bisector &&
+            axisType != HippoNonbondedForce::ZBisect    && axisType != HippoNonbondedForce::ThreeFold &&
+            axisType != HippoNonbondedForce::ZOnly      && axisType != HippoNonbondedForce::NoAxisType) {
+             std::stringstream buffer;
+             buffer << "HippoNonbondedForce: axis type=" << axisType;
+             buffer << " not currently handled - only axisTypes[ ";
+             buffer << HippoNonbondedForce::ZThenX   << ", " << HippoNonbondedForce::Bisector  << ", ";
+             buffer << HippoNonbondedForce::ZBisect  << ", " << HippoNonbondedForce::ThreeFold << ", ";
+             buffer << HippoNonbondedForce::NoAxisType;
+             buffer << "] (ZThenX, Bisector, Z-Bisect, ThreeFold, NoAxisType) currently handled .";
+             throw OpenMMException(buffer.str());
+        }
+        if (axisType != HippoNonbondedForce::NoAxisType && (multipoleAtomZ < 0 || multipoleAtomZ >= numParticles)) {
+            std::stringstream buffer;
+            buffer << "HippoNonbondedForce: invalid z axis particle: " << multipoleAtomZ;
+            throw OpenMMException(buffer.str());
+        }
+        if (axisType != HippoNonbondedForce::NoAxisType && axisType != HippoNonbondedForce::ZOnly &&
+                (multipoleAtomX < 0 || multipoleAtomX >= numParticles)) {
+            std::stringstream buffer;
+            buffer << "HippoNonbondedForce: invalid x axis particle: " << multipoleAtomX;
+            throw OpenMMException(buffer.str());
+        }
+        if ((axisType == HippoNonbondedForce::ZBisect || axisType == HippoNonbondedForce::ThreeFold) &&
+                (multipoleAtomY < 0 || multipoleAtomY >= numParticles)) {
+            std::stringstream buffer;
+            buffer << "HippoNonbondedForce: invalid y axis particle: " << multipoleAtomY;
+            throw OpenMMException(buffer.str());
+        }
+    }
+    kernel = context.getPlatform().createKernel(CalcHippoNonbondedForceKernel::Name(), context);
+    kernel.getAs<CalcHippoNonbondedForceKernel>().initialize(context.getSystem(), owner);
+}
+
+double HippoNonbondedForceImpl::calcForcesAndEnergy(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
+    if ((groups&(1<<owner.getForceGroup())) != 0)
+        return kernel.getAs<CalcHippoNonbondedForceKernel>().execute(context, includeForces, includeEnergy);
+    return 0.0;
+}
+
+std::vector<std::string> HippoNonbondedForceImpl::getKernelNames() {
+    std::vector<std::string> names;
+    names.push_back(CalcHippoNonbondedForceKernel::Name());
+    return names;
+}
+
+void HippoNonbondedForceImpl::getLabFramePermanentDipoles(ContextImpl& context, vector<Vec3>& dipoles) {
+    kernel.getAs<CalcHippoNonbondedForceKernel>().getLabFramePermanentDipoles(context, dipoles);
+}
+
+void HippoNonbondedForceImpl::getInducedDipoles(ContextImpl& context, vector<Vec3>& dipoles) {
+    kernel.getAs<CalcHippoNonbondedForceKernel>().getInducedDipoles(context, dipoles);
+}
+
+void HippoNonbondedForceImpl::updateParametersInContext(ContextImpl& context) {
+    kernel.getAs<CalcHippoNonbondedForceKernel>().copyParametersToContext(context, owner);
+    context.systemChanged();
+}
+
+void HippoNonbondedForceImpl::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
+    kernel.getAs<CalcHippoNonbondedForceKernel>().getPMEParameters(alpha, nx, ny, nz);
+}
+
+void HippoNonbondedForceImpl::getDPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
+    kernel.getAs<CalcHippoNonbondedForceKernel>().getDPMEParameters(alpha, nx, ny, nz);
+}
--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2016 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2019 Stanford University and the Authors.      *
 * Authors: Mark Friedrichs, Peter Eastman                                    *
 * Contributors:                                                              *
 *                                                                            *
@@ -59,6 +59,7 @@ extern "C" OPENMM_EXPORT void registerKernelFactories() {
        platform.registerKernelFactory(CalcAmoebaGeneralizedKirkwoodForceKernel::Name(), factory);
        platform.registerKernelFactory(CalcAmoebaVdwForceKernel::Name(), factory);
        platform.registerKernelFactory(CalcAmoebaWcaDispersionForceKernel::Name(), factory);
+        platform.registerKernelFactory(CalcHippoNonbondedForceKernel::Name(), factory);
    }
    catch (...) {
        // Ignore.  The CUDA platform isn't available.
@@ -112,5 +113,8 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
    if (name == CalcAmoebaWcaDispersionForceKernel::Name())
        return new CudaCalcAmoebaWcaDispersionForceKernel(name, platform, cu, context.getSystem());

+    if (name == CalcHippoNonbondedForceKernel::Name())
+        return new CudaCalcHippoNonbondedForceKernel(name, platform, cu, context.getSystem());
+
    throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
 }
\ No newline at end of file
--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
@@ -32,6 +32,7 @@
 #include "openmm/System.h"
 #include "CudaArray.h"
 #include "CudaContext.h"
+#include "CudaNonbondedUtilities.h"
 #include "CudaSort.h"
 #include <cufft.h>

@@ -381,16 +382,6 @@ public:
    void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
 private:
    class ForceInfo;
-    class SortTrait : public CudaSort::SortTrait {
-        int getDataSize() const {return 8;}
-        int getKeySize() const {return 4;}
-        const char* getDataType() const {return "int2";}
-        const char* getKeyType() const {return "int";}
-        const char* getMinKey() const {return "(-2147483647 - 1)";}
-        const char* getMaxKey() const {return "2147483647";}
-        const char* getMaxValue() const {return "make_int2(2147483647, 2147483647)";}
-        const char* getSortKey() const {return "value.y";}
-    };
    void initializeScaleFactors();
    void computeInducedField(void** recipBoxVectorPointer);
    bool iterateDipolesByDIIS(int iteration);
@@ -451,14 +442,12 @@ private:
    CudaArray pmeBsplineModuliX;
    CudaArray pmeBsplineModuliY;
    CudaArray pmeBsplineModuliZ;
-    CudaArray pmeIgrid;
    CudaArray pmePhi;
    CudaArray pmePhid;
    CudaArray pmePhip;
    CudaArray pmePhidp;
    CudaArray pmeCphi;
    CudaArray lastPositions;
-    CudaSort* sort;
    cufftHandle fft;
    CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
    CUfunction pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeFinishSpreadChargeKernel, pmeConvolutionKernel;
@@ -630,6 +619,136 @@ private:
    CUfunction forceKernel;
 };

+/**
+ * This kernel is invoked by HippoNonbondedForce to calculate the forces acting on the system and the energy of the system.
+ */
+class CudaCalcHippoNonbondedForceKernel : public CalcHippoNonbondedForceKernel {
+public:
+    CudaCalcHippoNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
+    ~CudaCalcHippoNonbondedForceKernel();
+    /**
+     * Initialize the kernel.
+     * 
+     * @param system     the System this kernel will be applied to
+     * @param force      the HippoNonbondedForce this kernel will be used for
+     */
+    void initialize(const System& system, const HippoNonbondedForce& force);
+    /**
+     * Execute the kernel to calculate the forces and/or energy.
+     *
+     * @param context        the context in which to execute this kernel
+     * @param includeForces  true if forces should be calculated
+     * @param includeEnergy  true if the energy should be calculated
+     * @return the potential energy due to the force
+     */
+    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
+    /**
+     * Get the induced dipole moments of all particles.
+     * 
+     * @param context    the Context for which to get the induced dipoles
+     * @param dipoles    the induced dipole moment of particle i is stored into the i'th element
+     */
+    void getInducedDipoles(ContextImpl& context, std::vector<Vec3>& dipoles);
+    /**
+     * Get the fixed dipole moments of all particles in the global reference frame.
+     * 
+     * @param context    the Context for which to get the fixed dipoles
+     * @param dipoles    the fixed dipole moment of particle i is stored into the i'th element
+     */
+    void getLabFramePermanentDipoles(ContextImpl& context, std::vector<Vec3>& dipoles);
+    /** 
+     * Calculate the electrostatic potential given vector of grid coordinates.
+     *
+     * @param context                      context
+     * @param inputGrid                    input grid coordinates
+     * @param outputElectrostaticPotential output potential 
+     */
+    void getElectrostaticPotential(ContextImpl& context, const std::vector< Vec3 >& inputGrid,
+                                   std::vector< double >& outputElectrostaticPotential);
+    /**
+     * Copy changed parameters over to a context.
+     *
+     * @param context    the context to copy parameters to
+     * @param force      the HippoNonbondedForce to copy the parameters from
+     */
+    void copyParametersToContext(ContextImpl& context, const HippoNonbondedForce& force);
+    /**
+     * Get the parameters being used for PME.
+     * 
+     * @param alpha   the separation parameter
+     * @param nx      the number of grid points along the X axis
+     * @param ny      the number of grid points along the Y axis
+     * @param nz      the number of grid points along the Z axis
+     */
+    void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
+    /**
+     * Get the parameters being used for dispersion PME.
+     * 
+     * @param alpha   the separation parameter
+     * @param nx      the number of grid points along the X axis
+     * @param ny      the number of grid points along the Y axis
+     * @param nz      the number of grid points along the Z axis
+     */
+    void getDPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
+private:
+    class ForceInfo;
+    class TorquePostComputation;
+    class SortTrait : public CudaSort::SortTrait {
+        int getDataSize() const {return 8;}
+        int getKeySize() const {return 4;}
+        const char* getDataType() const {return "int2";}
+        const char* getKeyType() const {return "int";}
+        const char* getMinKey() const {return "(-2147483647-1)";}
+        const char* getMaxKey() const {return "2147483647";}
+        const char* getMaxValue() const {return "make_int2(2147483647, 2147483647)";}
+        const char* getSortKey() const {return "value.y";}
+    };
+    void computeInducedField(void** recipBoxVectorPointer, int optOrder);
+    void computeExtrapolatedDipoles(void** recipBoxVectorPointer);
+    void ensureMultipolesValid(ContextImpl& context);
+    void addTorquesToForces();
+    void createFieldKernel(const std::string& interactionSrc, std::vector<CudaArray*> params, CudaArray& fieldBuffer,
+        CUfunction& kernel, std::vector<void*>& args, CUfunction& exceptionKernel, std::vector<void*>& exceptionArgs,
+        CudaArray& exceptionScale);
+    int numParticles, maxExtrapolationOrder, maxTiles;
+    int gridSizeX, gridSizeY, gridSizeZ;
+    int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
+    double pmeAlpha, dpmeAlpha, cutoff;
+    bool usePME, hasInitializedKernels, hasInitializedFFT, multipolesAreValid;
+    std::vector<double> extrapolationCoefficients;
+    CudaContext& cu;
+    const System& system;
+    CudaArray multipoleParticles;
+    CudaArray coreCharge, valenceCharge, alpha, epsilon, damping, c6, pauliK, pauliQ, pauliAlpha, polarizability;
+    CudaArray localDipoles, labDipoles, fracDipoles;
+    CudaArray localQuadrupoles, labQuadrupoles[5], fracQuadrupoles;
+    CudaArray field;
+    CudaArray inducedField;
+    CudaArray torque;
+    CudaArray inducedDipole;
+    CudaArray extrapolatedDipole, extrapolatedPhi;
+    CudaArray pmeGrid1, pmeGrid2;
+    CudaArray pmeAtomGridIndex;
+    CudaArray pmeBsplineModuliX, pmeBsplineModuliY, pmeBsplineModuliZ;
+    CudaArray dpmeBsplineModuliX, dpmeBsplineModuliY, dpmeBsplineModuliZ;
+    CudaArray pmePhi, pmePhidp, pmeCphi;
+    CudaArray lastPositions;
+    CudaArray exceptionScales[5];
+    CudaArray exceptionAtoms;
+    CudaSort* sort;
+    cufftHandle fftForward, fftBackward, dfftForward, dfftBackward;
+    CUfunction computeMomentsKernel, fixedFieldKernel, fixedFieldExceptionKernel, mutualFieldKernel, mutualFieldExceptionKernel, computeExceptionsKernel;
+    CUfunction recordInducedDipolesKernel, mapTorqueKernel;
+    CUfunction pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeFinishSpreadChargeKernel, pmeConvolutionKernel;
+    CUfunction pmeFixedPotentialKernel, pmeInducedPotentialKernel, pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel;
+    CUfunction pmeSelfEnergyKernel;
+    CUfunction dpmeGridIndexKernel, dpmeSpreadChargeKernel, dpmeFinishSpreadChargeKernel, dpmeEvalEnergyKernel, dpmeConvolutionKernel, dpmeInterpolateForceKernel;
+    CUfunction initExtrapolatedKernel, iterateExtrapolatedKernel, computeExtrapolatedKernel, polarizationEnergyKernel;
+    CUfunction pmeTransformMultipolesKernel, pmeTransformPotentialKernel;
+    std::vector<void*> fixedFieldArgs, fixedFieldExceptionArgs, mutualFieldArgs, mutualFieldExceptionArgs, computeExceptionsArgs;
+    static const int PmeOrder = 5;
+};
+
 } // namespace OpenMM

 #endif /*AMOEBA_OPENMM_CUDAKERNELS_H*/
--- a/plugins/amoeba/platforms/cuda/src/kernels/hippoComputeField.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/hippoComputeField.cu
+__device__ void computeDirectFieldDampingFactors(real alpha, real r, real& fdamp3, real& fdamp5, real& fdamp7) {
+    real ar = alpha*r;
+    real ar2 = ar*ar;
+    real ar3 = ar2*ar;
+    real ar4 = ar2*ar2;
+    real expAR = EXP(-ar);
+    real one = 1;
+    fdamp3 = 1 - (1 + ar + ar2*(one/2))*expAR;
+    fdamp5 = 1 - (1 + ar + ar2*(one/2) + ar3*(one/6))*expAR;
+    fdamp7 = 1 - (1 + ar + ar2*(one/2) + ar3*(one/6) + ar4*(one/30))*expAR;
+}
+
+__device__ void computeMutualFieldDampingFactors(real alphaI, real alphaJ, real r, real& fdamp3, real& fdamp5) {
+    real arI = alphaI*r;
+    real arI2 = arI*arI;
+    real arI3 = arI2*arI;
+    real expARI = EXP(-arI);
+    real one = 1;
+    real seven = 7;
+    if (alphaI == alphaJ) {
+        real arI4 = arI3*arI;
+        real arI5 = arI4*arI;
+        fdamp3 = 1 - (1 + arI + arI2*(one/2) + arI3*(seven/48) + arI4*(one/48))*expARI;
+        fdamp5 = 1 - (1 + arI + arI2*(one/2) + arI3*(one/6) + arI4*(one/24) + arI5*(one/144))*expARI;
+    }
+    else {
+        real arJ = alphaJ*r;
+        real arJ2 = arJ*arJ;
+        real arJ3 = arJ2*arJ;
+        real expARJ = EXP(-arJ);
+        real aI2 = alphaI*alphaI;
+        real aJ2 = alphaJ*alphaJ;
+        real A = aJ2/(aJ2-aI2);
+        real B = aI2/(aI2-aJ2);
+        real A2 = A*A;
+        real B2 = B*B;
+        fdamp3 = 1 - A2*(1 + arI + arI2*(one/2))*expARI -
+                     B2*(1 + arJ + arJ2*(one/2))*expARJ -
+                     2*A2*B*(1 + arI)*expARI -
+                     2*B2*A*(1 + arJ)*expARJ;
+        fdamp5 = 1 - A2*(1 + arI + arI2*(one/2) + arI3*(one/6))*expARI -
+                     B2*(1 + arJ + arJ2*(one/2) + arJ3*(one/6))*expARJ -
+                     2*A2*B*(1 + arI + arI2*(one/3))*expARI -
+                     2*B2*A*(1 + arJ + arJ2*(one/3))*expARJ;
+    }
+}
+
+typedef struct {
+    real3 pos;
+    real3 field;
+    ATOM_PARAMETER_DATA
+} AtomData;
+
+/**
+ * Compute the electrostatic field.
+ */
+extern "C" __global__ void computeField(const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions,
+        const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ fieldBuffers,
+#ifdef USE_CUTOFF
+        const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
+        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
+        const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
+#else
+        unsigned int numTiles
+#endif
+        PARAMETER_ARGUMENTS) {
+    const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
+    const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
+    const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
+    const unsigned int tbx = threadIdx.x - tgx;
+    __shared__ AtomData localData[THREAD_BLOCK_SIZE];
+
+    // First loop: process tiles that contain exclusions.
+    
+    const unsigned int firstExclusionTile = warp*NUM_TILES_WITH_EXCLUSIONS/totalWarps;
+    const unsigned int lastExclusionTile = (warp+1)*NUM_TILES_WITH_EXCLUSIONS/totalWarps;
+    for (int tile = firstExclusionTile; tile < lastExclusionTile; tile++) {
+        const ushort2 tileIndices = exclusionTiles[tile];
+        const unsigned int x = tileIndices.x;
+        const unsigned int y = tileIndices.y;
+        real3 field = make_real3(0);
+        unsigned int atom1 = x*TILE_SIZE + tgx;
+        real4 pos1 = posq[atom1];
+        LOAD_ATOM1_PARAMETERS
+        unsigned int excl = exclusions[tile*TILE_SIZE+tgx];
+        if (x == y) {
+            // This tile is on the diagonal.
+
+            const unsigned int localAtomIndex = threadIdx.x;
+            localData[localAtomIndex].pos = make_real3(pos1.x, pos1.y, pos1.z);
+            LOAD_LOCAL_PARAMETERS_FROM_1
+            for (unsigned int j = 0; j < TILE_SIZE; j++) {
+                int atom2 = tbx+j;
+                real3 pos2 = localData[atom2].pos;
+                real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
+#ifdef USE_PERIODIC
+                APPLY_PERIODIC_TO_DELTA(delta)
+#endif
+                real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
+#ifdef USE_CUTOFF
+                if (r2 < CUTOFF_SQUARED) {
+#endif
+                    real invR = RSQRT(r2);
+                    real r = r2*invR;
+                    LOAD_ATOM2_PARAMETERS
+                    atom2 = y*TILE_SIZE+j;
+                    real3 tempField1 = make_real3(0);
+                    real3 tempField2 = make_real3(0);
+                    bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
+                    if (!isExcluded && atom1 != atom2) {
+                        COMPUTE_FIELD
+                    }
+                    field += tempField1;
+#ifdef USE_CUTOFF
+                }
+#endif
+                excl >>= 1;
+            }
+        }
+        else {
+            // This is an off-diagonal tile.
+
+            const unsigned int localAtomIndex = threadIdx.x;
+            unsigned int j = y*TILE_SIZE + tgx;
+            real4 tempPosq = posq[j];
+            localData[localAtomIndex].pos = make_real3(tempPosq.x, tempPosq.y, tempPosq.z);
+            LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
+            localData[localAtomIndex].field = make_real3(0);
+            excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
+            unsigned int tj = tgx;
+            for (j = 0; j < TILE_SIZE; j++) {
+                int atom2 = tbx+tj;
+                real3 pos2 = localData[atom2].pos;
+                real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
+#ifdef USE_PERIODIC
+                APPLY_PERIODIC_TO_DELTA(delta)
+#endif
+                real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
+#ifdef USE_CUTOFF
+                if (r2 < CUTOFF_SQUARED) {
+#endif
+                    real invR = RSQRT(r2);
+                    real r = r2*invR;
+                    LOAD_ATOM2_PARAMETERS
+                    atom2 = y*TILE_SIZE+tj;
+                    real3 tempField1 = make_real3(0);
+                    real3 tempField2 = make_real3(0);
+                    bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
+                    if (!isExcluded) {
+                        COMPUTE_FIELD
+                    }
+                    field += tempField1;
+                    localData[tbx+tj].field += tempField2;
+#ifdef USE_CUTOFF
+                }
+#endif
+                excl >>= 1;
+                tj = (tj + 1) & (TILE_SIZE - 1);
+            }
+        }
+
+        // Write results.
+
+        unsigned int offset1 = x*TILE_SIZE + tgx;
+        atomicAdd(&fieldBuffers[offset1], static_cast<unsigned long long>((long long) (field.x*0x100000000)));
+        atomicAdd(&fieldBuffers[offset1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (field.y*0x100000000)));
+        atomicAdd(&fieldBuffers[offset1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (field.z*0x100000000)));
+        if (x != y) {
+            unsigned int offset2 = y*TILE_SIZE + tgx;
+            atomicAdd(&fieldBuffers[offset2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0x100000000)));
+            atomicAdd(&fieldBuffers[offset2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0x100000000)));
+            atomicAdd(&fieldBuffers[offset2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0x100000000)));
+        }
+    }
+
+    // Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
+    // of them (no cutoff).
+
+#ifdef USE_CUTOFF
+    unsigned int numTiles = interactionCount[0];
+    if (numTiles > maxTiles)
+        return; // There wasn't enough memory for the neighbor list.
+    int tile = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
+    int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
+#else
+    int tile = (int) (warp*(long long)numTiles/totalWarps);
+    int end = (int) ((warp+1)*(long long)numTiles/totalWarps);
+#endif
+    int skipBase = 0;
+    int currentSkipIndex = tbx;
+    __shared__ int atomIndices[THREAD_BLOCK_SIZE];
+    __shared__ volatile int skipTiles[THREAD_BLOCK_SIZE];
+    skipTiles[threadIdx.x] = -1;
+    
+    while (tile < end) {
+        real3 field = make_real3(0);
+        bool includeTile = true;
+        
+        // Extract the coordinates of this tile.
+        
+        int x, y;
+        bool singlePeriodicCopy = false;
+#ifdef USE_CUTOFF
+        x = tiles[tile];
+        real4 blockSizeX = blockSize[x];
+        singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
+                              0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
+                              0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
+#else
+        y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*tile));
+        x = (tile-y*NUM_BLOCKS+y*(y+1)/2);
+        if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
+            y += (x < y ? -1 : 1);
+            x = (tile-y*NUM_BLOCKS+y*(y+1)/2);
+        }
+
+        // Skip over tiles that have exclusions, since they were already processed.
+
+        while (skipTiles[tbx+TILE_SIZE-1] < tile) {
+            if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
+                ushort2 tile = exclusionTiles[skipBase+tgx];
+                skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
+            }
+            else
+                skipTiles[threadIdx.x] = end;
+            skipBase += TILE_SIZE;            
+            currentSkipIndex = tbx;
+        }
+        while (skipTiles[currentSkipIndex] < tile)
+            currentSkipIndex++;
+        includeTile = (skipTiles[currentSkipIndex] != tile);
+#endif
+        if (includeTile) {
+            unsigned int atom1 = x*TILE_SIZE + tgx;
+
+            // Load atom data for this tile.
+            
+            real4 pos1 = posq[atom1];
+            LOAD_ATOM1_PARAMETERS
+            const unsigned int localAtomIndex = threadIdx.x;
+#ifdef USE_CUTOFF
+            unsigned int j = interactingAtoms[tile*TILE_SIZE+tgx];
+#else
+            unsigned int j = y*TILE_SIZE + tgx;
+#endif
+            atomIndices[threadIdx.x] = j;
+            if (j < PADDED_NUM_ATOMS) {
+                real4 tempPosq = posq[j];
+                localData[localAtomIndex].pos = make_real3(tempPosq.x, tempPosq.y, tempPosq.z);
+                LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
+                localData[localAtomIndex].field = make_real3(0);
+            }
+#ifdef USE_PERIODIC
+            if (singlePeriodicCopy) {
+                // The box is small enough that we can just translate all the atoms into a single periodic
+                // box, then skip having to apply periodic boundary conditions later.
+
+                real4 blockCenterX = blockCenter[x];
+                APPLY_PERIODIC_TO_POS_WITH_CENTER(pos1, blockCenterX)
+                APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[threadIdx.x].pos, blockCenterX)
+                unsigned int tj = tgx;
+                for (unsigned int j = 0; j < TILE_SIZE; j++) {
+                    int atom2 = tbx+tj;
+                    real3 pos2 = localData[atom2].pos;
+                    real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
+                    real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
+                    if (r2 < CUTOFF_SQUARED) {
+                        real invR = RSQRT(r2);
+                        real r = r2*invR;
+                        LOAD_ATOM2_PARAMETERS
+                        atom2 = atomIndices[tbx+tj];
+                        real3 tempField1 = make_real3(0);
+                        real3 tempField2 = make_real3(0);
+                        if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
+                            COMPUTE_FIELD
+                        }
+                        field += tempField1;
+                        localData[tbx+tj].field += tempField2;
+                    }
+                    tj = (tj + 1) & (TILE_SIZE - 1);
+                }
+            }
+            else
+#endif
+            {
+                // We need to apply periodic boundary conditions separately for each interaction.
+
+                unsigned int tj = tgx;
+                for (unsigned int j = 0; j < TILE_SIZE; j++) {
+                    int atom2 = tbx+tj;
+                    real3 pos2 = localData[atom2].pos;
+                    real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
+#ifdef USE_PERIODIC
+                    APPLY_PERIODIC_TO_DELTA(delta)
+#endif
+                    real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
+#ifdef USE_CUTOFF
+                    if (r2 < CUTOFF_SQUARED) {
+#endif
+                        real invR = RSQRT(r2);
+                        real r = r2*invR;
+                        LOAD_ATOM2_PARAMETERS
+                        atom2 = atomIndices[tbx+tj];
+                        real3 tempField1 = make_real3(0);
+                        real3 tempField2 = make_real3(0);
+                        if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
+                            COMPUTE_FIELD
+                        }
+                        field += tempField1;
+                        localData[tbx+tj].field += tempField2;
+#ifdef USE_CUTOFF
+                    }
+#endif
+                    tj = (tj + 1) & (TILE_SIZE - 1);
+                }
+            }
+        
+            // Write results.
+
+            atomicAdd(&fieldBuffers[atom1], static_cast<unsigned long long>((long long) (field.x*0x100000000)));
+            atomicAdd(&fieldBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (field.y*0x100000000)));
+            atomicAdd(&fieldBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (field.z*0x100000000)));
+#ifdef USE_CUTOFF
+            unsigned int atom2 = atomIndices[threadIdx.x];
+#else
+            unsigned int atom2 = y*TILE_SIZE + tgx;
+#endif
+            if (atom2 < PADDED_NUM_ATOMS) {
+                atomicAdd(&fieldBuffers[atom2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0x100000000)));
+                atomicAdd(&fieldBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0x100000000)));
+                atomicAdd(&fieldBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0x100000000)));
+            }
+        }
+        tile++;
+    }
+}
+
+#define COMPUTING_EXCEPTIONS
+
+/**
+ * Compute the electrostatic field from nonbonded exceptions.
+ */
+extern "C" __global__ void computeFieldExceptions(const real4* __restrict__ posq, unsigned long long* __restrict__ fieldBuffers,
+        const int2* __restrict__ exceptionAtoms, const real* __restrict__ exceptionScale
+#ifdef USE_CUTOFF
+        , real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
+#endif
+        PARAMETER_ARGUMENTS) {
+    for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_EXCEPTIONS; index += blockDim.x*gridDim.x) {
+        int2 atoms = exceptionAtoms[index];
+        int atom1 = atoms.x;
+        int atom2 = atoms.y;
+        real4 pos1 = posq[atom1];
+        real4 pos2 = posq[atom2];
+        LOAD_ATOM1_PARAMETERS
+        LOAD_ATOM2_PARAMETERS_FROM_GLOBAL
+        real scale = exceptionScale[index];
+        real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
+#ifdef USE_PERIODIC
+        APPLY_PERIODIC_TO_DELTA(delta)
+#endif
+        real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
+#ifdef USE_CUTOFF
+        if (r2 < CUTOFF_SQUARED) {
+#endif
+            real invR = RSQRT(r2);
+            real r = r2*invR;
+            real3 tempField1 = make_real3(0);
+            real3 tempField2 = make_real3(0);
+            COMPUTE_FIELD
+            atomicAdd(&fieldBuffers[atom1], static_cast<unsigned long long>((long long) (tempField1.x*0x100000000)));
+            atomicAdd(&fieldBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempField1.y*0x100000000)));
+            atomicAdd(&fieldBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempField1.z*0x100000000)));
+            atomicAdd(&fieldBuffers[atom2], static_cast<unsigned long long>((long long) (tempField2.x*0x100000000)));
+            atomicAdd(&fieldBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempField2.y*0x100000000)));
+            atomicAdd(&fieldBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempField2.z*0x100000000)));
+#ifdef USE_CUTOFF
+        }
+#endif
+    }
+}
\ No newline at end of file
--- a/plugins/amoeba/platforms/cuda/src/kernels/hippoFixedField.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/hippoFixedField.cu
+real invR2 = invR*invR;
+real invR3 = invR*invR2;
+real invR5 = invR3*invR2;
+real invR7 = invR5*invR2;
+
+#if USE_EWALD
+// Calculate the error function damping terms.
+
+real ralpha = PME_ALPHA*r;
+real bn0 = erfc(ralpha)*invR;
+real alsq2 = 2*PME_ALPHA*PME_ALPHA;
+real alsq2n = 1/(SQRT_PI*PME_ALPHA);
+real exp2a = EXP(-(ralpha*ralpha));
+alsq2n *= alsq2;
+real bn1 = (bn0+alsq2n*exp2a)*invR2;
+alsq2n *= alsq2;
+real bn2 = (3*bn1+alsq2n*exp2a)*invR2;
+alsq2n *= alsq2;
+real bn3 = (5*bn2+alsq2n*exp2a)*invR2;
+#endif
+
+// Calculate the field at particle 1 due to multipoles at particle 2
+
+real fdamp3, fdamp5, fdamp7;
+computeDirectFieldDampingFactors(alpha2, r, fdamp3, fdamp5, fdamp7);
+#ifndef COMPUTING_EXCEPTIONS
+real scale = 1;
+#endif
+#ifdef USE_EWALD
+real rr3 = bn1 - (1-scale)*invR3;
+real rr3j = bn1 - (1-scale*fdamp3)*invR3;
+real rr5j = bn2 - (1-scale*fdamp5)*3*invR5;
+real rr7j = bn3 - (1-scale*fdamp7)*15*invR7;
+#else
+real rr3 = scale*invR3;
+real rr3j = scale*fdamp3*invR3;
+real rr5j = scale*3*fdamp5*invR5;
+real rr7j = scale*15*fdamp7*invR7;
+#endif
+real qZZ2 = -qXX2-qYY2;
+real3 qDotDelta2 = make_real3(delta.x*qXX2 + delta.y*qXY2 + delta.z*qXZ2,
+                              delta.x*qXY2 + delta.y*qYY2 + delta.z*qYZ2,
+                              delta.x*qXZ2 + delta.y*qYZ2 + delta.z*qZZ2);
+real dipoleDelta2 = dot(dipole2, delta);
+real qdpoleDelta2 = dot(qDotDelta2, delta);
+real factor2 = rr3*coreCharge2 + rr3j*valenceCharge2 - rr5j*dipoleDelta2 + rr7j*qdpoleDelta2;
+tempField1 = -delta*factor2 - dipole2*rr3j + qDotDelta2*2*rr5j;
+
+// Calculate the field at particle 2 due to multipoles at particle 1
+
+computeDirectFieldDampingFactors(alpha1, r, fdamp3, fdamp5, fdamp7);
+#ifdef USE_EWALD
+real rr3i = bn1 - (1-scale*fdamp3)*invR3;
+real rr5i = bn2 - (1-scale*fdamp5)*3*invR5;
+real rr7i = bn3 - (1-scale*fdamp7)*15*invR7;
+#else
+real rr3i = scale*fdamp3*invR3;
+real rr5i = scale*3*fdamp5*invR5;
+real rr7i = scale*15*fdamp7*invR7;
+#endif
+real qZZ1 = -qXX1-qYY1;
+real3 qDotDelta1 = make_real3(delta.x*qXX1 + delta.y*qXY1 + delta.z*qXZ1,
+                              delta.x*qXY1 + delta.y*qYY1 + delta.z*qYZ1,
+                              delta.x*qXZ1 + delta.y*qYZ1 + delta.z*qZZ1);
+real dipoleDelta1 = dot(dipole1, delta);
+real qdpoleDelta1 = dot(qDotDelta1, delta);
+real factor1 = rr3*coreCharge1 + rr3i*valenceCharge1 + rr5i*dipoleDelta1 + rr7i*qdpoleDelta1;
+tempField2 = delta*factor1 - dipole1*rr3i - qDotDelta1*2*rr5i;