Cleanup to AmoebaMultipoleForce (#3068)

* Cleanup to CUDA AmoebaMultipoleForce * Deleted obsolete SOR code

Cleanup to AmoebaMultipoleForce (#3068)
* Cleanup to CUDA AmoebaMultipoleForce * Deleted obsolete SOR code
07b7d2d3 · Peter Eastman · GitHub · 8c5479c5 · 07b7d2d3 · 07b7d2d3
Unverified Commit 07b7d2d3 authored Mar 16, 2021 by Peter Eastman Committed by GitHub Mar 16, 2021
10 changed files
--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
@@ -223,8 +223,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
    double4* posqd = (double4*) &temp[0];
    vector<float2> dampingAndTholeVec;
    vector<float> polarizabilityVec;
-    vector<float> molecularDipolesVec;
-    vector<float> molecularQuadrupolesVec;
+    vector<float> localDipolesVec;
+    vector<float> localQuadrupolesVec;
    vector<int4> multipoleParticlesVec;
    for (int i = 0; i < numMultipoles; i++) {
        double charge, thole, damping, polarity;
@@ -239,15 +239,15 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
        polarizabilityVec.push_back((float) polarity);
        multipoleParticlesVec.push_back(make_int4(atomX, atomY, atomZ, axisType));
        for (int j = 0; j < 3; j++)
-            molecularDipolesVec.push_back((float) dipole[j]);
-        molecularQuadrupolesVec.push_back((float) quadrupole[0]);
-        molecularQuadrupolesVec.push_back((float) quadrupole[1]);
-        molecularQuadrupolesVec.push_back((float) quadrupole[2]);
-        molecularQuadrupolesVec.push_back((float) quadrupole[4]);
-        molecularQuadrupolesVec.push_back((float) quadrupole[5]);
+            localDipolesVec.push_back((float) dipole[j]);
+        localQuadrupolesVec.push_back((float) quadrupole[0]);
+        localQuadrupolesVec.push_back((float) quadrupole[1]);
+        localQuadrupolesVec.push_back((float) quadrupole[2]);
+        localQuadrupolesVec.push_back((float) quadrupole[4]);
+        localQuadrupolesVec.push_back((float) quadrupole[5]);
    }
    hasQuadrupoles = false;
-    for (auto q : molecularQuadrupolesVec)
+    for (auto q : localQuadrupolesVec)
        if (q != 0.0)
            hasQuadrupoles = true;
    int paddedNumAtoms = cu.getPaddedNumAtoms();
@@ -256,38 +256,38 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
        polarizabilityVec.push_back(0);
        multipoleParticlesVec.push_back(make_int4(0, 0, 0, 0));
        for (int j = 0; j < 3; j++)
-            molecularDipolesVec.push_back(0);
+            localDipolesVec.push_back(0);
        for (int j = 0; j < 5; j++)
-            molecularQuadrupolesVec.push_back(0);
+            localQuadrupolesVec.push_back(0);
    }
    dampingAndThole.initialize<float2>(cu, paddedNumAtoms, "dampingAndThole");
    polarizability.initialize<float>(cu, paddedNumAtoms, "polarizability");
    multipoleParticles.initialize<int4>(cu, paddedNumAtoms, "multipoleParticles");
-    molecularDipoles.initialize<float>(cu, 3*paddedNumAtoms, "molecularDipoles");
-    molecularQuadrupoles.initialize<float>(cu, 5*paddedNumAtoms, "molecularQuadrupoles");
+    localDipoles.initialize<float>(cu, 3*paddedNumAtoms, "localDipoles");
+    localQuadrupoles.initialize<float>(cu, 5*paddedNumAtoms, "localQuadrupoles");
    lastPositions.initialize(cu, cu.getPosq().getSize(), cu.getPosq().getElementSize(), "lastPositions");
    dampingAndThole.upload(dampingAndTholeVec);
    polarizability.upload(polarizabilityVec);
    multipoleParticles.upload(multipoleParticlesVec);
-    molecularDipoles.upload(molecularDipolesVec);
-    molecularQuadrupoles.upload(molecularQuadrupolesVec);
+    localDipoles.upload(localDipolesVec);
+    localQuadrupoles.upload(localQuadrupolesVec);
    posq.upload(&temp[0]);
    
    // Create workspace arrays.
    
    polarizationType = force.getPolarizationType();
    int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
-    labFrameDipoles.initialize(cu, 3*paddedNumAtoms, elementSize, "labFrameDipoles");
-    labFrameQuadrupoles.initialize(cu, 5*paddedNumAtoms, elementSize, "labFrameQuadrupoles");
+    labDipoles.initialize(cu, paddedNumAtoms, 3*elementSize, "labDipoles");
+    labQuadrupoles.initialize(cu, 5*paddedNumAtoms, elementSize, "labQuadrupoles");
    sphericalDipoles.initialize(cu, 3*paddedNumAtoms, elementSize, "sphericalDipoles");
    sphericalQuadrupoles.initialize(cu, 5*paddedNumAtoms, elementSize, "sphericalQuadrupoles");
-    fracDipoles.initialize(cu, 3*paddedNumAtoms, elementSize, "fracDipoles");
+    fracDipoles.initialize(cu, paddedNumAtoms, 3*elementSize, "fracDipoles");
    fracQuadrupoles.initialize(cu, 6*paddedNumAtoms, elementSize, "fracQuadrupoles");
    field.initialize(cu, 3*paddedNumAtoms, sizeof(long long), "field");
    fieldPolar.initialize(cu, 3*paddedNumAtoms, sizeof(long long), "fieldPolar");
    torque.initialize(cu, 3*paddedNumAtoms, sizeof(long long), "torque");
-    inducedDipole.initialize(cu, 3*paddedNumAtoms, elementSize, "inducedDipole");
-    inducedDipolePolar.initialize(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar");
+    inducedDipole.initialize(cu, paddedNumAtoms, 3*elementSize, "inducedDipole");
+    inducedDipolePolar.initialize(cu, paddedNumAtoms, 3*elementSize, "inducedDipolePolar");
    if (polarizationType == AmoebaMultipoleForce::Mutual) {
        inducedDipoleErrors.initialize(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors");
        prevDipoles.initialize(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipoles");
@@ -423,12 +423,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
    defines["EXTRAPOLATION_COEFFICIENTS_SUM"] = coefficients.str();
    if (usePME) {
        int nx, ny, nz;
-        force.getPMEParameters(alpha, nx, ny, nz);
-        if (nx == 0 || alpha == 0.0) {
+        force.getPMEParameters(pmeAlpha, nx, ny, nz);
+        if (nx == 0 || pmeAlpha == 0) {
            NonbondedForce nb;
            nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance());
            nb.setCutoffDistance(force.getCutoffDistance());
-            NonbondedForceImpl::calcPMEParameters(system, nb, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
+            NonbondedForceImpl::calcPMEParameters(system, nb, pmeAlpha, gridSizeX, gridSizeY, gridSizeZ, false);
            gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
            gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
            gridSizeZ = CudaFFT3D::findLegalDimension(gridSizeZ);
@@ -437,7 +437,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
            gridSizeY = CudaFFT3D::findLegalDimension(ny);
            gridSizeZ = CudaFFT3D::findLegalDimension(nz);
        }
-        defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
+        defines["EWALD_ALPHA"] = cu.doubleToString(pmeAlpha);
        defines["SQRT_PI"] = cu.doubleToString(sqrt(M_PI));
        defines["USE_EWALD"] = "";
        defines["USE_CUTOFF"] = "";
@@ -512,7 +512,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
        // Create the PME kernels.

        map<string, string> pmeDefines;
-        pmeDefines["EWALD_ALPHA"] = cu.doubleToString(alpha);
+        pmeDefines["EWALD_ALPHA"] = cu.doubleToString(pmeAlpha);
        pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
        pmeDefines["NUM_ATOMS"] = cu.intToString(numMultipoles);
        pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
@@ -563,7 +563,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
            throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
        hasInitializedFFT = true;

-        // Initialize the b-spline moduli.
+        // Initialize the B-spline moduli.

        double data[PmeOrder];
        double x = 0.0;
@@ -756,21 +756,20 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
    // Compute the lab frame moments.

    void* computeMomentsArgs[] = {&cu.getPosq().getDevicePointer(), &multipoleParticles.getDevicePointer(),
-        &molecularDipoles.getDevicePointer(), &molecularQuadrupoles.getDevicePointer(),
-        &labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(),
+        &localDipoles.getDevicePointer(), &localQuadrupoles.getDevicePointer(),
+        &labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(),
        &sphericalDipoles.getDevicePointer(), &sphericalQuadrupoles.getDevicePointer()};
    cu.executeKernel(computeMomentsKernel, computeMomentsArgs, cu.getNumAtoms());
    int startTileIndex = nb.getStartTileIndex();
    int numTileIndices = nb.getNumTiles();
    int numForceThreadBlocks = nb.getNumForceThreadBlocks();
-    int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
    if (!pmeGrid.isInitialized()) {
        // Compute induced dipoles.
        
        if (gkKernel == NULL) {
            void* computeFixedFieldArgs[] = {&field.getDevicePointer(), &fieldPolar.getDevicePointer(), &cu.getPosq().getDevicePointer(),
                &covalentFlags.getDevicePointer(), &polarizationGroupFlags.getDevicePointer(), &nb.getExclusionTiles().getDevicePointer(), &startTileIndex, &numTileIndices,
-                &labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(), &dampingAndThole.getDevicePointer()};
+                &labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(), &dampingAndThole.getDevicePointer()};
            cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*fixedFieldThreads, fixedFieldThreads);
            void* recordInducedDipolesArgs[] = {&field.getDevicePointer(), &fieldPolar.getDevicePointer(),
                &inducedDipole.getDevicePointer(), &inducedDipolePolar.getDevicePointer(), &polarizability.getDevicePointer()};
@@ -781,7 +780,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
            void* computeFixedFieldArgs[] = {&field.getDevicePointer(), &fieldPolar.getDevicePointer(), &cu.getPosq().getDevicePointer(),
                &covalentFlags.getDevicePointer(), &polarizationGroupFlags.getDevicePointer(), &nb.getExclusionTiles().getDevicePointer(), &startTileIndex, &numTileIndices,
                &gkKernel->getBornRadii().getDevicePointer(), &gkKernel->getField().getDevicePointer(),
-                &labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(), &dampingAndThole.getDevicePointer()};
+                &labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(), &dampingAndThole.getDevicePointer()};
            cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*fixedFieldThreads, fixedFieldThreads);
            void* recordInducedDipolesArgs[] = {&field.getDevicePointer(), &fieldPolar.getDevicePointer(),
                &gkKernel->getField().getDevicePointer(), &gkKernel->getInducedDipoles().getDevicePointer(),
@@ -810,7 +809,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
            &inducedDipole.getDevicePointer(), &inducedDipolePolar.getDevicePointer(), &dampingAndThole.getDevicePointer()};
        cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*electrostaticsThreads, electrostaticsThreads);
        if (gkKernel != NULL)
-            gkKernel->finishComputation(torque, labFrameDipoles, labFrameQuadrupoles, inducedDipole, inducedDipolePolar, dampingAndThole, covalentFlags, polarizationGroupFlags);
+            gkKernel->finishComputation(torque, labDipoles, labQuadrupoles, inducedDipole, inducedDipolePolar, dampingAndThole, covalentFlags, polarizationGroupFlags);
    }
    else {
        // Compute reciprocal box vectors.
@@ -842,7 +841,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
        // Reciprocal space calculation.
        
        unsigned int maxTiles = nb.getInteractingTiles().getSize();
-        void* pmeTransformMultipolesArgs[] = {&labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(),
+        void* pmeTransformMultipolesArgs[] = {&labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(),
            &fracDipoles.getDevicePointer(), &fracQuadrupoles.getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
        cu.executeKernel(pmeTransformMultipolesKernel, pmeTransformMultipolesArgs, cu.getNumAtoms());
        void* pmeSpreadFixedMultipolesArgs[] = {&cu.getPosq().getDevicePointer(), &fracDipoles.getDevicePointer(), &fracQuadrupoles.getDevicePointer(),
@@ -864,14 +863,14 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
        else
            cufftExecC2C(fft, (float2*) pmeGrid.getDevicePointer(), (float2*) pmeGrid.getDevicePointer(), CUFFT_INVERSE);
        void* pmeFixedPotentialArgs[] = {&pmeGrid.getDevicePointer(), &pmePhi.getDevicePointer(), &field.getDevicePointer(),
-            &fieldPolar .getDevicePointer(), &cu.getPosq().getDevicePointer(), &labFrameDipoles.getDevicePointer(),
+            &fieldPolar .getDevicePointer(), &cu.getPosq().getDevicePointer(), &labDipoles.getDevicePointer(),
            cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
            recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
        cu.executeKernel(pmeFixedPotentialKernel, pmeFixedPotentialArgs, cu.getNumAtoms());
        void* pmeTransformFixedPotentialArgs[] = {&pmePhi.getDevicePointer(), &pmeCphi.getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
        cu.executeKernel(pmeTransformPotentialKernel, pmeTransformFixedPotentialArgs, cu.getNumAtoms());
        void* pmeFixedForceArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &torque.getDevicePointer(),
-            &cu.getEnergyBuffer().getDevicePointer(), &labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(),
+            &cu.getEnergyBuffer().getDevicePointer(), &labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(),
            &fracDipoles.getDevicePointer(), &fracQuadrupoles.getDevicePointer(), &pmePhi.getDevicePointer(), &pmeCphi.getDevicePointer(),
            recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
        cu.executeKernel(pmeFixedForceKernel, pmeFixedForceArgs, cu.getNumAtoms());
@@ -883,7 +882,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
            &nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(), cu.getPeriodicBoxSizePointer(),
            cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
            &maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
-            &labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(), &dampingAndThole.getDevicePointer()};
+            &labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(), &dampingAndThole.getDevicePointer()};
        cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*fixedFieldThreads, fixedFieldThreads);
        void* recordInducedDipolesArgs[] = {&field.getDevicePointer(), &fieldPolar.getDevicePointer(),
            &inducedDipole.getDevicePointer(), &inducedDipolePolar.getDevicePointer(), &polarizability.getDevicePointer()};
@@ -937,7 +936,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
        void* pmeTransformInducedPotentialArgs[] = {&pmePhidp.getDevicePointer(), &pmeCphi.getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
        cu.executeKernel(pmeTransformPotentialKernel, pmeTransformInducedPotentialArgs, cu.getNumAtoms());
        void* pmeInducedForceArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &torque.getDevicePointer(),
-            &cu.getEnergyBuffer().getDevicePointer(), &labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(),
+            &cu.getEnergyBuffer().getDevicePointer(), &labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(),
            &fracDipoles.getDevicePointer(), &fracQuadrupoles.getDevicePointer(),
            &inducedDipole.getDevicePointer(), &inducedDipolePolar.getDevicePointer(), &pmePhi.getDevicePointer(), &pmePhid.getDevicePointer(),
            &pmePhip.getDevicePointer(), &pmePhidp.getDevicePointer(), &pmeCphi.getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
@@ -1220,23 +1219,22 @@ void CudaCalcAmoebaMultipoleForceKernel::ensureMultipolesValid(ContextImpl& cont
        context.calcForcesAndEnergy(false, false, context.getIntegrator().getIntegrationForceGroups());
 }

-
 void CudaCalcAmoebaMultipoleForceKernel::getLabFramePermanentDipoles(ContextImpl& context, vector<Vec3>& dipoles) {
    ensureMultipolesValid(context);
    int numParticles = cu.getNumAtoms();
    dipoles.resize(numParticles);
    const vector<int>& order = cu.getAtomIndex();
    if (cu.getUseDoublePrecision()) {
-        vector<double> labDipoleVec;
-        labFrameDipoles.download(labDipoleVec);
+        vector<double3> labDipoleVec;
+        labDipoles.download(labDipoleVec);
        for (int i = 0; i < numParticles; i++)
-            dipoles[order[i]] = Vec3(labDipoleVec[3*i], labDipoleVec[3*i+1], labDipoleVec[3*i+2]);
+            dipoles[order[i]] = Vec3(labDipoleVec[i].x, labDipoleVec[i].y, labDipoleVec[i].z);
    }
    else {
-        vector<float> labDipoleVec;
-        labFrameDipoles.download(labDipoleVec);
+        vector<float3> labDipoleVec;
+        labDipoles.download(labDipoleVec);
        for (int i = 0; i < numParticles; i++)
-            dipoles[order[i]] = Vec3(labDipoleVec[3*i], labDipoleVec[3*i+1], labDipoleVec[3*i+2]);
+            dipoles[order[i]] = Vec3(labDipoleVec[i].x, labDipoleVec[i].y, labDipoleVec[i].z);
    }
 }

@@ -1247,16 +1245,16 @@ void CudaCalcAmoebaMultipoleForceKernel::getInducedDipoles(ContextImpl& context,
    dipoles.resize(numParticles);
    const vector<int>& order = cu.getAtomIndex();
    if (cu.getUseDoublePrecision()) {
-        vector<double> d;
+        vector<double3> d;
        inducedDipole.download(d);
        for (int i = 0; i < numParticles; i++)
-            dipoles[order[i]] = Vec3(d[3*i], d[3*i+1], d[3*i+2]);
+            dipoles[order[i]] = Vec3(d[i].x, d[i].y, d[i].z);
    }
    else {
-        vector<float> d;
+        vector<float3> d;
        inducedDipole.download(d);
        for (int i = 0; i < numParticles; i++)
-            dipoles[order[i]] = Vec3(d[3*i], d[3*i+1], d[3*i+2]);
+            dipoles[order[i]] = Vec3(d[i].x, d[i].y, d[i].z);
    }
 }

@@ -1268,35 +1266,35 @@ void CudaCalcAmoebaMultipoleForceKernel::getTotalDipoles(ContextImpl& context, v
    const vector<int>& order = cu.getAtomIndex();
    if (cu.getUseDoublePrecision()) {
        vector<double4> posqVec;
-        vector<double> labDipoleVec;
-        vector<double> inducedDipoleVec;
+        vector<double3> labDipoleVec;
+        vector<double3> inducedDipoleVec;
        double totalDipoleVecX;
        double totalDipoleVecY;
        double totalDipoleVecZ;
        inducedDipole.download(inducedDipoleVec);
-        labFrameDipoles.download(labDipoleVec);
+        labDipoles.download(labDipoleVec);
        cu.getPosq().download(posqVec);
        for (int i = 0; i < numParticles; i++) {
-            totalDipoleVecX = labDipoleVec[3*i] + inducedDipoleVec[3*i];
-            totalDipoleVecY = labDipoleVec[3*i+1] + inducedDipoleVec[3*i+1];
-            totalDipoleVecZ = labDipoleVec[3*i+2] + inducedDipoleVec[3*i+2];
+            totalDipoleVecX = labDipoleVec[i].x + inducedDipoleVec[i].x;
+            totalDipoleVecY = labDipoleVec[i].y + inducedDipoleVec[i].y;
+            totalDipoleVecZ = labDipoleVec[i].z + inducedDipoleVec[i].z;
            dipoles[order[i]] = Vec3(totalDipoleVecX, totalDipoleVecY, totalDipoleVecZ);
        }
    }
    else {
        vector<float4> posqVec;
-        vector<float> labDipoleVec;
-        vector<float> inducedDipoleVec;
+        vector<float3> labDipoleVec;
+        vector<float3> inducedDipoleVec;
        float totalDipoleVecX;
        float totalDipoleVecY;
        float totalDipoleVecZ;
        inducedDipole.download(inducedDipoleVec);
-        labFrameDipoles.download(labDipoleVec);
+        labDipoles.download(labDipoleVec);
        cu.getPosq().download(posqVec);
        for (int i = 0; i < numParticles; i++) {
-            totalDipoleVecX = labDipoleVec[3*i] + inducedDipoleVec[3*i];
-            totalDipoleVecY = labDipoleVec[3*i+1] + inducedDipoleVec[3*i+1];
-            totalDipoleVecZ = labDipoleVec[3*i+2] + inducedDipoleVec[3*i+2];
+            totalDipoleVecX = labDipoleVec[i].x + inducedDipoleVec[i].x;
+            totalDipoleVecY = labDipoleVec[i].y + inducedDipoleVec[i].y;
+            totalDipoleVecZ = labDipoleVec[i].z + inducedDipoleVec[i].z;
            dipoles[order[i]] = Vec3(totalDipoleVecX, totalDipoleVecY, totalDipoleVecZ);
        }
    }
@@ -1326,8 +1324,8 @@ void CudaCalcAmoebaMultipoleForceKernel::getElectrostaticPotential(ContextImpl&
    
    // Compute the potential.
    
-    void* computePotentialArgs[] = {&cu.getPosq().getDevicePointer(), &labFrameDipoles.getDevicePointer(),
-        &labFrameQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(), &points.getDevicePointer(),
+    void* computePotentialArgs[] = {&cu.getPosq().getDevicePointer(), &labDipoles.getDevicePointer(),
+        &labQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(), &points.getDevicePointer(),
        &potential.getDevicePointer(), &numPoints, cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer(),
        cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer()};
    int blockSize = 128;
@@ -1343,7 +1341,7 @@ void CudaCalcAmoebaMultipoleForceKernel::getElectrostaticPotential(ContextImpl&
    }
 }

-template <class T, class T4, class M4>
+template <class T, class T3, class T4, class M4>
 void CudaCalcAmoebaMultipoleForceKernel::computeSystemMultipoleMoments(ContextImpl& context, vector<double>& outputMultipoleMoments) {
    // Compute the local coordinates relative to the center of mass.
    int numAtoms = cu.getNumAtoms();
@@ -1388,15 +1386,16 @@ void CudaCalcAmoebaMultipoleForceKernel::computeSystemMultipoleMoments(ContextIm
    double zxqdp = 0.0;
    double zyqdp = 0.0;
    double zzqdp = 0.0;
-    vector<T> labDipoleVec, inducedDipoleVec, quadrupoleVec;
-    labFrameDipoles.download(labDipoleVec);
+    vector<T3> labDipoleVec, inducedDipoleVec;
+    vector<T> quadrupoleVec;
+    labDipoles.download(labDipoleVec);
    inducedDipole.download(inducedDipoleVec);
-    labFrameQuadrupoles.download(quadrupoleVec);
+    labQuadrupoles.download(quadrupoleVec);
    for (int i = 0; i < numAtoms; i++) {
        totalCharge += posqLocal[i].w;
-        double netDipoleX = (labDipoleVec[3*i]    + inducedDipoleVec[3*i]);
-        double netDipoleY = (labDipoleVec[3*i+1]  + inducedDipoleVec[3*i+1]);
-        double netDipoleZ = (labDipoleVec[3*i+2]  + inducedDipoleVec[3*i+2]);
+        double netDipoleX = (labDipoleVec[i].x + inducedDipoleVec[i].x);
+        double netDipoleY = (labDipoleVec[i].y + inducedDipoleVec[i].y);
+        double netDipoleZ = (labDipoleVec[i].z + inducedDipoleVec[i].z);
        xdpl += posqLocal[i].x*posqLocal[i].w + netDipoleX;
        ydpl += posqLocal[i].y*posqLocal[i].w + netDipoleY;
        zdpl += posqLocal[i].z*posqLocal[i].w + netDipoleZ;
@@ -1459,11 +1458,11 @@ void CudaCalcAmoebaMultipoleForceKernel::computeSystemMultipoleMoments(ContextIm
 void CudaCalcAmoebaMultipoleForceKernel::getSystemMultipoleMoments(ContextImpl& context, vector<double>& outputMultipoleMoments) {
    ensureMultipolesValid(context);
    if (cu.getUseDoublePrecision())
-        computeSystemMultipoleMoments<double, double4, double4>(context, outputMultipoleMoments);
+        computeSystemMultipoleMoments<double, double3, double4, double4>(context, outputMultipoleMoments);
    else if (cu.getUseMixedPrecision())
-        computeSystemMultipoleMoments<float, float4, double4>(context, outputMultipoleMoments);
+        computeSystemMultipoleMoments<float, float3, float4, double4>(context, outputMultipoleMoments);
    else
-        computeSystemMultipoleMoments<float, float4, float4>(context, outputMultipoleMoments);
+        computeSystemMultipoleMoments<float, float3, float4, float4>(context, outputMultipoleMoments);
 }

 void CudaCalcAmoebaMultipoleForceKernel::copyParametersToContext(ContextImpl& context, const AmoebaMultipoleForce& force) {
@@ -1480,8 +1479,8 @@ void CudaCalcAmoebaMultipoleForceKernel::copyParametersToContext(ContextImpl& co
    double4* posqd = (double4*) cu.getPinnedBuffer();
    vector<float2> dampingAndTholeVec;
    vector<float> polarizabilityVec;
-    vector<float> molecularDipolesVec;
-    vector<float> molecularQuadrupolesVec;
+    vector<float> localDipolesVec;
+    vector<float> localQuadrupolesVec;
    vector<int4> multipoleParticlesVec;
    for (int i = 0; i < force.getNumMultipoles(); i++) {
        double charge, thole, damping, polarity;
@@ -1496,15 +1495,15 @@ void CudaCalcAmoebaMultipoleForceKernel::copyParametersToContext(ContextImpl& co
        polarizabilityVec.push_back((float) polarity);
        multipoleParticlesVec.push_back(make_int4(atomX, atomY, atomZ, axisType));
        for (int j = 0; j < 3; j++)
-            molecularDipolesVec.push_back((float) dipole[j]);
-        molecularQuadrupolesVec.push_back((float) quadrupole[0]);
-        molecularQuadrupolesVec.push_back((float) quadrupole[1]);
-        molecularQuadrupolesVec.push_back((float) quadrupole[2]);
-        molecularQuadrupolesVec.push_back((float) quadrupole[4]);
-        molecularQuadrupolesVec.push_back((float) quadrupole[5]);
+            localDipolesVec.push_back((float) dipole[j]);
+        localQuadrupolesVec.push_back((float) quadrupole[0]);
+        localQuadrupolesVec.push_back((float) quadrupole[1]);
+        localQuadrupolesVec.push_back((float) quadrupole[2]);
+        localQuadrupolesVec.push_back((float) quadrupole[4]);
+        localQuadrupolesVec.push_back((float) quadrupole[5]);
    }
    if (!hasQuadrupoles) {
-        for (auto q : molecularQuadrupolesVec)
+        for (auto q : localQuadrupolesVec)
            if (q != 0.0)
                throw OpenMMException("updateParametersInContext: Cannot set a non-zero quadrupole moment, because quadrupoles were excluded from the kernel");
    }
@@ -1513,15 +1512,15 @@ void CudaCalcAmoebaMultipoleForceKernel::copyParametersToContext(ContextImpl& co
        polarizabilityVec.push_back(0);
        multipoleParticlesVec.push_back(make_int4(0, 0, 0, 0));
        for (int j = 0; j < 3; j++)
-            molecularDipolesVec.push_back(0);
+            localDipolesVec.push_back(0);
        for (int j = 0; j < 5; j++)
-            molecularQuadrupolesVec.push_back(0);
+            localQuadrupolesVec.push_back(0);
    }
    dampingAndThole.upload(dampingAndTholeVec);
    polarizability.upload(polarizabilityVec);
    multipoleParticles.upload(multipoleParticlesVec);
-    molecularDipoles.upload(molecularDipolesVec);
-    molecularQuadrupoles.upload(molecularQuadrupolesVec);
+    localDipoles.upload(localDipolesVec);
+    localQuadrupoles.upload(localQuadrupolesVec);
    cu.getPosq().upload(cu.getPinnedBuffer());
    cu.invalidateMolecules();
    multipolesAreValid = false;
@@ -1530,7 +1529,7 @@ void CudaCalcAmoebaMultipoleForceKernel::copyParametersToContext(ContextImpl& co
 void CudaCalcAmoebaMultipoleForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
    if (!usePME)
        throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
-    alpha = this->alpha;
+    alpha = pmeAlpha;
    nx = gridSizeX;
    ny = gridSizeY;
    nz = gridSizeZ;
@@ -1712,7 +1711,7 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::computeBornRadii() {
    cu.executeKernel(reduceBornSumKernel, reduceBornSumArgs, cu.getNumAtoms());
 }

-void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles,
+void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray& torque, CudaArray& labDipoles, CudaArray& labQuadrupoles,
            CudaArray& inducedDipole, CudaArray& inducedDipolePolar, CudaArray& dampingAndThole, CudaArray& covalentFlags, CudaArray& polarizationGroupFlags) {
    CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
    int startTileIndex = nb.getStartTileIndex();
@@ -1722,8 +1721,8 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray&
    // Compute the GK force.
    
    void* gkForceArgs[] = {&cu.getForce().getDevicePointer(), &torque.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
-        &cu.getPosq().getDevicePointer(), &startTileIndex, &numTileIndices, &labFrameDipoles.getDevicePointer(),
-        &labFrameQuadrupoles.getDevicePointer(), &inducedDipoleS.getDevicePointer(), &inducedDipolePolarS.getDevicePointer(),
+        &cu.getPosq().getDevicePointer(), &startTileIndex, &numTileIndices, &labDipoles.getDevicePointer(),
+        &labQuadrupoles.getDevicePointer(), &inducedDipoleS.getDevicePointer(), &inducedDipolePolarS.getDevicePointer(),
        &bornRadii.getDevicePointer(), &bornForce.getDevicePointer()};
    cu.executeKernel(gkForceKernel, gkForceArgs, numForceThreadBlocks*gkForceThreads, gkForceThreads);

@@ -1742,7 +1741,7 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray&
    void* ediffArgs[] = {&cu.getForce().getDevicePointer(), &torque.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
        &cu.getPosq().getDevicePointer(), &covalentFlags.getDevicePointer(), &polarizationGroupFlags.getDevicePointer(),
        &nb.getExclusionTiles().getDevicePointer(), &startTileIndex, &numTileIndices,
-        &labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(),
+        &labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(),
        &inducedDipolePolar.getDevicePointer(), &inducedDipoleS.getDevicePointer(), &inducedDipolePolarS.getDevicePointer(),
        &dampingAndThole.getDevicePointer()};
    cu.executeKernel(ediffKernel, ediffArgs, numForceThreadBlocks*ediffThreads, ediffThreads);

--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
@@ -161,11 +161,11 @@ private:
    bool iterateDipolesByDIIS(int iteration);
    void computeExtrapolatedDipoles(void** recipBoxVectorPointer);
    void ensureMultipolesValid(ContextImpl& context);
-    template <class T, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments);
+    template <class T, class T3, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments);
    int numMultipoles, maxInducedIterations, maxExtrapolationOrder;
    int fixedFieldThreads, inducedFieldThreads, electrostaticsThreads;
    int gridSizeX, gridSizeY, gridSizeZ;
-    double alpha, inducedEpsilon;
+    double pmeAlpha, inducedEpsilon;
    bool usePME, hasQuadrupoles, hasInitializedScaleFactors, hasInitializedFFT, multipolesAreValid, hasCreatedEvent;
    AmoebaMultipoleForce::PolarizationType polarizationType;
    CudaContext& cu;
@@ -173,10 +173,10 @@ private:
    std::vector<int3> covalentFlagValues;
    std::vector<int2> polarizationFlagValues;
    CudaArray multipoleParticles;
-    CudaArray molecularDipoles;
-    CudaArray molecularQuadrupoles;
-    CudaArray labFrameDipoles;
-    CudaArray labFrameQuadrupoles;
+    CudaArray localDipoles;
+    CudaArray localQuadrupoles;
+    CudaArray labDipoles;
+    CudaArray labQuadrupoles;
    CudaArray sphericalDipoles;
    CudaArray sphericalQuadrupoles;
    CudaArray fracDipoles;

--- a/plugins/amoeba/platforms/cuda/src/kernels/amoebaGk.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/amoebaGk.cu
@@ -189,7 +189,7 @@ __device__ void computeOneInteractionT2(AtomData2& atom1, volatile AtomData2& at
 __device__ void computeOneInteractionB1B2(AtomData2& atom1, volatile AtomData2& atom2);

 inline __device__ void loadAtomData2(AtomData2& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
-        const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const real* __restrict__ bornRadius) {
+        const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar, const real* __restrict__ bornRadius) {
    real4 atomPosq = posq[atom];
    data.pos = trimTo3(atomPosq);
    data.q = atomPosq.w;
@@ -202,12 +202,8 @@ inline __device__ void loadAtomData2(AtomData2& data, int atom, const real4* __r
    data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
    data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
    data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY);
-    data.inducedDipole.x = inducedDipole[atom*3];
-    data.inducedDipole.y = inducedDipole[atom*3+1];
-    data.inducedDipole.z = inducedDipole[atom*3+2];
-    data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
-    data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
-    data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
+    data.inducedDipole = inducedDipole[atom];
+    data.inducedDipolePolar = inducedDipolePolar[atom];
    data.bornRadius = bornRadius[atom];
 }

@@ -222,7 +218,7 @@ inline __device__ void zeroAtomData(AtomData2& data) {
 extern "C" __global__ void computeGKForces(
        unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
        const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ labFrameDipole,
-        const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
+        const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar,
        const real* __restrict__ bornRadii, unsigned long long* __restrict__ bornForce) {
    unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
    unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
@@ -565,8 +561,8 @@ __device__ void computeOneEDiffInteractionT1(AtomData4& atom1, volatile AtomData
 __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real3& outputForce);

 inline __device__ void loadAtomData4(AtomData4& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
-        const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
-        const real* __restrict__ inducedDipoleS, const real* __restrict__ inducedDipolePolarS, const float2* __restrict__ dampingAndThole) {
+        const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar,
+        const real3* __restrict__ inducedDipoleS, const real3* __restrict__ inducedDipolePolarS, const float2* __restrict__ dampingAndThole) {
    real4 atomPosq = posq[atom];
    data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
    data.q = atomPosq.w;
@@ -579,18 +575,10 @@ inline __device__ void loadAtomData4(AtomData4& data, int atom, const real4* __r
    data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
    data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
    data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY);
-    data.inducedDipole.x = inducedDipole[atom*3];
-    data.inducedDipole.y = inducedDipole[atom*3+1];
-    data.inducedDipole.z = inducedDipole[atom*3+2];
-    data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
-    data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
-    data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
-    data.inducedDipoleS.x = inducedDipoleS[atom*3];
-    data.inducedDipoleS.y = inducedDipoleS[atom*3+1];
-    data.inducedDipoleS.z = inducedDipoleS[atom*3+2];
-    data.inducedDipolePolarS.x = inducedDipolePolarS[atom*3];
-    data.inducedDipolePolarS.y = inducedDipolePolarS[atom*3+1];
-    data.inducedDipolePolarS.z = inducedDipolePolarS[atom*3+2];
+    data.inducedDipole = inducedDipole[atom];
+    data.inducedDipolePolar = inducedDipolePolar[atom];
+    data.inducedDipoleS = inducedDipoleS[atom];
+    data.inducedDipolePolarS = inducedDipolePolarS[atom];
    float2 temp = dampingAndThole[atom];
    data.damp = temp.x;
    data.thole = temp.y;
@@ -615,8 +603,8 @@ extern "C" __global__ void computeEDiffForce(
        unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
        const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
        const int2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
-        const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
-        const real* __restrict__ inducedDipolePolar, const real* __restrict__ inducedDipoleS, const real* __restrict__ inducedDipolePolarS,
+        const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole,
+        const real3* __restrict__ inducedDipolePolar, const real3* __restrict__ inducedDipoleS, const real3* __restrict__ inducedDipolePolarS,
        const float2* __restrict__ dampingAndThole) {
    const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
    const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;

--- a/plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
@@ -10,7 +10,7 @@ typedef struct {
 } AtomData;

 inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ sphericalDipole,
-        const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
+        const real* __restrict__ sphericalQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
    real4 atomPosq = posq[atom];
    data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
    data.q = atomPosq.w;
@@ -24,12 +24,8 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
    data.sphericalQuadrupole[3] = sphericalQuadrupole[atom*5+3];
    data.sphericalQuadrupole[4] = sphericalQuadrupole[atom*5+4];
 #endif
-    data.inducedDipole.x = inducedDipole[atom*3];
-    data.inducedDipole.y = inducedDipole[atom*3+1];
-    data.inducedDipole.z = inducedDipole[atom*3+2];
-    data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
-    data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
-    data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
+    data.inducedDipole = inducedDipole[atom];
+    data.inducedDipolePolar = inducedDipolePolar[atom];
    float2 temp = dampingAndThole[atom];
    data.damp = temp.x;
    data.thole = temp.y;
@@ -382,8 +378,8 @@ extern "C" __global__ void computeElectrostatics(
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
        const unsigned int* __restrict__ interactingAtoms,
 #endif
-        const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole,
-        const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
+        const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real3* __restrict__ inducedDipole,
+        const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
    const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
    const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
    const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);

--- a/plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
@@ -18,31 +18,23 @@ typedef struct {
 } AtomData;

 #ifdef USE_GK
-inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ inducedDipole,
-        const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole, const real* __restrict__ inducedDipoleS,
-        const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii) {
+inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real3* __restrict__ inducedDipole,
+        const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole, const real3* __restrict__ inducedDipoleS,
+        const real3* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii) {
 #else
-inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ inducedDipole,
-        const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
+inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real3* __restrict__ inducedDipole,
+        const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
 #endif
    real4 atomPosq = posq[atom];
    data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
-    data.inducedDipole.x = inducedDipole[atom*3];
-    data.inducedDipole.y = inducedDipole[atom*3+1];
-    data.inducedDipole.z = inducedDipole[atom*3+2];
-    data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
-    data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
-    data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
+    data.inducedDipole = inducedDipole[atom];
+    data.inducedDipolePolar = inducedDipolePolar[atom];
    float2 temp = dampingAndThole[atom];
    data.damp = temp.x;
    data.thole = temp.y;
 #ifdef USE_GK
-    data.inducedDipoleS.x = inducedDipoleS[atom*3];
-    data.inducedDipoleS.y = inducedDipoleS[atom*3+1];
-    data.inducedDipoleS.z = inducedDipoleS[atom*3+2];
-    data.inducedDipolePolarS.x = inducedDipolePolarS[atom*3];
-    data.inducedDipolePolarS.y = inducedDipolePolarS[atom*3+1];
-    data.inducedDipolePolarS.z = inducedDipolePolarS[atom*3+2];
+    data.inducedDipoleS = inducedDipoleS[atom];
+    data.inducedDipolePolarS = inducedDipolePolarS[atom];
    data.bornRadius = bornRadii[atom];
 #endif
 }
@@ -358,7 +350,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
 */
 extern "C" __global__ void computeInducedField(
        unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const int2* __restrict__ exclusionTiles, 
-        const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
+        const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
 #ifdef EXTRAPOLATED_POLARIZATION
        unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar,
 #endif
@@ -366,8 +358,8 @@ extern "C" __global__ void computeInducedField(
        const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
 #elif defined USE_GK
-        unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS,
-        const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii,
+        unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real3* __restrict__ inducedDipoleS,
+        const real3* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii,
    #ifdef EXTRAPOLATED_POLARIZATION
        unsigned long long* __restrict__ fieldGradientS, unsigned long long* __restrict__ fieldGradientPolarS,
    #endif
@@ -556,53 +548,6 @@ extern "C" __global__ void computeInducedField(
    }
 }

-extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__ fixedField, const long long* __restrict__ fixedFieldPolar,
-        const long long* __restrict__ fixedFieldS, const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar,
-        real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors) {
-    extern __shared__ real2 buffer[];
-    const float polarSOR = 0.55f;
-#ifdef USE_EWALD
-    const real ewaldScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
-#else
-    const real ewaldScale = 0;
-#endif
-    const real fieldScale = 1/(real) 0x100000000;
-    real sumErrors = 0;
-    real sumPolarErrors = 0;
-    for (int atom = blockIdx.x*blockDim.x + threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
-        real scale = polarizability[atom];
-        for (int component = 0; component < 3; component++) {
-            int dipoleIndex = 3*atom+component;
-            int fieldIndex = atom+component*PADDED_NUM_ATOMS;
-            real previousDipole = inducedDipole[dipoleIndex];
-            real previousDipolePolar = inducedDipolePolar[dipoleIndex];
-            long long fixedS = (fixedFieldS == NULL ? (long long) 0 : fixedFieldS[fieldIndex]);
-            real newDipole = scale*((fixedField[fieldIndex]+fixedS+inducedField[fieldIndex])*fieldScale+ewaldScale*previousDipole);
-            real newDipolePolar = scale*((fixedFieldPolar[fieldIndex]+fixedS+inducedFieldPolar[fieldIndex])*fieldScale+ewaldScale*previousDipolePolar);
-            newDipole = previousDipole + polarSOR*(newDipole-previousDipole);
-            newDipolePolar = previousDipolePolar + polarSOR*(newDipolePolar-previousDipolePolar);
-            inducedDipole[dipoleIndex] = newDipole;
-            inducedDipolePolar[dipoleIndex] = newDipolePolar;
-            sumErrors += (newDipole-previousDipole)*(newDipole-previousDipole);
-            sumPolarErrors += (newDipolePolar-previousDipolePolar)*(newDipolePolar-previousDipolePolar);
-        }
-    }
-    
-    // Sum the errors over threads and store the total for this block.
-    
-    buffer[threadIdx.x] = make_real2(sumErrors, sumPolarErrors);
-    __syncthreads();
-    for (int offset = 1; offset < blockDim.x; offset *= 2) {
-        if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0) {
-            buffer[threadIdx.x].x += buffer[threadIdx.x+offset].x;
-            buffer[threadIdx.x].y += buffer[threadIdx.x+offset].y;
-        }
-        __syncthreads();
-    }
-    if (threadIdx.x == 0)
-        errors[blockIdx.x] = make_float2((float) buffer[0].x, (float) buffer[0].y);
-}
-
 extern "C" __global__ void recordInducedDipolesForDIIS(const long long* __restrict__ fixedField, const long long* __restrict__ fixedFieldPolar,
        const long long* __restrict__ fixedFieldS, const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar,
        const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors,

--- a/plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
@@ -72,11 +72,11 @@ __device__ void computeBSplinePoint(real4* thetai, real w, real* array) {
 /**
 * Convert the fixed multipoles from Cartesian to fractional coordinates.
 */
-extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real* __restrict__ labFrameDipole,
+extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real* __restrict__ labDipole,
 #ifdef HIPPO
        const real* __restrict__ labQXX, const real* __restrict__ labQXY, const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ,
 #else
-        const real* __restrict__ labFrameQuadrupole,
+        const real* __restrict__ labQuadrupole,
 #endif
        real* __restrict__ fracDipole, real* __restrict__ fracQuadrupole, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
    // Build matrices for transforming the dipoles and quadrupoles.
@@ -113,7 +113,7 @@ extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real
        for (int j = 0; j < 3; j++) {
            real dipole = 0;
            for (int k = 0; k < 3; k++)
-                dipole += a[j][k]*labFrameDipole[3*i+k];
+                dipole += a[j][k]*labDipole[3*i+k];
            fracDipole[3*i+j] = dipole;
        }
        for (int j = 0; j < 6; j++) {
@@ -127,8 +127,8 @@ extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real
 #else
            real quadrupole = 0;
            for (int k = 0; k < 5; k++)
-                quadrupole += quadScale[k]*b[j][k]*labFrameQuadrupole[5*i+k];
-            quadrupole -= quadScale[5]*b[j][5]*(labFrameQuadrupole[5*i]+labFrameQuadrupole[5*i+3]);
+                quadrupole += quadScale[k]*b[j][k]*labQuadrupole[5*i+k];
+            quadrupole -= quadScale[5]*b[j][5]*(labQuadrupole[5*i]+labQuadrupole[5*i+3]);
 #endif
            fracQuadrupole[6*i+j] = quadrupole;
        }
@@ -289,11 +289,11 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
    }
 }

-extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ posq, const real* __restrict__ inducedDipole,
+extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ posq, const real3* __restrict__ inducedDipole,
 #ifdef HIPPO
        real* __restrict__ pmeGrid,
 #else
-        const real* __restrict__ inducedDipolePolar, real2* __restrict__ pmeGrid,
+        const real3* __restrict__ inducedDipolePolar, real2* __restrict__ pmeGrid,
 #endif
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
 #if __CUDA_ARCH__ < 500
@@ -325,12 +325,12 @@ extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ po
        pos -= periodicBoxVecZ*floor(pos.z*recipBoxVecZ.z+0.5f);
        pos -= periodicBoxVecY*floor(pos.y*recipBoxVecY.z+0.5f);
        pos -= periodicBoxVecX*floor(pos.x*recipBoxVecX.z+0.5f);
-        real3 cinducedDipole = ((const real3*) inducedDipole)[m];
+        real3 cinducedDipole = inducedDipole[m];
        real3 finducedDipole = make_real3(cinducedDipole.x*cartToFrac[0][0] + cinducedDipole.y*cartToFrac[0][1] + cinducedDipole.z*cartToFrac[0][2],
                                          cinducedDipole.x*cartToFrac[1][0] + cinducedDipole.y*cartToFrac[1][1] + cinducedDipole.z*cartToFrac[1][2],
                                          cinducedDipole.x*cartToFrac[2][0] + cinducedDipole.y*cartToFrac[2][1] + cinducedDipole.z*cartToFrac[2][2]);
 #ifndef HIPPO
-        real3 cinducedDipolePolar = ((const real3*) inducedDipolePolar)[m];
+        real3 cinducedDipolePolar = inducedDipolePolar[m];
        real3 finducedDipolePolar = make_real3(cinducedDipolePolar.x*cartToFrac[0][0] + cinducedDipolePolar.y*cartToFrac[0][1] + cinducedDipolePolar.z*cartToFrac[0][2],
                                               cinducedDipolePolar.x*cartToFrac[1][0] + cinducedDipolePolar.y*cartToFrac[1][1] + cinducedDipolePolar.z*cartToFrac[1][2],
                                               cinducedDipolePolar.x*cartToFrac[2][0] + cinducedDipolePolar.y*cartToFrac[2][1] + cinducedDipolePolar.z*cartToFrac[2][2]);
@@ -480,7 +480,7 @@ extern "C" __global__ void computeFixedPotentialFromGrid(
 #ifndef HIPPO
        long long* __restrict__ fieldPolarBuffers,
 #endif
-        const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
+        const real4* __restrict__ posq, const real* __restrict__ labDipole, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
        real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
 #if __CUDA_ARCH__ < 500
    real array[PME_ORDER*PME_ORDER];
@@ -643,9 +643,9 @@ extern "C" __global__ void computeFixedPotentialFromGrid(
        phi[m+NUM_ATOMS*18] = tuv012;
        phi[m+NUM_ATOMS*19] = tuv111;
        real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
-        long long fieldx = (long long) ((dipoleScale*labFrameDipole[m*3]-tuv100*fracToCart[0][0]-tuv010*fracToCart[0][1]-tuv001*fracToCart[0][2])*0x100000000);
-        long long fieldy = (long long) ((dipoleScale*labFrameDipole[m*3+1]-tuv100*fracToCart[1][0]-tuv010*fracToCart[1][1]-tuv001*fracToCart[1][2])*0x100000000);
-        long long fieldz = (long long) ((dipoleScale*labFrameDipole[m*3+2]-tuv100*fracToCart[2][0]-tuv010*fracToCart[2][1]-tuv001*fracToCart[2][2])*0x100000000);
+        long long fieldx = (long long) ((dipoleScale*labDipole[m*3]-tuv100*fracToCart[0][0]-tuv010*fracToCart[0][1]-tuv001*fracToCart[0][2])*0x100000000);
+        long long fieldy = (long long) ((dipoleScale*labDipole[m*3+1]-tuv100*fracToCart[1][0]-tuv010*fracToCart[1][1]-tuv001*fracToCart[1][2])*0x100000000);
+        long long fieldz = (long long) ((dipoleScale*labDipole[m*3+2]-tuv100*fracToCart[2][0]-tuv010*fracToCart[2][1]-tuv001*fracToCart[2][2])*0x100000000);
        fieldBuffers[m] = fieldx;
        fieldBuffers[m+PADDED_NUM_ATOMS] = fieldy;
        fieldBuffers[m+2*PADDED_NUM_ATOMS] = fieldz;
@@ -942,12 +942,12 @@ extern "C" __global__ void computeInducedPotentialFromGrid(
 }

 extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
-        long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole,
+        long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labDipole,
 #ifdef HIPPO
        const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge, const real* __restrict__ labQXX,
        const real* __restrict__ labQXY, const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ,
 #else
-        const real* __restrict__ labFrameQuadrupole,
+        const real* __restrict__ labQuadrupole,
 #endif
        const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole,
        const real* __restrict__ phi, const real* __restrict__ cphi_global, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
@@ -972,9 +972,9 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
        // Compute the torque.

-        multipole[1] = labFrameDipole[i*3];
-        multipole[2] = labFrameDipole[i*3+1];
-        multipole[3] = labFrameDipole[i*3+2];
+        multipole[1] = labDipole[i*3];
+        multipole[2] = labDipole[i*3+1];
+        multipole[3] = labDipole[i*3+2];
 #ifdef HIPPO
        multipole[0] = coreCharge[i]+valenceCharge[i];
        multipole[4] = labQXX[i];
@@ -984,11 +984,11 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
        multipole[9] = 2*labQYZ[i];
 #else
        multipole[0] = posq[i].w;
-        multipole[4] = labFrameQuadrupole[i*5];
-        multipole[5] = labFrameQuadrupole[i*5+3];
-        multipole[7] = 2*labFrameQuadrupole[i*5+1];
-        multipole[8] = 2*labFrameQuadrupole[i*5+2];
-        multipole[9] = 2*labFrameQuadrupole[i*5+4];
+        multipole[4] = labQuadrupole[i*5];
+        multipole[5] = labQuadrupole[i*5+3];
+        multipole[7] = 2*labQuadrupole[i*5+1];
+        multipole[8] = 2*labQuadrupole[i*5+2];
+        multipole[9] = 2*labQuadrupole[i*5+4];
 #endif
        multipole[6] = -(multipole[4]+multipole[5]);

@@ -1039,17 +1039,17 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
 }

 extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
-        long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole,
+        long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labDipole,
 #ifdef HIPPO
        const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge, const real* __restrict__ extrapolatedDipole,
        const real* __restrict__ extrapolatedPhi, const real* __restrict__ labQXX, const real* __restrict__ labQXY,
        const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ,
 #else
-        const real* __restrict__ labFrameQuadrupole,
+        const real* __restrict__ labQuadrupole,
 #endif
-        const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole, const real* __restrict__ inducedDipole_global,
+        const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole, const real3* __restrict__ inducedDipole_global,
 #ifndef HIPPO
-        const real* __restrict__ inducedDipolePolar_global,
+        const real3* __restrict__ inducedDipolePolar_global,
 #endif
        const real* __restrict__ phi,
 #ifndef HIPPO
@@ -1082,9 +1082,9 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
        // Compute the torque.

-        multipole[1] = labFrameDipole[i*3];
-        multipole[2] = labFrameDipole[i*3+1];
-        multipole[3] = labFrameDipole[i*3+2];
+        multipole[1] = labDipole[i*3];
+        multipole[2] = labDipole[i*3+1];
+        multipole[3] = labDipole[i*3+2];
 #ifdef HIPPO
        multipole[0] = coreCharge[i]+valenceCharge[i];
        multipole[4] = labQXX[i];
@@ -1095,11 +1095,11 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
        const real scale = EPSILON_FACTOR;
 #else
        multipole[0] = posq[i].w;
-        multipole[4] = labFrameQuadrupole[i*5];
-        multipole[5] = labFrameQuadrupole[i*5+3];
-        multipole[7] = 2*labFrameQuadrupole[i*5+1];
-        multipole[8] = 2*labFrameQuadrupole[i*5+2];
-        multipole[9] = 2*labFrameQuadrupole[i*5+4];
+        multipole[4] = labQuadrupole[i*5];
+        multipole[5] = labQuadrupole[i*5+3];
+        multipole[7] = 2*labQuadrupole[i*5+1];
+        multipole[8] = 2*labQuadrupole[i*5+2];
+        multipole[9] = 2*labQuadrupole[i*5+4];
        const real scale = EPSILON_FACTOR/2;
 #endif
        multipole[6] = -(multipole[4]+multipole[5]);
@@ -1132,13 +1132,13 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
        multipole[8] = fracQuadrupole[i*6+2];
        multipole[9] = fracQuadrupole[i*6+4];

-        cinducedDipole[0] = inducedDipole_global[i*3];
-        cinducedDipole[1] = inducedDipole_global[i*3+1];
-        cinducedDipole[2] = inducedDipole_global[i*3+2];
+        cinducedDipole[0] = inducedDipole_global[i].x;
+        cinducedDipole[1] = inducedDipole_global[i].y;
+        cinducedDipole[2] = inducedDipole_global[i].z;
 #ifndef HIPPO
-        cinducedDipolePolar[0] = inducedDipolePolar_global[i*3];
-        cinducedDipolePolar[1] = inducedDipolePolar_global[i*3+1];
-        cinducedDipolePolar[2] = inducedDipolePolar_global[i*3+2];
+        cinducedDipolePolar[0] = inducedDipolePolar_global[i].x;
+        cinducedDipolePolar[1] = inducedDipolePolar_global[i].y;
+        cinducedDipolePolar[2] = inducedDipolePolar_global[i].z;
 #endif
        
        // Multiply the dipoles by cartToFrac, which is just the transpose of fracToCart.
@@ -1212,7 +1212,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_

 #ifdef HIPPO
 extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phidp, long long* __restrict__ inducedField,
-        const real* __restrict__ inducedDipole, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
+        const real3* __restrict__ inducedDipole, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
    __shared__ real fracToCart[3][3];
    if (threadIdx.x == 0) {
        fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x;
@@ -1228,15 +1228,15 @@ extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ ph
    __syncthreads();
    real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
-        inducedField[i] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[0][0] + phidp[i+NUM_ATOMS*2]*fracToCart[0][1] + phidp[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[3*i]));
-        inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[1][0] + phidp[i+NUM_ATOMS*2]*fracToCart[1][1] + phidp[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[3*i+1]));
-        inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[2][0] + phidp[i+NUM_ATOMS*2]*fracToCart[2][1] + phidp[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[3*i+2]));
+        inducedField[i] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[0][0] + phidp[i+NUM_ATOMS*2]*fracToCart[0][1] + phidp[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[i].x));
+        inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[1][0] + phidp[i+NUM_ATOMS*2]*fracToCart[1][1] + phidp[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[i].y));
+        inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[2][0] + phidp[i+NUM_ATOMS*2]*fracToCart[2][1] + phidp[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[i].z));
    }
 }

 extern "C" __global__ void calculateSelfEnergyAndTorque(long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
-        const real* __restrict__ labFrameDipole, const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge,
-        const real* __restrict__ c6, const real* __restrict__ inducedDipole, const real* __restrict__ labQXX, const real* __restrict__ labQXY,
+        const real3* __restrict__ labDipole, const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge,
+        const real* __restrict__ c6, const real3* __restrict__ inducedDipole, const real* __restrict__ labQXX, const real* __restrict__ labQXY,
        const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ) {
    const real torqueScale = 4*EPSILON_FACTOR*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/(3*SQRT_PI);
    real cii = 0;
@@ -1245,8 +1245,8 @@ extern "C" __global__ void calculateSelfEnergyAndTorque(long long* __restrict__
    real c6ii = 0;
    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
        real charge = coreCharge[i]+valenceCharge[i];
-        real3 dipole = make_real3(labFrameDipole[i*3], labFrameDipole[i*3+1], labFrameDipole[i*3+2]);
-        real3 induced = make_real3(inducedDipole[i*3], inducedDipole[i*3+1], inducedDipole[i*3+2]);
+        real3 dipole = labDipole[i];
+        real3 induced = inducedDipole[i];
        real qXX = labQXX[i];
        real qXY = labQXY[i];
        real qXZ = labQXZ[i];
@@ -1270,7 +1270,7 @@ extern "C" __global__ void calculateSelfEnergyAndTorque(long long* __restrict__
 }
 #else
 extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip, long long* __restrict__ inducedField,
-        long long* __restrict__ inducedFieldPolar, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
+        long long* __restrict__ inducedFieldPolar, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar,
 #ifdef EXTRAPOLATED_POLARIZATION
        unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar,
 #endif
@@ -1290,12 +1290,12 @@ extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ ph
    __syncthreads();
    real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
-        inducedField[i] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[0][0] + phid[i+NUM_ATOMS*2]*fracToCart[0][1] + phid[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[3*i]));
-        inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[1][0] + phid[i+NUM_ATOMS*2]*fracToCart[1][1] + phid[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[3*i+1]));
-        inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[2][0] + phid[i+NUM_ATOMS*2]*fracToCart[2][1] + phid[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[3*i+2]));
-        inducedFieldPolar[i] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[0][0] + phip[i+NUM_ATOMS*2]*fracToCart[0][1] + phip[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipolePolar[3*i]));
-        inducedFieldPolar[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[1][0] + phip[i+NUM_ATOMS*2]*fracToCart[1][1] + phip[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipolePolar[3*i+1]));
-        inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[2][0] + phip[i+NUM_ATOMS*2]*fracToCart[2][1] + phip[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipolePolar[3*i+2]));
+        inducedField[i] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[0][0] + phid[i+NUM_ATOMS*2]*fracToCart[0][1] + phid[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[i].x));
+        inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[1][0] + phid[i+NUM_ATOMS*2]*fracToCart[1][1] + phid[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[i].y));
+        inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[2][0] + phid[i+NUM_ATOMS*2]*fracToCart[2][1] + phid[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[i].z));
+        inducedFieldPolar[i] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[0][0] + phip[i+NUM_ATOMS*2]*fracToCart[0][1] + phip[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipolePolar[i].x));
+        inducedFieldPolar[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[1][0] + phip[i+NUM_ATOMS*2]*fracToCart[1][1] + phip[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipolePolar[i].y));
+        inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[2][0] + phip[i+NUM_ATOMS*2]*fracToCart[2][1] + phip[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipolePolar[i].z));
 #ifdef EXTRAPOLATED_POLARIZATION
        // Compute and store the field gradients for later use.


--- a/plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
 extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4* __restrict__ multipoleParticles, float* __restrict__ molecularDipoles,
-        float* __restrict__ molecularQuadrupoles, real* __restrict__ labFrameDipoles, real* __restrict__ labFrameQuadrupoles,
+        float* __restrict__ molecularQuadrupoles, real3* __restrict__ labFrameDipoles, real* __restrict__ labFrameQuadrupoles,
        real* __restrict__ sphericalDipoles, real* __restrict__ sphericalQuadrupoles) {
    for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
        // Load the spherical multipoles.
@@ -176,9 +176,9 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
            molDipole[2] = molecularDipoles[offset+2];
            if (reverse)
                molDipole[1] *= -1;
-            labFrameDipoles[offset] = molDipole[0]*vectorX.x + molDipole[1]*vectorY.x + molDipole[2]*vectorZ.x;
-            labFrameDipoles[offset+1] = molDipole[0]*vectorX.y + molDipole[1]*vectorY.y + molDipole[2]*vectorZ.y;
-            labFrameDipoles[offset+2] = molDipole[0]*vectorX.z + molDipole[1]*vectorY.z + molDipole[2]*vectorZ.z;
+            labFrameDipoles[atom] = make_real3(molDipole[0]*vectorX.x + molDipole[1]*vectorY.x + molDipole[2]*vectorZ.x,
+                                               molDipole[0]*vectorX.y + molDipole[1]*vectorY.y + molDipole[2]*vectorZ.y,
+                                               molDipole[0]*vectorX.z + molDipole[1]*vectorY.z + molDipole[2]*vectorZ.z);
            
            // ---------------------------------------------------------------------------------------
            
@@ -275,9 +275,7 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
            sphericalQuadrupoles[offset+4] = rotatedQuadrupole[4];
        }
        else {
-            labFrameDipoles[3*atom] = molecularDipoles[3*atom];
-            labFrameDipoles[3*atom+1] = molecularDipoles[3*atom+1];
-            labFrameDipoles[3*atom+2] = molecularDipoles[3*atom+2];
+            labFrameDipoles[atom] = make_real3(molecularDipoles[3*atom], molecularDipoles[3*atom+1], molecularDipoles[3*atom+2]);
            labFrameQuadrupoles[5*atom] = molecularQuadrupoles[5*atom];
            labFrameQuadrupoles[5*atom+1] = molecularQuadrupoles[5*atom+1];
            labFrameQuadrupoles[5*atom+2] = molecularQuadrupoles[5*atom+2];
@@ -289,24 +287,24 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4

 extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fieldBuffers, const long long* __restrict__ fieldPolarBuffers,
 #ifdef USE_GK
-        const long long* __restrict__ gkFieldBuffers, real* __restrict__ inducedDipoleS, real* __restrict__ inducedDipolePolarS, 
+        const long long* __restrict__ gkFieldBuffers, real3* __restrict__ inducedDipoleS, real3* __restrict__ inducedDipolePolarS, 
 #endif
-        real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) {
+        real3* __restrict__ inducedDipole, real3* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) {
    for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
        real scale = polarizability[atom]/(real) 0x100000000;
-        inducedDipole[3*atom] = scale*fieldBuffers[atom];
-        inducedDipole[3*atom+1] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS];
-        inducedDipole[3*atom+2] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS*2];
-        inducedDipolePolar[3*atom] = scale*fieldPolarBuffers[atom];
-        inducedDipolePolar[3*atom+1] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS];
-        inducedDipolePolar[3*atom+2] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2];
+        inducedDipole[atom].x = scale*fieldBuffers[atom];
+        inducedDipole[atom].y = scale*fieldBuffers[atom+PADDED_NUM_ATOMS];
+        inducedDipole[atom].z = scale*fieldBuffers[atom+PADDED_NUM_ATOMS*2];
+        inducedDipolePolar[atom].x = scale*fieldPolarBuffers[atom];
+        inducedDipolePolar[atom].y = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS];
+        inducedDipolePolar[atom].z = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2];
 #ifdef USE_GK
-        inducedDipoleS[3*atom] = scale*(fieldBuffers[atom]+gkFieldBuffers[atom]);
-        inducedDipoleS[3*atom+1] = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
-        inducedDipoleS[3*atom+2] = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
-        inducedDipolePolarS[3*atom] = scale*(fieldPolarBuffers[atom]+gkFieldBuffers[atom]);
-        inducedDipolePolarS[3*atom+1] = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
-        inducedDipolePolarS[3*atom+2] = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
+        inducedDipoleS[atom].x = scale*(fieldBuffers[atom]+gkFieldBuffers[atom]);
+        inducedDipoleS[atom].y = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
+        inducedDipoleS[atom].z = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
+        inducedDipolePolarS[atom].x = scale*(fieldPolarBuffers[atom]+gkFieldBuffers[atom]);
+        inducedDipolePolarS[atom].y = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
+        inducedDipolePolarS[atom].z = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
 #endif
    }
 }
@@ -532,7 +530,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
 * Compute the electrostatic potential at each of a set of points.
 */
 extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
-        const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real4* __restrict__ points,
+        const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole, const real4* __restrict__ points,
        real* __restrict__ potential, int numPoints, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
    extern __shared__ real4 localPosq[];
    real3* localDipole = (real3*) &localPosq[blockDim.x];
@@ -550,7 +548,7 @@ extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ po
            if (atom < NUM_ATOMS) {
                localPosq[threadIdx.x] = posq[atom];
                localDipole[threadIdx.x] = make_real3(labFrameDipole[3*atom], labFrameDipole[3*atom+1], labFrameDipole[3*atom+2]);
-                localInducedDipole[threadIdx.x] = make_real3(inducedDipole[3*atom], inducedDipole[3*atom+1], inducedDipole[3*atom+2]);
+                localInducedDipole[threadIdx.x] = inducedDipole[atom];
                localQuadrupole[5*threadIdx.x] = labFrameQuadrupole[5*atom];
                localQuadrupole[5*threadIdx.x+1] = labFrameQuadrupole[5*atom+1];
                localQuadrupole[5*threadIdx.x+2] = labFrameQuadrupole[5*atom+2];

--- a/plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
@@ -10,7 +10,7 @@ typedef struct {
 } AtomData;

 inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ sphericalDipole,
-            const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
+            const real* __restrict__ sphericalQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar,
            const float2* __restrict__ dampingAndThole) {
    real4 atomPosq = posq[atom];
    data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
@@ -25,12 +25,8 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
    data.sphericalQuadrupole[3] = sphericalQuadrupole[atom*5+3];
    data.sphericalQuadrupole[4] = sphericalQuadrupole[atom*5+4];
 #endif
-    data.inducedDipole.x = inducedDipole[atom*3];
-    data.inducedDipole.y = inducedDipole[atom*3+1];
-    data.inducedDipole.z = inducedDipole[atom*3+2];
-    data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
-    data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
-    data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
+    data.inducedDipole = inducedDipole[atom];
+    data.inducedDipolePolar = inducedDipolePolar[atom];
    float2 temp = dampingAndThole[atom];
    data.damp = temp.x;
    data.thole = temp.y;
@@ -447,8 +443,8 @@ extern "C" __global__ void computeElectrostatics(
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
        const unsigned int* __restrict__ interactingAtoms,
 #endif
-        const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole,
-        const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
+        const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real3* __restrict__ inducedDipole,
+        const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
    const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
    const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
    const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);

--- a/plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.cpp
+++ b/plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.cpp
@@ -44,7 +44,6 @@ AmoebaReferenceMultipoleForce::AmoebaReferenceMultipoleForce() :
                                                   _maximumMutualInducedDipoleIterations(100),
                                                   _mutualInducedDipoleEpsilon(1.0e+50),
                                                   _mutualInducedDipoleTargetEpsilon(1.0e-04),
-                                                   _polarSOR(0.55),
                                                   _debye(48.033324)
 {
    initialize();
@@ -60,7 +59,6 @@ AmoebaReferenceMultipoleForce::AmoebaReferenceMultipoleForce(NonbondedMethod non
                                                   _maximumMutualInducedDipoleIterations(100),
                                                   _mutualInducedDipoleEpsilon(1.0e+50),
                                                   _mutualInducedDipoleTargetEpsilon(1.0e-04),
-                                                   _polarSOR(0.55),
                                                   _debye(48.033324)
 {
    initialize();
@@ -890,78 +888,6 @@ void AmoebaReferenceMultipoleForce::calculateInducedDipoleFields(const vector<Mu
            calculateInducedDipolePairIxns(particleData[ii], particleData[jj], updateInducedDipoleFields);
 }

-double AmoebaReferenceMultipoleForce::updateInducedDipoleFields(const vector<MultipoleParticleData>& particleData,
-                                                                vector<UpdateInducedDipoleFieldStruct>& updateInducedDipoleFields)
-{
-    // Calculate the fields coming from induced dipoles.
-
-    calculateInducedDipoleFields(particleData, updateInducedDipoleFields);
-
-    // Update the induced dipoles and calculate the convergence factor, maxEpsilon
-
-    double maxEpsilon = 0.0;
-    for (auto& field : updateInducedDipoleFields) {
-        double epsilon = updateInducedDipole(particleData,
-                                             *field.fixedMultipoleField,
-                                             field.inducedDipoleField,
-                                             *field.inducedDipoles);
-
-        maxEpsilon = epsilon > maxEpsilon ? epsilon : maxEpsilon;
-    }
-
-    return maxEpsilon;
-}
-
-double AmoebaReferenceMultipoleForce::updateInducedDipole(const vector<MultipoleParticleData>& particleData,
-                                                          const vector<Vec3>& fixedMultipoleField,
-                                                          const vector<Vec3>& inducedDipoleField,
-                                                          vector<Vec3>& inducedDipole)
-{
-
-    double epsilon = 0.0;
-    for (unsigned int ii = 0; ii < particleData.size(); ii++) {
-        Vec3 oldValue               = inducedDipole[ii];
-        Vec3 newValue               = fixedMultipoleField[ii] + inducedDipoleField[ii]*particleData[ii].polarity;
-        Vec3 delta                  = newValue - oldValue;
-        inducedDipole[ii]           = oldValue + delta*_polarSOR;
-        epsilon                    += delta.dot(delta);
-    }
-    return epsilon;
-}
-
-void AmoebaReferenceMultipoleForce::convergeInduceDipolesBySOR(const vector<MultipoleParticleData>& particleData,
-                                                               vector<UpdateInducedDipoleFieldStruct>& updateInducedDipoleField)
-{
-
-    bool done = false;
-    setMutualInducedDipoleConverged(false);
-    int iteration = 0;
-    double currentEpsilon = 1.0e+50;
-
-    // loop until (1) induced dipoles are converged or
-    //            (2) iterations == max iterations or
-    //            (3) convergence factor (spsilon) increases
-
-    while (!done) {
-
-        double epsilon = updateInducedDipoleFields(particleData, updateInducedDipoleField);
-               epsilon = _polarSOR*_debye*sqrt(epsilon/_numParticles);
-
-        if (epsilon < getMutualInducedDipoleTargetEpsilon()) {
-            setMutualInducedDipoleConverged(true);
-            done = true;
-        } else if (currentEpsilon < epsilon || iteration >= getMaximumMutualInducedDipoleIterations()) {
-            done = true;
-        }
-
-        currentEpsilon = epsilon;
-        iteration++;
-    }
-    setMutualInducedDipoleEpsilon(currentEpsilon);
-    setMutualInducedDipoleIterations(iteration);
-}
-
-
 void AmoebaReferenceMultipoleForce::convergeInduceDipolesByExtrapolation(const vector<MultipoleParticleData>& particleData, vector<UpdateInducedDipoleFieldStruct>& updateInducedDipoleField) {
    // Start by storing the direct dipoles as PT0


--- a/plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.h
+++ b/plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.h
@@ -750,7 +750,6 @@ protected:
    std::vector<double>  _extPartCoefficients;
    double  _mutualInducedDipoleEpsilon;
    double  _mutualInducedDipoleTargetEpsilon;
-    double  _polarSOR;
    double  _debye;

    /**
@@ -1027,14 +1026,6 @@ protected:
     * @param particleData              vector of particle positions and parameters (charge, labFrame dipoles, quadrupoles, ...)
     * @param updateInducedDipoleFields vector of UpdateInducedDipoleFieldStruct containing input induced dipoles and output fields
     */
-    void convergeInduceDipolesBySOR(const std::vector<MultipoleParticleData>& particleData,
-                                    std::vector<UpdateInducedDipoleFieldStruct>& calculateInducedDipoleField);
-    /**
-     * Converge induced dipoles.
-     * 
-     * @param particleData              vector of particle positions and parameters (charge, labFrame dipoles, quadrupoles, ...)
-     * @param updateInducedDipoleFields vector of UpdateInducedDipoleFieldStruct containing input induced dipoles and output fields
-     */
    void convergeInduceDipolesByDIIS(const std::vector<MultipoleParticleData>& particleData,
                                     std::vector<UpdateInducedDipoleFieldStruct>& calculateInducedDipoleField);
    
@@ -1046,28 +1037,6 @@ protected:
     */
    void computeDIISCoefficients(const std::vector<std::vector<Vec3> >& prevErrors, std::vector<double>& coefficients) const;

-    /**
-     * Update fields due to induced dipoles for each particle.
-     * 
-     * @param particleData              vector of particle positions and parameters (charge, labFrame dipoles, quadrupoles, ...)
-     * @param updateInducedDipoleFields vector of UpdateInducedDipoleFieldStruct containing input induced dipoles and output fields
-     */
-    double updateInducedDipoleFields(const std::vector<MultipoleParticleData>& particleData,
-                                     std::vector<UpdateInducedDipoleFieldStruct>& calculateInducedDipoleField);
-
-    /**
-     * Update induced dipole for a particle given updated induced dipole field at the site.
-     * 
-     * @param particleI                 positions and parameters (charge, labFrame dipoles, quadrupoles, ...) for particle I
-     * @param fixedMultipoleField       fields due fixed multipoles at each site
-     * @param inducedDipoleField        fields due induced dipoles at each site
-     * @param inducedDipoles            output vector of updated induced dipoles
-     */
-    double updateInducedDipole(const std::vector<MultipoleParticleData>& particleI,
-                               const std::vector<Vec3>& fixedMultipoleField,
-                               const std::vector<Vec3>& inducedDipoleField,
-                               std::vector<Vec3>& inducedDipoles);
-
    /**
     * Calculate induced dipoles.
     *