Merge branch 'master' of https://github.com/SimTk/openmm

67004571 · peastman · 37457b25 · 25f1080a · 67004571 · 67004571
Commit 67004571 authored Jul 23, 2014 by peastman
20 changed files
--- a/docs-source/usersguide/zbibliography.rst
+++ b/docs-source/usersguide/zbibliography.rst
+.. only:: html
+
+   Bibliography
+   ############
+
+.. bibliography:: references.bib
+   :style: unsrt
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
+#
+# Build and install API documentation
+#
+
+find_package(Doxygen QUIET)
+mark_as_advanced(CLEAR DOXYGEN_EXECUTABLE)
+if(DOXYGEN_EXECUTABLE)
+    # ------------------------------------------------------------------
+    # C++ API reference
+    # ------------------------------------------------------------------
+
+    # Public headers: used both as rebuild triggers for the Doxygen rule and
+    # as IDE-visible sources of the documentation targets.
+    # NOTE(review): these patterns are relative, so they are presumably
+    # resolved against the current source directory -- confirm that
+    # openmm/ and olla/ are reachable from where this file is processed.
+    file(GLOB_RECURSE OPENMM_INCLUDES "openmm/include/*.h")
+    file(GLOB_RECURSE OLLA_INCLUDES "olla/include/*.h")
+
+    # Instantiate the Doxygen configuration from its template (@VAR@ only).
+    set(DOXY_CONFIG_C++ "${CMAKE_BINARY_DIR}/DoxyfileC++")
+    configure_file(
+        ${CMAKE_CURRENT_SOURCE_DIR}/DoxyfileC++.in
+        ${DOXY_CONFIG_C++}
+        @ONLY
+    )
+
+    # Rule that produces the HTML tree; index.html stands in for the whole tree.
+    add_custom_command(
+        OUTPUT "${CMAKE_BINARY_DIR}/api-c++/index.html"
+        COMMAND ${DOXYGEN_EXECUTABLE} ${DOXY_CONFIG_C++}
+        DEPENDS ${OPENMM_INCLUDES} ${OLLA_INCLUDES}
+        WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
+        COMMENT "Generating C++ API documentation using Doxygen"
+    )
+    add_custom_target(C++ApiDocs
+        DEPENDS "${CMAKE_BINARY_DIR}/api-c++/index.html"
+        COMMENT "Generating C++ API documentation using Doxygen"
+        SOURCES
+            "${CMAKE_CURRENT_SOURCE_DIR}/DoxyfileC++.in"
+            ${OPENMM_INCLUDES}
+            ${OLLA_INCLUDES}
+    )
+
+    # Create the output directory at configure time so the install rule
+    # below has a directory to copy even when the docs were never built.
+    file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/api-c++/")
+    install(
+        DIRECTORY "${PROJECT_BINARY_DIR}/api-c++/"
+        DESTINATION "docs/api-c++/"
+    )
+    install(
+        FILES "C++ API Reference.html"
+        DESTINATION "docs/"
+    )
+
+    # Umbrella target for all Doxygen output; the Python docs are attached
+    # to it further down when the Python wrappers are enabled.
+    add_custom_target(DoxygenApiDocs
+        DEPENDS "${CMAKE_BINARY_DIR}/api-c++/index.html"
+        COMMENT "Generating C++ API documentation using Doxygen"
+        SOURCES
+            "${CMAKE_CURRENT_SOURCE_DIR}/DoxyfileC++.in"
+            "${CMAKE_CURRENT_SOURCE_DIR}/DoxyfilePython.in"
+            ${OPENMM_INCLUDES}
+            ${OLLA_INCLUDES}
+    )
+
+    # Opt-in switch: when ON, the umbrella target becomes part of "all".
+    set(OPENMM_GENERATE_API_DOCS OFF CACHE BOOL "Whether to create API documentation using Doxygen")
+    if(OPENMM_GENERATE_API_DOCS)
+        set_target_properties(DoxygenApiDocs PROPERTIES EXCLUDE_FROM_ALL FALSE)
+    endif()
+
+    # ------------------------------------------------------------------
+    # Python API reference (only when the Python wrappers are built)
+    # ------------------------------------------------------------------
+
+    if(OPENMM_BUILD_PYTHON_WRAPPERS)
+        set(DOXY_CONFIG_PYTHON "${CMAKE_BINARY_DIR}/DoxyfilePython")
+        configure_file(
+            ${CMAKE_CURRENT_SOURCE_DIR}/DoxyfilePython.in
+            ${DOXY_CONFIG_PYTHON}
+            @ONLY
+        )
+
+        # RunSwig is not defined in this file; presumably it is the target
+        # that generates the Python sources Doxygen reads -- confirm.
+        add_custom_command(
+            OUTPUT "${CMAKE_BINARY_DIR}/api-python/index.html"
+            COMMAND ${DOXYGEN_EXECUTABLE} ${DOXY_CONFIG_PYTHON}
+            DEPENDS RunSwig
+            WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
+            COMMENT "Generating Python API documentation using Doxygen"
+        )
+        add_custom_target(PythonApiDocs
+            DEPENDS "${CMAKE_BINARY_DIR}/api-python/index.html"
+            COMMENT "Generating Python API documentation using Doxygen"
+            SOURCES
+                "${CMAKE_CURRENT_SOURCE_DIR}/DoxyfilePython.in"
+                ${OPENMM_INCLUDES}
+                ${OLLA_INCLUDES}
+        )
+
+        file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/api-python/")
+        install(
+            DIRECTORY "${PROJECT_BINARY_DIR}/api-python/"
+            DESTINATION "docs/api-python/"
+        )
+        install(
+            FILES "Python API Reference.html"
+            DESTINATION "docs/"
+        )
+
+        # Building the umbrella target also builds the Python docs.
+        add_dependencies(DoxygenApiDocs PythonApiDocs)
+    endif()
+endif()
+
+
+#
+# Build and install the User Guide and Developer Guide
+#
+
+# Root of all Sphinx output. Kept WITHOUT a trailing slash so that every use
+# below can join path components with a single "/".
+set(SPHINX_BUILD_DIR "${CMAKE_BINARY_DIR}/sphinx-docs")
+file(MAKE_DIRECTORY "${SPHINX_BUILD_DIR}")
+
+# Each rule below runs the Makefile found in the guide's source directory via
+# CMAKE_MAKE_PROGRAM, passing the output location (BUILDDIR) and the version
+# string (OPENMM_VERSION) as make variables.
+# NOTE(review): this presumably only works when CMAKE_MAKE_PROGRAM is a
+# make-compatible tool (i.e. a Makefile generator) -- confirm.
+# NOTE(review): the rules declare no DEPENDS, so once an output file exists
+# it is not regenerated when the guide sources change.
+
+# --- PDF guides (Sphinx "latexpdf" builder) ---
+add_custom_command(
+    OUTPUT "${SPHINX_BUILD_DIR}/userguide/latex/OpenMMUsersGuide.pdf"
+    COMMAND "${CMAKE_MAKE_PROGRAM}" BUILDDIR="${SPHINX_BUILD_DIR}/userguide" OPENMM_VERSION="${OPENMM_MAJOR_VERSION}.${OPENMM_MINOR_VERSION}" latexpdf
+    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/usersguide"
+    COMMENT "Generating PDF user guide"
+)
+add_custom_command(
+    OUTPUT "${SPHINX_BUILD_DIR}/developerguide/latex/OpenMMDeveloperGuide.pdf"
+    COMMAND "${CMAKE_MAKE_PROGRAM}" BUILDDIR="${SPHINX_BUILD_DIR}/developerguide" OPENMM_VERSION="${OPENMM_MAJOR_VERSION}.${OPENMM_MINOR_VERSION}" latexpdf
+    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/developerguide"
+    COMMENT "Generating PDF developer guide"
+)
+add_custom_target(sphinxpdf
+    DEPENDS
+        "${SPHINX_BUILD_DIR}/userguide/latex/OpenMMUsersGuide.pdf"
+        "${SPHINX_BUILD_DIR}/developerguide/latex/OpenMMDeveloperGuide.pdf"
+)
+
+# --- HTML guides (Sphinx "html" builder) ---
+# The progress messages previously said "PDF" here (copy-paste from above).
+add_custom_command(
+    OUTPUT "${SPHINX_BUILD_DIR}/userguide/html/index.html"
+    COMMAND "${CMAKE_MAKE_PROGRAM}" BUILDDIR="${SPHINX_BUILD_DIR}/userguide" OPENMM_VERSION="${OPENMM_MAJOR_VERSION}.${OPENMM_MINOR_VERSION}" html
+    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/usersguide"
+    COMMENT "Generating HTML user guide"
+)
+add_custom_command(
+    OUTPUT "${SPHINX_BUILD_DIR}/developerguide/html/index.html"
+    COMMAND "${CMAKE_MAKE_PROGRAM}" BUILDDIR="${SPHINX_BUILD_DIR}/developerguide" OPENMM_VERSION="${OPENMM_MAJOR_VERSION}.${OPENMM_MINOR_VERSION}" html
+    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/developerguide"
+    COMMENT "Generating HTML developer guide"
+)
+add_custom_target(sphinxhtml
+    DEPENDS
+        "${SPHINX_BUILD_DIR}/userguide/html/index.html"
+        "${SPHINX_BUILD_DIR}/developerguide/html/index.html"
+)
+
+# Install the PDF guides. The paths are spelled exactly like the OUTPUT
+# entries of the PDF rules above.
+install(
+    FILES
+        "${SPHINX_BUILD_DIR}/userguide/latex/OpenMMUsersGuide.pdf"
+        "${SPHINX_BUILD_DIR}/developerguide/latex/OpenMMDeveloperGuide.pdf"
+    DESTINATION docs/)
+
+# Install every text file found directly under licenses/ (non-recursive glob).
+file(GLOB LICENSE_FILES "licenses/*.txt")
+install(
+    FILES ${LICENSE_FILES}
+    DESTINATION licenses/
+)
--- a/docs/developerguide/conf.py
+++ b/docs/developerguide/conf.py
@@ -49,9 +49,9 @@ copyright = u'2011-2014, Stanford University'
 # built documents.
 #
 # The short X.Y version.
-version = '6.0'
+version = os.getenv('OPENMM_VERSION')
 # The full version, including alpha/beta/rc tags.
-release = '6.0'
+release = os.getenv('OPENMM_VERSION')

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

--- a/docs/sphinx/numsec.py
+++ b/docs/sphinx/numsec.py
+"""
+Changes section references to be the section number
+instead of the title of the section.
+"""
+
+from docutils import nodes
+import sphinx.domains.std
+
+class CustomStandardDomain(sphinx.domains.std.StandardDomain):
+
+    def __init__(self, env):
+        env.settings['footnote_references'] = 'superscript'
+        sphinx.domains.std.StandardDomain.__init__(self, env)
+
+    def resolve_xref(self, env, fromdocname, builder,
+                     typ, target, node, contnode):
+        res = super(CustomStandardDomain, self).resolve_xref(env, fromdocname, builder,
+                                                            typ, target, node, contnode)
+        
+        if res is None:
+            return res
+        
+        if typ == 'ref' and not node['refexplicit']:
+            docname, labelid, sectname = self.data['labels'].get(target, ('','',''))
+            res['refdocname'] = docname
+        
+        return res
+
+def doctree_resolved(app, doctree, docname):
+    secnums = app.builder.env.toc_secnumbers
+    for node in doctree.traverse(nodes.reference):
+        if 'refdocname' in node:
+            refdocname = node['refdocname']
+            if refdocname in secnums:
+                secnum = secnums[refdocname]
+                emphnode = node.children[0]
+                textnode = emphnode.children[0]
+                
+                toclist = app.builder.env.tocs[refdocname]
+                anchorname = None
+                for refnode in toclist.traverse(nodes.reference):
+                    if refnode.astext() == textnode.astext():
+                        anchorname = refnode['anchorname']
+                if anchorname is None:
+                    continue
+                linktext = '.'.join(map(str, secnum[anchorname]))
+                node.replace(emphnode, nodes.Text(linktext))
+
+def setup(app):
+    """Sphinx extension entry point.
+
+    Overrides the standard domain with CustomStandardDomain and connects
+    doctree_resolved to the 'doctree-resolved' event.
+    """
+    app.override_domain(CustomStandardDomain)
+    app.connect('doctree-resolved', doctree_resolved)
+
--- a/docs/usersguide/application.rst
+++ b/docs/usersguide/application.rst
@@ -2188,7 +2188,7 @@ second atom has class OS and the third has class P:
    <Proper class1="" class2="OS" class3="P" class4="" per="3" phase="0.0" k="0.66944"/>

 <CustomNonbondedForce>
-===============
+======================

 To add a CustomNonbondedForce to the System, include a tag that looks like this:


--- a/docs/usersguide/conf.py
+++ b/docs/usersguide/conf.py
@@ -49,9 +49,9 @@ copyright = u'2008-2014, Stanford University'
 # built documents.
 #
 # The short X.Y version.
-version = '6.0'
+version = os.getenv('OPENMM_VERSION')
 # The full version, including alpha/beta/rc tags.
-release = '6.0'
+release = os.getenv('OPENMM_VERSION')

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
@@ -40,6 +40,7 @@
 #include "CudaForceInfo.h"
 #include "CudaKernelSources.h"
 #include "CudaNonbondedUtilities.h"
+#include "jama_svd.h"

 #include <algorithm>
 #include <cmath>
@@ -796,8 +797,9 @@ private:
 CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : 
        CalcAmoebaMultipoleForceKernel(name, platform), cu(cu), system(system), hasInitializedScaleFactors(false), hasInitializedFFT(false), multipolesAreValid(false),
        multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL),
-        field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL),
-        inducedDipole(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
+        field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL), inducedDipole(NULL),
+        diisCoefficients(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), prevDipoles(NULL), prevDipolesPolar(NULL), prevDipolesGk(NULL),
+        prevDipolesGkPolar(NULL), prevErrors(NULL), diisMatrix(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
        pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeIgrid(NULL), pmePhi(NULL),
        pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeAtomGridIndex(NULL), lastPositions(NULL), sort(NULL), gkKernel(NULL) {
 }
@@ -832,6 +834,20 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
        delete inducedDipolePolar;
    if (inducedDipoleErrors != NULL)
        delete inducedDipoleErrors;
+    if (prevDipoles != NULL)
+        delete prevDipoles;
+    if (prevDipolesPolar != NULL)
+        delete prevDipolesPolar;
+    if (prevDipolesGk != NULL)
+        delete prevDipolesGk;
+    if (prevDipolesGkPolar != NULL)
+        delete prevDipolesGkPolar;
+    if (prevErrors != NULL)
+        delete prevErrors;
+    if (diisMatrix != NULL)
+        delete diisMatrix;
+    if (diisCoefficients != NULL)
+        delete diisCoefficients;
    if (polarizability != NULL)
        delete polarizability;
    if (covalentFlags != NULL)
@@ -959,6 +975,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
    inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole");
    inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar");
    inducedDipoleErrors = new CudaArray(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors");
+    // Buffers used by the DIIS solver: MaxPrevDIISDipoles history slots of 3*numMultipoles
+    // values each for the dipoles and the errors, plus the matrix and coefficient arrays.
+    prevDipoles = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipoles");
+    prevDipolesPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesPolar");
+    prevErrors = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevErrors");
+    diisMatrix = new CudaArray(cu, MaxPrevDIISDipoles*MaxPrevDIISDipoles, elementSize, "diisMatrix");
+    // Give this array its own name; it was previously labelled "diisMatrix" as well.
+    diisCoefficients = new CudaArray(cu, MaxPrevDIISDipoles+1, sizeof(float), "diisCoefficients");
    cu.addAutoclearBuffer(*field);
    cu.addAutoclearBuffer(*fieldPolar);
    cu.addAutoclearBuffer(*torque);
@@ -1088,6 +1109,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
        defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric));
        fixedThreadMemory += 4*elementSize;
        inducedThreadMemory += 13*elementSize;
+        prevDipolesGk = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGk");
+        prevDipolesGkPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGkPolar");
    }
    int maxThreads = cu.getNonbondedUtilities().getForceThreadBlockSize();
    fixedFieldThreads = min(maxThreads, cu.computeThreadBlockSize(fixedThreadMemory));
@@ -1102,9 +1125,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
    computeFixedFieldKernel = cu.getKernel(module, "computeFixedField");
    if (maxInducedIterations > 0) {
        defines["THREAD_BLOCK_SIZE"] = cu.intToString(inducedFieldThreads);
+        defines["MAX_PREV_DIIS_DIPOLES"] = cu.intToString(MaxPrevDIISDipoles);
        module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleInducedField, defines);
        computeInducedFieldKernel = cu.getKernel(module, "computeInducedField");
-        updateInducedFieldKernel = cu.getKernel(module, "updateInducedFieldBySOR");
+        updateInducedFieldKernel = cu.getKernel(module, "updateInducedFieldByDIIS");
+        recordDIISDipolesKernel = cu.getKernel(module, "recordInducedDipolesForDIIS");
+        buildMatrixKernel = cu.getKernel(module, "computeDIISMatrix");
    }
    stringstream electrostaticsSource;
    if (usePME) {
@@ -1421,7 +1447,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
        
        // Iterate until the dipoles converge.
        
-        vector<float2> errors;
        for (int i = 0; i < maxInducedIterations; i++) {
            cu.clearBuffer(*inducedField);
            cu.clearBuffer(*inducedFieldPolar);
@@ -1440,23 +1465,9 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
                    &gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
                    &gkKernel->getBornRadii()->getDevicePointer(), &dampingAndThole->getDevicePointer()};
                cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
-                void* updateInducedGkFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
-                    &gkKernel->getField()->getDevicePointer(), &gkKernel->getInducedField()->getDevicePointer(),
-                    &gkKernel->getInducedFieldPolar()->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(),
-                    &gkKernel->getInducedDipolesPolar()->getDevicePointer(), &polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
-                cu.executeKernel(updateInducedFieldKernel, updateInducedGkFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
-            }
-            void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &npt, &inducedField->getDevicePointer(),
-                &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
-                &polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
-            cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
-            inducedDipoleErrors->download(errors);
-            double total1 = 0.0, total2 = 0.0;
-            for (int j = 0; j < (int) errors.size(); j++) {
-                total1 += errors[j].x;
-                total2 += errors[j].y;
            }
-            if (48.033324*sqrt(max(total1, total2)/cu.getNumAtoms()) < inducedEpsilon)
+            double maxEpsilon = iterateDipolesByDIIS(i);
+            if (maxEpsilon < inducedEpsilon)
                break;
        }
        
@@ -1568,17 +1579,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
            void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
                &inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
            cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
-            void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &npt, &inducedField->getDevicePointer(),
-                &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
-                &polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
-            cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
-            inducedDipoleErrors->download(errors);
-            double total1 = 0.0, total2 = 0.0;
-            for (int j = 0; j < (int) errors.size(); j++) {
-                total1 += errors[j].x;
-                total2 += errors[j].y;
-            }
-            if (48.033324*sqrt(max(total1, total2)/cu.getNumAtoms()) < inducedEpsilon)
+            double maxEpsilon = iterateDipolesByDIIS(i);
+            if (maxEpsilon < inducedEpsilon)
                break;
        }
        
@@ -1612,6 +1614,88 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
    return 0.0;
 }

+/**
+ * Perform one DIIS update of the induced dipoles.
+ *
+ * The steps are:
+ *   1. Launch recordDIISDipolesKernel to append the newly computed dipoles (and, for the
+ *      non-GK call, the errors) to the history buffers and to write per-block error sums.
+ *   2. Launch buildMatrixKernel to fill in the entries of diisMatrix that involve the newest
+ *      error vector.
+ *   3. On the host, solve the bordered system for the mixing coefficients with an SVD
+ *      (skipped on iteration 0, where the single stored dipole set gets coefficient 1).
+ *   4. Upload the coefficients and launch updateInducedFieldKernel to form the new dipoles
+ *      as a linear combination of the stored ones.
+ *
+ * @param iteration  zero-based index of the current induced dipole iteration
+ * @return 48.033324*sqrt(max(sum of errors.x, sum of errors.y)/numAtoms), which the caller
+ *         compares against inducedEpsilon (the constant is presumably a unit conversion --
+ *         TODO confirm)
+ */
+double CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
+    void* npt = NULL;
+    bool trueValue = true, falseValue = false;
+    int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
+    
+    // Record the dipole and errors into the lists of previous dipoles.  When a GK kernel is
+    // present its dipoles are recorded first, with recordPrevErrors == false so that only the
+    // second launch writes prevErrors and shifts diisMatrix.
+    
+    if (gkKernel != NULL) {
+        void* recordDIISDipolesGkArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &gkKernel->getField()->getDevicePointer(), &gkKernel->getInducedField()->getDevicePointer(),
+            &gkKernel->getInducedFieldPolar()->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
+            &polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer(), &prevDipolesGk->getDevicePointer(),
+            &prevDipolesGkPolar->getDevicePointer(), &prevErrors->getDevicePointer(), &iteration, &falseValue, &diisMatrix->getDevicePointer()};
+        cu.executeKernel(recordDIISDipolesKernel, recordDIISDipolesGkArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
+    }
+    void* recordDIISDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &npt, &inducedField->getDevicePointer(),
+        &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
+        &polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer(), &prevDipoles->getDevicePointer(),
+        &prevDipolesPolar->getDevicePointer(), &prevErrors->getDevicePointer(), &iteration, &trueValue, &diisMatrix->getDevicePointer()};
+    cu.executeKernel(recordDIISDipolesKernel, recordDIISDipolesArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
+
+    // Start copying the per-block error sums to the host; they are only read at the very end.
+    // NOTE(review): the download is requested with blocking == false, so the final read relies
+    // on the blocking transfers issued below having completed it first -- confirm.
+    float2* errors = (float2*) cu.getPinnedBuffer();
+    inducedDipoleErrors->download(errors, false);
+    
+    // Determine the coefficients for selecting the new dipoles.
+    
+    int numPrev = (iteration+1 < MaxPrevDIISDipoles ? iteration+1 : MaxPrevDIISDipoles);
+    void* buildMatrixArgs[] = {&prevErrors->getDevicePointer(), &iteration, &diisMatrix->getDevicePointer()};
+    int threadBlocks = min(numPrev, cu.getNumThreadBlocks());
+    cu.executeKernel(buildMatrixKernel, buildMatrixArgs, threadBlocks*128, 128, 128*elementSize);
+
+    // The host copy must be as large as the device array: upload() below is given only a
+    // pointer, so it is assumed to copy the whole array (MaxPrevDIISDipoles+1 floats).  Sizing
+    // the vector to MaxPrevDIISDipoles would let that copy read past the end of the vector.
+    // Unused trailing entries stay zero.
+    vector<float> coefficients(diisCoefficients->getSize());
+    if (iteration == 0)
+        coefficients[0] = 1;
+    else {
+        vector<float> matrix;
+        diisMatrix->download(matrix);
+
+        // Build the bordered matrix: row and column 0 hold the constraint (-1 entries, 0 in the
+        // corner) and the remaining numPrev x numPrev block holds the error overlaps.
+
+        int rank = numPrev+1;
+        Array2D<double> b(rank, rank);
+        b[0][0] = 0;
+        for (int i = 1; i < rank; i++)
+            b[i][0] = b[0][i] = -1;
+        for (int i = 0; i < numPrev; i++)
+            for (int j = 0; j < numPrev; j++)
+                b[i+1][j+1] = matrix[i*MaxPrevDIISDipoles+j];
+
+        // Solve using SVD.  Since the right hand side is (-1, 0, 0, 0, ...), this is simpler than the general case.
+
+        JAMA::SVD<double> svd(b);
+        Array2D<double> u, v;
+        svd.getU(u);
+        svd.getV(v);
+        Array1D<double> s;
+        svd.getSingularValues(s);
+
+        // Only the first effectiveRank singular values are used, so singular directions beyond
+        // the reported rank do not contribute.
+
+        int effectiveRank = svd.rank();
+        for (int i = 1; i < rank; i++) {
+            double d = 0;
+            for (int j = 0; j < effectiveRank; j++)
+                d -= u[0][j]*v[i][j]/s[j];
+            coefficients[i-1] = d;
+        }
+    }
+    diisCoefficients->upload(&coefficients[0]);
+    
+    // Compute the dipoles.
+    
+    void* updateInducedFieldArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
+        &prevDipoles->getDevicePointer(), &prevDipolesPolar->getDevicePointer(), &diisCoefficients->getDevicePointer(), &numPrev};
+    cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize);
+    if (gkKernel != NULL) {
+        void* updateInducedFieldGkArgs[] = {&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
+            &prevDipolesGk->getDevicePointer(), &prevDipolesGkPolar->getDevicePointer(), &diisCoefficients->getDevicePointer(), &numPrev};
+        cu.executeKernel(updateInducedFieldKernel, updateInducedFieldGkArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize);
+    }
+    
+    // Compute the overall error for monitoring convergence.
+    
+    double total1 = 0.0, total2 = 0.0;
+    for (int j = 0; j < inducedDipoleErrors->getSize(); j++) {
+        total1 += errors[j].x;
+        total2 += errors[j].y;
+    }
+    return 48.033324*sqrt(max(total1, total2)/cu.getNumAtoms());
+}
+
 void CudaCalcAmoebaMultipoleForceKernel::ensureMultipolesValid(ContextImpl& context) {
    if (multipolesAreValid) {
        int numParticles = cu.getNumAtoms();

--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
@@ -375,6 +375,7 @@ private:
        const char* getSortKey() const {return "value.y";}
    };
    void initializeScaleFactors();
+    double iterateDipolesByDIIS(int iteration);
    void ensureMultipolesValid(ContextImpl& context);
    template <class T, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments);
    int numMultipoles, maxInducedIterations;
@@ -399,6 +400,13 @@ private:
    CudaArray* inducedDipole;
    CudaArray* inducedDipolePolar;
    CudaArray* inducedDipoleErrors;
+    CudaArray* prevDipoles;
+    CudaArray* prevDipolesPolar;
+    CudaArray* prevDipolesGk;
+    CudaArray* prevDipolesGkPolar;
+    CudaArray* prevErrors;
+    CudaArray* diisMatrix;
+    CudaArray* diisCoefficients;
    CudaArray* polarizability;
    CudaArray* covalentFlags;
    CudaArray* polarizationGroupFlags;
@@ -419,8 +427,10 @@ private:
    CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
    CUfunction pmeGridIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeFinishSpreadChargeKernel, pmeConvolutionKernel;
    CUfunction pmeFixedPotentialKernel, pmeInducedPotentialKernel, pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel;
+    CUfunction recordDIISDipolesKernel, buildMatrixKernel;
    CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel;
    static const int PmeOrder = 5;
+    static const int MaxPrevDIISDipoles = 20;
 };

 /**

--- a/plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
@@ -485,7 +485,7 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
    
    buffer[threadIdx.x] = make_real2(sumErrors, sumPolarErrors);
    __syncthreads();
-    for (int offset = 1; offset < blockDim.x; offset *= 2) {   
+    for (int offset = 1; offset < blockDim.x; offset *= 2) {
        if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0) {
            buffer[threadIdx.x].x += buffer[threadIdx.x+offset].x;
            buffer[threadIdx.x].y += buffer[threadIdx.x+offset].y;
@@ -494,4 +494,115 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
    }
    if (threadIdx.x == 0)
        errors[blockIdx.x] = make_float2((float) buffer[0].x, (float) buffer[0].y);
-}
\ No newline at end of file
+}
+
+/**
+ * Compute the next induced dipoles and append them to the DIIS history.
+ *
+ * For every atom and Cartesian component the kernel
+ *   - shifts the stored history down by one slot once all MAX_PREV_DIIS_DIPOLES slots are in use,
+ *   - computes newDipole = polarizability*((fixed + fixedS + induced field)*fieldScale + ewaldScale*oldDipole)
+ *     (and the same for the "polar" set),
+ *   - stores the result in history slot min(iteration, MAX_PREV_DIIS_DIPOLES-1),
+ *   - accumulates the squared change relative to the current dipole.
+ * The squared changes are then summed over each thread block and written to errors[blockIdx.x]
+ * as (sum, sumPolar).  Finally, when the history is full, block 0 shifts the DIIS matrix by one
+ * row and one column.
+ *
+ * Note that inducedDipole/inducedDipolePolar are only read here; the new values go to the
+ * history buffers.
+ *
+ * @param fixedFieldS       optional extra field added to both sets; may be NULL
+ * @param errors            one float2 per thread block
+ * @param prevErrors        written (and shifted) only when recordPrevErrors is true; holds
+ *                          newDipole-oldDipole of the non-polar set
+ * @param iteration         zero-based iteration index
+ * @param recordPrevErrors  also gates the shift of matrix
+ *
+ * The launch must provide blockDim.x*sizeof(real2) bytes of dynamic shared memory.
+ */
+extern "C" __global__ void recordInducedDipolesForDIIS(const long long* __restrict__ fixedField, const long long* __restrict__ fixedFieldPolar,
+        const long long* __restrict__ fixedFieldS, const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar,
+        const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors,
+        real* __restrict__ prevDipoles, real* __restrict__ prevDipolesPolar, real* __restrict__ prevErrors, int iteration, bool recordPrevErrors, real* __restrict__ matrix) {
+    extern __shared__ real2 buffer[];
+#ifdef USE_EWALD
+    const real ewaldScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
+#else
+    const real ewaldScale = 0;
+#endif
+    // The long long field values are scaled by 1/2^32 to obtain real values
+    // (presumably they are fixed-point accumulators -- TODO confirm).
+    const real fieldScale = 1/(real) 0x100000000;
+    real sumErrors = 0;
+    real sumPolarErrors = 0;
+    for (int atom = blockIdx.x*blockDim.x + threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
+        real scale = polarizability[atom];
+        for (int component = 0; component < 3; component++) {
+            // Dipoles are indexed as 3*atom+component, fields as atom+component*PADDED_NUM_ATOMS.
+            int dipoleIndex = 3*atom+component;
+            int fieldIndex = atom+component*PADDED_NUM_ATOMS;
+            if (iteration >= MAX_PREV_DIIS_DIPOLES) {
+                // We have filled up the buffer for previous dipoles, so shift them all over by one.
+                
+                for (int i = 1; i < MAX_PREV_DIIS_DIPOLES; i++) {
+                    int index1 = dipoleIndex+(i-1)*NUM_ATOMS*3;
+                    int index2 = dipoleIndex+i*NUM_ATOMS*3;
+                    prevDipoles[index1] = prevDipoles[index2];
+                    prevDipolesPolar[index1] = prevDipolesPolar[index2];
+                    if (recordPrevErrors)
+                        prevErrors[index1] = prevErrors[index2];
+                }
+            }
+            
+            // Compute the new dipole, and record it along with the error.
+            
+            real oldDipole = inducedDipole[dipoleIndex];
+            real oldDipolePolar = inducedDipolePolar[dipoleIndex];
+            long long fixedS = (fixedFieldS == NULL ? (long long) 0 : fixedFieldS[fieldIndex]);
+            real newDipole = scale*((fixedField[fieldIndex]+fixedS+inducedField[fieldIndex])*fieldScale+ewaldScale*oldDipole);
+            real newDipolePolar = scale*((fixedFieldPolar[fieldIndex]+fixedS+inducedFieldPolar[fieldIndex])*fieldScale+ewaldScale*oldDipolePolar);
+            // Once the history is full the newest entry always goes into the last slot.
+            int storePrevIndex = dipoleIndex+min(iteration, MAX_PREV_DIIS_DIPOLES-1)*NUM_ATOMS*3;
+            prevDipoles[storePrevIndex] = newDipole;
+            prevDipolesPolar[storePrevIndex] = newDipolePolar;
+            if (recordPrevErrors)
+                prevErrors[storePrevIndex] = newDipole-oldDipole;
+            sumErrors += (newDipole-oldDipole)*(newDipole-oldDipole);
+            sumPolarErrors += (newDipolePolar-oldDipolePolar)*(newDipolePolar-oldDipolePolar);
+        }
+    }
+    
+    // Sum the errors over threads and store the total for this block.
+    
+    buffer[threadIdx.x] = make_real2(sumErrors, sumPolarErrors);
+    __syncthreads();
+    // Pairwise reduction in shared memory: at each step, threads whose index is a multiple
+    // of 2*offset add in the value found offset entries further along.
+    for (int offset = 1; offset < blockDim.x; offset *= 2) {
+        if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0) {
+            buffer[threadIdx.x].x += buffer[threadIdx.x+offset].x;
+            buffer[threadIdx.x].y += buffer[threadIdx.x+offset].y;
+        }
+        __syncthreads();
+    }
+    if (threadIdx.x == 0)
+        errors[blockIdx.x] = make_float2((float) buffer[0].x, (float) buffer[0].y);
+    
+    // The condition below depends only on kernel arguments and blockIdx.x, so every thread of
+    // a block takes the same branch and the __syncthreads() inside it is reached by all of them.
+    if (iteration >= MAX_PREV_DIIS_DIPOLES && recordPrevErrors && blockIdx.x == 0) {
+        // Shift over the existing matrix elements.
+        
+        // Row i receives row i+1 shifted left by one column; the barrier after each row keeps
+        // the write to a row from overlapping the read of it in the previous step.
+        for (int i = 0; i < MAX_PREV_DIIS_DIPOLES-1; i++) {
+            if (threadIdx.x < MAX_PREV_DIIS_DIPOLES-1)
+                matrix[threadIdx.x+i*MAX_PREV_DIIS_DIPOLES] = matrix[(threadIdx.x+1)+(i+1)*MAX_PREV_DIIS_DIPOLES];
+            __syncthreads();
+        }
+    }
+}
+
+/**
+ * Fill in the DIIS matrix entries that involve the most recently stored error vector.
+ *
+ * The newest vector lives in slot min(iteration, MAX_PREV_DIIS_DIPOLES-1).  Each thread block
+ * handles the slots blockIdx.x, blockIdx.x+gridDim.x, ... up to and including the newest one,
+ * computing the dot product of that slot's error vector with the newest error vector and
+ * writing it to both symmetric positions of the matrix.
+ *
+ * The launch must provide blockDim.x*sizeof(real) bytes of dynamic shared memory.
+ */
+extern "C" __global__ void computeDIISMatrix(real* __restrict__ prevErrors, int iteration, real* __restrict__ matrix) {
+    extern __shared__ real sumBuffer[];
+    const int newest = min(iteration, MAX_PREV_DIIS_DIPOLES-1);
+    const int newestOffset = newest*NUM_ATOMS*3;
+    for (int slot = blockIdx.x; slot <= newest; slot += gridDim.x) {
+        // Each thread accumulates a strided share of the dot product.
+
+        const int slotOffset = slot*NUM_ATOMS*3;
+        real partial = 0;
+        for (int k = threadIdx.x; k < NUM_ATOMS*3; k += blockDim.x)
+            partial += prevErrors[k+slotOffset]*prevErrors[k+newestOffset];
+        sumBuffer[threadIdx.x] = partial;
+        __syncthreads();
+
+        // Combine the per-thread partial sums pairwise in shared memory.
+
+        for (int stride = 1; stride < blockDim.x; stride *= 2) {
+            const bool isReceiver = ((threadIdx.x&(2*stride-1)) == 0);
+            if (threadIdx.x+stride < blockDim.x && isReceiver)
+                sumBuffer[threadIdx.x] += sumBuffer[threadIdx.x+stride];
+            __syncthreads();
+        }
+
+        // Thread 0 holds the total; store it symmetrically.
+
+        if (threadIdx.x == 0) {
+            matrix[slot+MAX_PREV_DIIS_DIPOLES*newest] = sumBuffer[0];
+            if (slot != newest)
+                matrix[newest+MAX_PREV_DIIS_DIPOLES*slot] = sumBuffer[0];
+        }
+    }
+}
+
+/**
+ * Form the new induced dipoles as a linear combination of the stored ones.
+ *
+ * For each of the 3*NUM_ATOMS dipole components, the first numPrev history slots of
+ * prevDipoles and prevDipolesPolar are weighted by coefficients[0..numPrev-1] and summed;
+ * the results overwrite inducedDipole and inducedDipolePolar.
+ */
+extern "C" __global__ void updateInducedFieldByDIIS(real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, 
+        const real* __restrict__ prevDipoles, const real* __restrict__ prevDipolesPolar, const float* __restrict__ coefficients, int numPrev) {
+    const int numValues = 3*NUM_ATOMS;
+    const unsigned int step = blockDim.x*gridDim.x;
+    for (int index = blockIdx.x*blockDim.x + threadIdx.x; index < numValues; index += step) {
+        real mixed = 0;
+        real mixedPolar = 0;
+        for (int slot = 0; slot < numPrev; slot++) {
+            const float weight = coefficients[slot];
+            const int source = slot*3*NUM_ATOMS+index;
+            mixed += weight*prevDipoles[source];
+            mixedPolar += weight*prevDipolesPolar[source];
+        }
+        inducedDipole[index] = mixed;
+        inducedDipolePolar[index] = mixedPolar;
+    }
+}
--- a/plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.cpp
+++ b/plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.cpp
--- a/plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.h
+++ b/plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.h
--- a/wrappers/python/CMakeLists.txt
+++ b/wrappers/python/CMakeLists.txt
@@ -55,13 +55,16 @@ foreach(SUBDIR ${SUBDIRS})
        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.xml"
        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.pdb"
        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.prmtop"
+        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.prm"
        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.inpcrd"
+        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.crd"
        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.parm7"
        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.rst7"
        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.ncrst"
        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.dms"
        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.top"
        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.par"
+        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*.str"
        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/*psf"
        "${CMAKE_CURRENT_SOURCE_DIR}/${SUBDIR}/charmm22.*"
    )

--- a/wrappers/python/simtk/openmm/app/__init__.py
+++ b/wrappers/python/simtk/openmm/app/__init__.py
@@ -28,7 +28,7 @@ from desmonddmsfile import DesmondDMSFile
 from checkpointreporter import CheckpointReporter
 from charmmcrdfiles import CharmmCrdFile, CharmmRstFile
 from charmmparameterset import CharmmParameterSet
-from charmmpsffile import CharmmPsfFile
+from charmmpsffile import CharmmPsfFile, CharmmPSFWarning

 # Enumerated values


--- a/wrappers/python/simtk/openmm/app/charmmparameterset.py
+++ b/wrappers/python/simtk/openmm/app/charmmparameterset.py
@@ -13,7 +13,7 @@ Copyright (c) 2014 the Authors

 Author: Jason M. Swails
 Contributors:
-Date: April 18, 2014
+Date: July 17, 2014

 Permission is hereby granted, free of charge, to any person obtaining a
 copy of this software and associated documentation files (the "Software"),
@@ -123,6 +123,8 @@ class CharmmParameterSet(object):
            elif arg.endswith('.str'):
                strs.append(arg)
            elif arg.endswith('.inp'):
+                # Only consider the file name (since the directory is likely
+                # "toppar" and will screw up file type detection)
                fname = os.path.split(arg)[1]
                if 'par' in fname:
                    pars.append(arg)
@@ -436,11 +438,29 @@ class CharmmParameterSet(object):
                try:
                    at1 = words[0]
                    at2 = words[1]
-                    emin = conv(words[2], float, 'NBFIX Emin')
+                    emin = abs(conv(words[2], float, 'NBFIX Emin'))
                    rmin = conv(words[3], float, 'NBFIX Rmin')
+                    try:
+                        emin14 = abs(conv(words[4], float, 'NBFIX Emin 1-4'))
+                        rmin14 = conv(words[5], float, 'NBFIX Rmin 1-4')
+                    except IndexError:
+                        emin14 = rmin14 = None
+                    try:
+                        self.atom_types_str[at1].add_nbfix(at2, rmin, emin,
+                                                           rmin14, emin14)
+                        self.atom_types_str[at2].add_nbfix(at1, rmin, emin,
+                                                           rmin14, emin14)
+                    except KeyError:
+                        # Some stream files define NBFIX terms with an atom that
+                        # is defined in another toppar file that does not
+                        # necessarily have to be loaded. As a result, not every
+                        # NBFIX found here will necessarily need to be applied.
+                        # If we can't find a particular atom type, don't bother
+                        # adding that nbfix and press on
+                        pass
                except IndexError:
                    raise CharmmFileError('Could not parse NBFIX terms.')
-                self.nbfix_types[(min(at1, at2), max(at1, at2))] = (emin, rmin)
+                self.nbfix_types[(min(at1, at2), max(at1, at2))] = (rmin, emin)
        # Now we're done. Load the nonbonded types into the relevant AtomType
        # instances. In order for this to work, all keys in nonbonded_types
        # must be in the self.atom_types_str dict. Raise a RuntimeError if this

--- a/wrappers/python/simtk/openmm/app/charmmpsffile.py
+++ b/wrappers/python/simtk/openmm/app/charmmpsffile.py
@@ -995,11 +995,25 @@ class CharmmPsfFile(object):
                            u.kilojoule_per_mole)
        ene_conv = dihe_frc_conv
      
-        # Create the system
+        # Create the system and determine whether any of our atom types have
+        # NBFIX terms (and therefore require a CustomNonbondedForce instead)
+        typenames = set()
        system = mm.System()
        if verbose: print('Adding particles...')
        for atom in self.atom_list:
+            typenames.add(atom.type.name)
            system.addParticle(atom.mass)
+        has_nbfix_terms = False
+        typenames = list(typenames)
+        try:
+            for i, typename in enumerate(typenames):
+                typ = params.atom_types_str[typename]
+                for j in range(i, len(typenames)):
+                    if typenames[j] in typ.nbfix:
+                        has_nbfix_terms = True
+                        raise StopIteration
+        except StopIteration:
+            pass
        # Set up the constraints
        if verbose and (constraints is not None and not rigidWater):
            print('Adding constraints...')
@@ -1240,9 +1254,85 @@ class CharmmPsfFile(object):

        # Add per-particle nonbonded parameters (LJ params)
        sigma_scale = 2**(-1/6) * 2
-        for i, atm in enumerate(self.atom_list):
-            force.addParticle(atm.charge, sigma_scale*atm.type.rmin*length_conv,
-                              abs(atm.type.epsilon*ene_conv))
+        if not has_nbfix_terms:
+            for atm in self.atom_list:
+                force.addParticle(atm.charge, sigma_scale*atm.type.rmin*length_conv,
+                                  abs(atm.type.epsilon*ene_conv))
+        else:
+            for atm in self.atom_list:
+                force.addParticle(atm.charge, 1.0, 0.0)
+            # Now add the custom nonbonded force that implements NBFIX. First
+            # thing we need to do is condense our number of types
+            lj_idx_list = [0 for atom in self.atom_list]
+            lj_radii, lj_depths = [], []
+            num_lj_types = 0
+            lj_type_list = []
+            for i, atom in enumerate(self.atom_list):
+                atom = atom.type
+                if lj_idx_list[i]: continue # already assigned
+                num_lj_types += 1
+                lj_idx_list[i] = num_lj_types
+                ljtype = (atom.rmin, atom.epsilon)
+                lj_type_list.append(atom)
+                lj_radii.append(atom.rmin)
+                lj_depths.append(atom.epsilon)
+                for j in range(i+1, len(self.atom_list)):
+                    atom2 = self.atom_list[j].type
+                    if lj_idx_list[j] > 0: continue # already assigned
+                    if atom2 is atom:
+                        lj_idx_list[j] = num_lj_types
+                    elif not atom.nbfix:
+                        # Only non-NBFIXed atom types can be compressed
+                        ljtype2 = (atom2.rmin, atom2.epsilon)
+                        if ljtype == ljtype2:
+                            lj_idx_list[j] = num_lj_types
+            # Now everything is assigned. Create the A-coefficient and
+            # B-coefficient arrays
+            acoef = [0 for i in range(num_lj_types*num_lj_types)]
+            bcoef = acoef[:]
+            for i in range(num_lj_types):
+                for j in range(num_lj_types):
+                    namej = lj_type_list[j].name
+                    try:
+                        rij, wdij, rij14, wdij14 = lj_type_list[i].nbfix[namej]
+                    except KeyError:
+                        rij = (lj_radii[i] + lj_radii[j]) * length_conv
+                        wdij = sqrt(lj_depths[i] * lj_depths[j]) * ene_conv
+                    else:
+                        rij *= length_conv
+                        wdij *= ene_conv
+                    acoef[i+num_lj_types*j] = sqrt(wdij) * rij**6
+                    bcoef[i+num_lj_types*j] = 2 * wdij * rij**6
+            cforce = mm.CustomNonbondedForce('(a/r6)^2-b/r6; r6=r^6;'
+                                             'a=acoef(type1, type2);'
+                                             'b=bcoef(type1, type2)')
+            cforce.addTabulatedFunction('acoef',
+                    mm.Discrete2DFunction(num_lj_types, num_lj_types, acoef))
+            cforce.addTabulatedFunction('bcoef',
+                    mm.Discrete2DFunction(num_lj_types, num_lj_types, bcoef))
+            cforce.addPerParticleParameter('type')
+            cforce.setForceGroup(self.NONBONDED_FORCE_GROUP)
+            if (nonbondedMethod is ff.PME or nonbondedMethod is ff.Ewald or
+                        nonbondedMethod is ff.CutoffPeriodic):
+                cforce.setNonbondedMethod(cforce.CutoffPeriodic)
+                cforce.setCutoffDistance(nonbondedCutoff)
+                cforce.setUseLongRangeCorrection(True)
+            elif nonbondedMethod is ff.NoCutoff:
+                cforce.setNonbondedMethod(cforce.NoCutoff)
+            elif nonbondedMethod is ff.CutoffNonPeriodic:
+                cforce.setNonbondedMethod(cforce.CutoffNonPeriodic)
+                cforce.setCutoffDistance(nonbondedCutoff)
+            else:
+                raise ValueError('Unrecognized nonbonded method')
+            if switchDistance and nonbondedMethod is not ff.NoCutoff:
+                # make sure it's legal
+                if switchDistance >= nonbondedCutoff:
+                    raise ValueError('switchDistance is too large compared '
+                                     'to the cutoff!')
+                    cforce.setUseSwitchingFunction(True)
+                    cforce.setSwitchingDistance(switchDistance)
+            for i in lj_idx_list:
+                cforce.addParticle((i - 1,)) # adjust for indexing from 0

        # Add 1-4 interactions
        excluded_atom_pairs = set() # save these pairs so we don't zero them out
@@ -1283,6 +1373,13 @@ class CharmmPsfFile(object):
                    continue
                force.addException(atom.idx, atom2.idx, 0.0, 0.1, 0.0)
        system.addForce(force)
+        # If we needed a CustomNonbondedForce, map all of the exceptions from
+        # the NonbondedForce to the CustomNonbondedForce
+        if has_nbfix_terms:
+            for i in range(force.getNumExceptions()):
+                ii, jj, q, eps, sig = force.getExceptionParameters(i)
+                cforce.addExclusion(ii, jj)
+            system.addForce(cforce)

        # Add GB model if we're doing one
        if implicitSolvent is not None:

--- a/wrappers/python/simtk/openmm/app/internal/charmm/_charmmfile.py
+++ b/wrappers/python/simtk/openmm/app/internal/charmm/_charmmfile.py
@@ -47,7 +47,6 @@ class CharmmFile(object):
    """

    def __init__(self, fname, mode='r'):
-        self.closed = False
        if mode not in ('r', 'w'):
            raise ValueError('Cannot open CharmmFile with mode "%s"' % mode)
        if mode == 'r':
@@ -58,6 +57,7 @@ class CharmmFile(object):
            self._handle = open(fname, mode)
        except IOError, e:
            raise CharmmFileError(str(e))
+        self.closed = False
        self.line_number = 0

    def write(self, *args, **kwargs):

--- a/wrappers/python/simtk/openmm/app/internal/charmm/topologyobjects.py
+++ b/wrappers/python/simtk/openmm/app/internal/charmm/topologyobjects.py
--- a/wrappers/python/tests/TestCharmmFiles.py
+++ b/wrappers/python/tests/TestCharmmFiles.py
@@ -100,6 +100,33 @@ class TestCharmmFiles(unittest.TestCase):
        totalMass2 = sum([system2.getParticleMass(i) for i in range(system2.getNumParticles())]).value_in_unit(amu)
        self.assertAlmostEqual(totalMass1, totalMass2)

+    def test_NBFIX(self):
+        """Tests CHARMM systems with NBFIX Lennard-Jones modifications"""
+        import warnings
+        warnings.filterwarnings('ignore', category=CharmmPSFWarning)  # NOTE(review): filter is installed globally and never removed; confirm other tests do not rely on this warning
+        psf = CharmmPsfFile('systems/ala3_solv.psf')
+        crd = CharmmCrdFile('systems/ala3_solv.crd')
+        params = CharmmParameterSet('systems/par_all36_prot.prm',
+                                    'systems/toppar_water_ions.str')
+        # Box dimensions (found from bounding box)
+        psf.setBox(32.7119500*angstroms, 32.9959600*angstroms, 33.0071500*angstroms)
+
+        # Turn off charges so we only test the Lennard-Jones energies
+        for a in psf.atom_list:
+            a.charge = 0.0
+
+        # Now compute the full energy
+        plat = Platform.getPlatformByName('Reference')  # evaluate on the Reference platform
+        system = psf.createSystem(params, nonbondedMethod=PME,
+                                  nonbondedCutoff=8*angstroms)
+
+        con = Context(system, VerletIntegrator(2*femtoseconds), plat)  # no steps are taken; the integrator is only needed to build the Context
+        con.setPositions(crd.positions)
+
+        state = con.getState(getEnergy=True, enforcePeriodicBox=True)
+        ene = state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)  # compare in kcal/mol
+        self.assertAlmostEqual(ene, 15490.0033559, delta=0.05)  # hard-coded expected energy, absolute tolerance 0.05 kcal/mol
+
+
 if __name__ == '__main__':
    unittest.main()

--- a/wrappers/python/tests/systems/ala3_solv.crd
+++ b/wrappers/python/tests/systems/ala3_solv.crd
--- a/wrappers/python/tests/systems/ala3_solv.psf
+++ b/wrappers/python/tests/systems/ala3_solv.psf