Merge branch 'upstream' into fork

3862202e · Justin MacCallum · e1a4e015 · 73882ac5 · 3862202e · 3862202e
Commit 3862202e authored Jul 12, 2013 by Justin MacCallum
20 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -396,6 +396,20 @@ IF(OPENMM_BUILD_DRUDE_PLUGIN)
   ADD_SUBDIRECTORY(plugins/drude)
 ENDIF(OPENMM_BUILD_DRUDE_PLUGIN)

+# CPU PME plugin
+
+FIND_PACKAGE(FFTW QUIET)
+IF(FFTW_FOUND)
+    SET(OPENMM_BUILD_PME_PLUGIN ON CACHE BOOL "Build CPU PME plugin")
+ELSE(FFTW_FOUND)
+    SET(OPENMM_BUILD_PME_PLUGIN OFF CACHE BOOL "Build CPU PME plugin")
+ENDIF(FFTW_FOUND)
+SET(OPENMM_BUILD_PME_PATH)
+IF(OPENMM_BUILD_PME_PLUGIN)
+   SET(OPENMM_BUILD_PME_PATH ${CMAKE_CURRENT_SOURCE_DIR}/plugins/cpupme)
+   ADD_SUBDIRECTORY(plugins/cpupme)
+ENDIF(OPENMM_BUILD_PME_PLUGIN)
+
 INSTALL_TARGETS(/lib RUNTIME_DIRECTORY /lib ${SHARED_TARGET})
 IF(OPENMM_BUILD_STATIC_LIB)
  INSTALL_TARGETS(/lib RUNTIME_DIRECTORY /lib ${STATIC_TARGET})
@@ -403,9 +417,11 @@ ENDIF(OPENMM_BUILD_STATIC_LIB)
 FILE(GLOB CORE_HEADERS     include/*.h          */include/*.h)
 FILE(GLOB TOP_HEADERS      include/openmm/*.h          */include/openmm/*.h)
 FILE(GLOB INTERNAL_HEADERS include/openmm/internal/*.h */include/openmm/internal/*.h )
+FILE(GLOB REFERENCE_HEADERS platforms/reference/include/*.h)
 INSTALL_FILES(/include                 FILES ${CORE_HEADERS})
 INSTALL_FILES(/include/openmm          FILES ${TOP_HEADERS})
 INSTALL_FILES(/include/openmm/internal FILES ${INTERNAL_HEADERS})
+INSTALL_FILES(/include/openmm/reference FILES ${REFERENCE_HEADERS})

 # Serialization support


--- a/cmake_modules/FindFFTW.cmake
+++ b/cmake_modules/FindFFTW.cmake
+# - Find FFTW
+# Find the native single-precision FFTW (fftw3f) includes and libraries
+#
+#  FFTW_INCLUDES        - where to find fftw3.h
+#  FFTW_LIBRARY         - the single-precision FFTW library (fftw3f).
+#  FFTW_THREADS_LIBRARY - the single-precision FFTW multithreading support library (fftw3f_threads).
+#  FFTW_FOUND           - True if FFTW_LIBRARY and FFTW_INCLUDES were found.
+#  FFTW_THREADS_FOUND   - True if FFTW_THREADS_LIBRARY and FFTW_INCLUDES were found.
+
+if (FFTW_INCLUDES)
+  # Already in cache, be silent
+  set (FFTW_FIND_QUIETLY TRUE)
+endif (FFTW_INCLUDES)
+
+find_path (FFTW_INCLUDES fftw3.h)
+
+find_library (FFTW_LIBRARY NAMES fftw3f)
+find_library (FFTW_THREADS_LIBRARY NAMES fftw3f_threads)
+
+# handle the QUIETLY and REQUIRED arguments and set FFTW_FOUND to TRUE if
+# all listed variables are TRUE; the second call does the same for FFTW_THREADS_FOUND
+include (FindPackageHandleStandardArgs)
+find_package_handle_standard_args (FFTW DEFAULT_MSG FFTW_LIBRARY FFTW_INCLUDES)
+find_package_handle_standard_args (FFTW_THREADS DEFAULT_MSG FFTW_THREADS_LIBRARY FFTW_INCLUDES)
+
+mark_as_advanced (FFTW_LIBRARY FFTW_THREADS_LIBRARY FFTW_INCLUDES)
--- a/libraries/lepton/src/Parser.cpp
+++ b/libraries/lepton/src/Parser.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2009-2011 Stanford University and the Authors.      *
+ * Portions copyright (c) 2009-2013 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -112,7 +112,7 @@ ParseToken Parser::getNextToken(const string& expression, int start) {
            }
            if ((c == 'e' || c == 'E') && !foundExp) {
                foundExp = true;
-                if (pos < (int) expression.size()-1 && expression[pos+1] == '-')
+                if (pos < (int) expression.size()-1 && (expression[pos+1] == '-' || expression[pos+1] == '+'))
                    pos++;
                continue;
            }

--- a/olla/include/openmm/kernels.h
+++ b/olla/include/openmm/kernels.h
@@ -1152,6 +1152,70 @@ public:
    virtual void execute(ContextImpl& context) = 0;
 };

+/**
+ * This kernel performs the reciprocal space calculation for PME.  In most cases, this
+ * calculation is done directly by CalcNonbondedForceKernel so this kernel is unneeded.
+ * In some cases, however, a platform may want to outsource the work to a different kernel.
+ * In particular, GPU based platforms sometimes use a CPU based implementation provided by
+ * a separate plugin.
+ */
+class CalcPmeReciprocalForceKernel : public KernelImpl {
+public:
+    class IO;
+    static std::string Name() {
+        return "CalcPmeReciprocalForce";
+    }
+    CalcPmeReciprocalForceKernel(std::string name, const Platform& platform) : KernelImpl(name, platform) {
+    }
+    /**
+     * Initialize the kernel.
+     * 
+     * @param gridx        the x size of the PME grid
+     * @param gridy        the y size of the PME grid
+     * @param gridz        the z size of the PME grid
+     * @param numParticles the number of particles in the system
+     * @param alpha        the Ewald blending parameter
+     */
+    virtual void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) = 0;
+    /**
+     * Begin computing the force and energy.
+     * 
+     * @param io               an object that coordinates data transfer
+     * @param periodicBoxSize  the size of the periodic box (measured in nm)
+     * @param includeEnergy    true if potential energy should be computed
+     */
+    virtual void beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy) = 0;
+    /**
+     * Finish computing the force and energy.
+     * 
+     * @param io   an object that coordinates data transfer
+     * @return the potential energy due to the PME reciprocal space interactions
+     */
+    virtual double finishComputation(IO& io) = 0;
+};
+
+/**
+ * Any class that uses CalcPmeReciprocalForceKernel should create an implementation of this
+ * class, then pass it to the kernel to manage communication with it.
+ */
+class CalcPmeReciprocalForceKernel::IO {
+public:
+    /**
+     * Get a pointer to the atom charges and positions.  This array should contain four
+     * elements for each atom: x, y, z, and q in that order.
+     */
+    virtual float* getPosq() = 0;
+    /**
+     * Record the forces calculated by the kernel.
+     * 
+     * @param force    an array containing four elements for each atom.  The first three
+     *                 are the x, y, and z components of the force, while the fourth element
+     *                 should be ignored.
+     */
+    virtual void setForce(float* force) = 0;
+};
+
+
 } // namespace OpenMM

 #endif /*OPENMM_KERNELS_H_*/
--- a/openmmapi/include/openmm/GBSAOBCForce.h
+++ b/openmmapi/include/openmm/GBSAOBCForce.h
@@ -40,13 +40,19 @@ namespace OpenMM {

 /**
 * This class implements an implicit solvation force using the GBSA-OBC model.
- * <p>
+ * 
 * To use this class, create a GBSAOBCForce object, then call addParticle() once for each particle in the
 * System to define its parameters.  The number of particles for which you define GBSA parameters must
 * be exactly equal to the number of particles in the System, or else an exception will be thrown when you
 * try to create a Context.  After a particle has been added, you can modify its force field parameters
 * by calling setParticleParameters().  This will have no effect on Contexts that already exist unless you
 * call updateParametersInContext().
+ * 
+ * When using this Force, the System should also include a NonbondedForce, and both objects must specify
+ * identical charges for all particles.  Otherwise, the results will not be correct.  Furthermore, if the
+ * nonbonded method is set to CutoffNonPeriodic or CutoffPeriodic, you should call setReactionFieldDielectric(1.0)
+ * on the NonbondedForce to turn off the reaction field approximation, which does not produce correct results
+ * when combined with GBSA.
 */

 class OPENMM_EXPORT GBSAOBCForce : public Force {
@@ -70,7 +76,7 @@ public:
         */
        CutoffPeriodic = 2,
    };
-    /*
+    /**
     * Create a GBSAOBCForce.
     */
    GBSAOBCForce();

--- a/openmmapi/src/Context.cpp
+++ b/openmmapi/src/Context.cpp
@@ -33,7 +33,7 @@
 #include "openmm/internal/ContextImpl.h"
 #include "openmm/OpenMMException.h"
 #include "openmm/internal/ForceImpl.h"
-#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
+#include "SimTKOpenMMRealType.h"
 #include "sfmt/SFMT.h"
 #include <cmath>


--- a/platforms/cuda/CMakeLists.txt
+++ b/platforms/cuda/CMakeLists.txt
@@ -102,4 +102,9 @@ SET(CUDA_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.h)
 SET(SOURCE_FILES ${SOURCE_FILES} ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H})
 INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)

+# Install headers
+
+FILE(GLOB CORE_HEADERS include/*.h)
+INSTALL_FILES(/include/openmm/cuda FILES ${CORE_HEADERS})
+
 SUBDIRS (sharedTarget)
--- a/platforms/cuda/src/CudaArray.h
+++ b/platforms/cuda/src/CudaArray.h
--- a/platforms/cuda/src/CudaBondedUtilities.h
+++ b/platforms/cuda/src/CudaBondedUtilities.h
--- a/platforms/cuda/src/CudaContext.h
+++ b/platforms/cuda/src/CudaContext.h
@@ -70,6 +70,8 @@ public:
    class WorkTask;
    class WorkThread;
    class ReorderListener;
+    class ForcePreComputation;
+    class ForcePostComputation;
    static const int ThreadBlockSize;
    static const int TileSize;
    CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const std::string& precision,
@@ -454,6 +456,28 @@ public:
    std::vector<ReorderListener*>& getReorderListeners() {
        return reorderListeners;
    }
+    /**
+     * Add a pre-computation that should be called at the very start of force and energy evaluations.
+     * The CudaContext assumes ownership of the object, and deletes it when the context itself is deleted.
+     */
+    void addPreComputation(ForcePreComputation* computation);
+    /**
+     * Get the list of ForcePreComputations.
+     */
+    std::vector<ForcePreComputation*>& getPreComputations() {
+        return preComputations;
+    }
+    /**
+     * Add a post-computation that should be called at the very end of force and energy evaluations.
+     * The CudaContext assumes ownership of the object, and deletes it when the context itself is deleted.
+     */
+    void addPostComputation(ForcePostComputation* computation);
+    /**
+     * Get the list of ForcePostComputations.
+     */
+    std::vector<ForcePostComputation*>& getPostComputations() {
+        return postComputations;
+    }
    /**
     * Mark that the current molecule definitions (and hence the atom order) may be invalid.
     * This should be called whenever force field parameters change.  It will cause the definitions
@@ -519,6 +543,8 @@ private:
    std::vector<CUdeviceptr> autoclearBuffers;
    std::vector<int> autoclearBufferSizes;
    std::vector<ReorderListener*> reorderListeners;
+    std::vector<ForcePreComputation*> preComputations;
+    std::vector<ForcePostComputation*> postComputations;
    CudaIntegrationUtilities* integration;
    CudaExpressionUtilities* expression;
    CudaBondedUtilities* bonded;
@@ -580,7 +606,7 @@ private:

 /**
 * This abstract class defines a function to be executed whenever atoms get reordered.
- * Objects that need to know when reordering happens should create a reorderListener
+ * Objects that need to know when reordering happens should create a ReorderListener
 * and register it by calling addReorderListener().
 */
 class CudaContext::ReorderListener {
@@ -590,6 +616,45 @@ public:
    }
 };

+/**
+ * This abstract class defines a function to be executed at the very beginning of force and
+ * energy evaluation, before any other calculation has been done.  It is useful for operations
+ * that need to be performed at a nonstandard point in the process.  After creating a
+ * ForcePreComputation, register it by calling addPreComputation().  The CudaContext then owns
+ * the object and deletes it through this base class, so the destructor must be virtual.
+ */
+class CudaContext::ForcePreComputation {
+public:
+    /**
+     * @param includeForces true if forces should be computed
+     * @param includeEnergy true if potential energy should be computed
+     * @param groups        a set of bit flags for which force groups to include
+     */
+    virtual void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0;
+    virtual ~ForcePreComputation() {
+    }
+};
+
+/**
+ * This abstract class defines a function to be executed at the very end of force and
+ * energy evaluation, after all other calculations have been done.  It is useful for operations
+ * that need to be performed at a nonstandard point in the process.  After creating a
+ * ForcePostComputation, register it by calling addPostComputation().  The CudaContext then owns
+ * the object and deletes it through this base class, so the destructor must be virtual.
+ */
+class CudaContext::ForcePostComputation {
+public:
+    /**
+     * @param includeForces true if forces should be computed
+     * @param includeEnergy true if potential energy should be computed
+     * @param groups        a set of bit flags for which force groups to include
+     * @return an optional contribution to add to the potential energy.
+     */
+    virtual double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0;
+    virtual ~ForcePostComputation() {
+    }
+};
+
 } // namespace OpenMM

 #endif /*OPENMM_CUDACONTEXT_H_*/
--- a/platforms/cuda/src/CudaExpressionUtilities.h
+++ b/platforms/cuda/src/CudaExpressionUtilities.h
--- a/platforms/cuda/src/CudaForceInfo.h
+++ b/platforms/cuda/src/CudaForceInfo.h
--- a/platforms/cuda/src/CudaIntegrationUtilities.h
+++ b/platforms/cuda/src/CudaIntegrationUtilities.h
--- a/platforms/cuda/src/CudaKernels.h
+++ b/platforms/cuda/src/CudaKernels.h
@@ -557,7 +557,7 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
 public:
    CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
            cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL),
-            pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL),  pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL) {
+            pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL),  pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL), pmeio(NULL) {
    }
    ~CudaCalcNonbondedForceKernel();
    /**
@@ -596,6 +596,9 @@ private:
        const char* getMaxValue() const {return "make_int2(INT_MAX, INT_MAX)";}
        const char* getSortKey() const {return "value.y";}
    };
+    class PmeIO;
+    class PmePreComputation;
+    class PmePostComputation;
    CudaContext& cu;
    bool hasInitializedFFT;
    CudaArray* sigmaEpsilon;
@@ -609,6 +612,8 @@ private:
    CudaArray* pmeAtomRange;
    CudaArray* pmeAtomGridIndex;
    CudaSort* sort;
+    Kernel cpuPme;
+    PmeIO* pmeio;
    cufftHandle fftForward;
    cufftHandle fftBackward;
    CUfunction ewaldSumsKernel;

--- a/platforms/cuda/src/CudaNonbondedUtilities.h
+++ b/platforms/cuda/src/CudaNonbondedUtilities.h
--- a/platforms/cuda/src/CudaParallelKernels.h
+++ b/platforms/cuda/src/CudaParallelKernels.h
--- a/platforms/cuda/src/CudaParameterSet.h
+++ b/platforms/cuda/src/CudaParameterSet.h
--- a/platforms/cuda/include/CudaPlatform.h
+++ b/platforms/cuda/include/CudaPlatform.h
@@ -81,6 +81,13 @@ public:
        static const std::string key = "CudaPrecision";
        return key;
    }
+    /**
+     * This is the name of the parameter for selecting whether to use the CPU based PME calculation.
+     */
+    static const std::string& CudaUseCpuPme() {
+        static const std::string key = "CudaUseCpuPme";
+        return key;
+    }
    /**
     * This is the name of the parameter for specifying the path to the CUDA compiler.
     */
@@ -99,14 +106,15 @@ public:

 class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
 public:
-    PlatformData(const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
-            const std::string& compilerProperty, const std::string& tempProperty);
+    PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
+            const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty);
    ~PlatformData();
    void initializeContexts(const System& system);
    void syncContexts();
+    ContextImpl* context;
    std::vector<CudaContext*> contexts;
    std::vector<double> contextEnergy;
-    bool removeCM, peerAccessSupported;
+    bool removeCM, peerAccessSupported, useCpuPme;
    int cmMotionFrequency;
    int stepCount, computeForceCount;
    double time;

--- a/platforms/cuda/src/CudaSort.h
+++ b/platforms/cuda/src/CudaSort.h
--- a/platforms/cuda/src/CudaContext.cpp
+++ b/platforms/cuda/src/CudaContext.cpp
@@ -231,6 +231,10 @@ CudaContext::~CudaContext() {
        delete forces[i];
    for (int i = 0; i < (int) reorderListeners.size(); i++)
        delete reorderListeners[i];
+    for (int i = 0; i < (int) preComputations.size(); i++)
+        delete preComputations[i];
+    for (int i = 0; i < (int) postComputations.size(); i++)
+        delete postComputations[i];
    if (pinnedBuffer != NULL)
        cuMemFreeHost(pinnedBuffer);
    if (posq != NULL)
@@ -743,6 +747,7 @@ void CudaContext::findMoleculeGroups() {
            for (int j = 0; j < forces[i]->getNumParticleGroups(); j++) {
                vector<int> particles;
                forces[i]->getParticlesInGroup(j, particles);
+                if (particles.size() > 0)
                    molecules[atomMolecule[particles[0]]].groups[i].push_back(j);
            }
    }
@@ -1102,6 +1107,14 @@ void CudaContext::addReorderListener(ReorderListener* listener) {
    reorderListeners.push_back(listener);
 }

+void CudaContext::addPreComputation(ForcePreComputation* computation) {
+    preComputations.push_back(computation);
+}
+
+void CudaContext::addPostComputation(ForcePostComputation* computation) {
+    postComputations.push_back(computation);
+}
+
 struct CudaContext::WorkThread::ThreadData {
    ThreadData(std::queue<CudaContext::WorkTask*>& tasks, bool& waiting,  bool& finished,
            pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) :