CPU FFT selects grid dimensions that can be handled efficiently. Also lots of minor cleanup.

9e74c183 · peastman · f2548616 · 9e74c183 · 9e74c183 · 9e74c183
Commit 9e74c183 authored Jun 28, 2013 by peastman
6 changed files
--- a/platforms/cuda/src/CudaContext.h
+++ b/platforms/cuda/src/CudaContext.h
@@ -644,7 +644,8 @@ public:
     * @param includeForce  true if forces should be computed
     * @param includeEnergy true if potential energy should be computed
     * @param groups        a set of bit flags for which force groups to include
-     * @return an optional contribution to add to the potential energy.     */
+     * @return an optional contribution to add to the potential energy.
+     */
    virtual double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0;
 };


--- a/platforms/cuda/src/kernels/pme.cu
+++ b/platforms/cuda/src/kernels/pme.cu
@@ -280,4 +280,4 @@ void addForces(const real4* __restrict__ forces, unsigned long long* __restrict_
        forceBuffers[atom+PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (f.y*0x100000000));
        forceBuffers[atom+2*PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (f.z*0x100000000));
    }
-}
\ No newline at end of file
+}
--- a/platforms/opencl/src/OpenCLContext.h
+++ b/platforms/opencl/src/OpenCLContext.h
@@ -750,7 +750,8 @@ public:
     * @param includeForce  true if forces should be computed
     * @param includeEnergy true if potential energy should be computed
     * @param groups        a set of bit flags for which force groups to include
-     * @return an optional contribution to add to the potential energy.     */
+     * @return an optional contribution to add to the potential energy.
+     */
    virtual double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0;
 };


--- a/plugins/cpupme/src/CpuPmeKernels.cpp
+++ b/plugins/cpupme/src/CpuPmeKernels.cpp
@@ -397,15 +397,15 @@ static void* threadBody(void* args) {
    return 0;
 }

-void CpuCalcPmeReciprocalForceKernel::initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) {
+void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize, int numParticles, double alpha) {
    if (!hasInitializedThreads) {
        numThreads = getNumProcessors();
        fftwf_init_threads();
        hasInitializedThreads = true;
    }
-    this->gridx = gridx;
-    this->gridy = gridy;
-    this->gridz = gridz;
+    gridx = findFFTDimension(xsize);
+    gridy = findFFTDimension(ysize);
+    gridz = findFFTDimension(zsize);
    this->numParticles = numParticles;
    this->alpha = alpha;
    force.resize(4*numParticles);
@@ -630,3 +630,20 @@ bool CpuCalcPmeReciprocalForceKernel::isProcessorSupported() {
    }
    return false;
 }
+
+int CpuCalcPmeReciprocalForceKernel::findFFTDimension(int minimum) {
+    if (minimum < 1)
+        return 1;
+    while (true) {
+        // Attempt to factor the current value.
+
+        int unfactored = minimum;
+        for (int factor = 2; factor < 12; factor++) {
+            while (unfactored > 1 && unfactored%factor == 0)
+                unfactored /= factor;
+        }
+        if (unfactored == 1)
+            return minimum;
+        minimum++;
+    }
+}
--- a/plugins/cpupme/src/CpuPmeKernels.h
+++ b/plugins/cpupme/src/CpuPmeKernels.h
@@ -42,6 +42,8 @@
 namespace OpenMM {

 /**
+ * This is an optimized CPU implementation of CalcPmeReciprocalForceKernel.  It is both
+ * vectorized (requiring SSE 4.1) and multithreaded.  It uses FFTW to perform the FFTs.
 */

 class OPENMM_EXPORT_PME CpuCalcPmeReciprocalForceKernel : public CalcPmeReciprocalForceKernel {
@@ -50,15 +52,53 @@ public:
    CpuCalcPmeReciprocalForceKernel(std::string name, const Platform& platform) : CalcPmeReciprocalForceKernel(name, platform),
            hasCreatedPlan(false), isDeleted(false), realGrid(NULL), complexGrid(NULL) {
    }
-    void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha);
+    /**
+     * Initialize the kernel.
+     * 
+     * @param gridx        the x size of the PME grid
+     * @param gridy        the y size of the PME grid
+     * @param gridz        the z size of the PME grid
+     * @param numParticles the number of particles in the system
+     * @param alpha        the Ewald blending parameter
+     */
+    void initialize(int xsize, int ysize, int zsize, int numParticles, double alpha);
    ~CpuCalcPmeReciprocalForceKernel();
+    /**
+     * Begin computing the force and energy.
+     * 
+     * @param io               an object that coordinates data transfer
+     * @param periodicBoxSize  the size of the periodic box (measured in nm)
+     * @param includeEnergy    true if potential energy should be computed
+     */
    void beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy);
+    /**
+     * Finish computing the force and energy.
+     * 
+     * @param io   an object that coordinates data transfer
+     * @return the potential energy due to the PME reciprocal space interactions
+     */
    double finishComputation(IO& io);
+    /**
+     * This routine contains the code executed by each thread.
+     */
    void runThread(int index);
+    /**
+     * Get whether the current CPU supports all features needed by this kernel.
+     */
    static bool isProcessorSupported();
 private:
+    /**
+     * This is called by the worker threads to wait until the master thread instructs them to advance.
+     */
    void threadWait();
+    /**
+     * This is called by the master thread to instruct all the worker threads to advance.
+     */
    void advanceThreads();
+    /**
+     * Select a size for one grid dimension that FFTW can handle efficiently.
+     */
+    int findFFTDimension(int minimum);
    static bool hasInitializedThreads;
    static int numThreads;
    int gridx, gridy, gridz, numParticles;

--- a/plugins/cpupme/tests/TestCpuPme.cpp
+++ b/plugins/cpupme/tests/TestCpuPme.cpp
@@ -83,6 +83,7 @@ void testPME() {
    force->setNonbondedMethod(NonbondedForce::PME);
    force->setCutoffDistance(cutoff);
    force->setReciprocalSpaceForceGroup(1);
+    force->setEwaldErrorTolerance(1e-4);
    
    // Compute the reciprocal space forces with the reference platform.
    
@@ -116,9 +117,9 @@ void testPME() {
    
    // See if they match.
    
-    ASSERT_EQUAL_TOL(refState.getPotentialEnergy(), energy+ewaldSelfEnergy, 1e-4);
+    ASSERT_EQUAL_TOL(refState.getPotentialEnergy(), energy+ewaldSelfEnergy, 1e-3);
    for (int i = 0; i < numParticles; i++)
-        ASSERT_EQUAL_VEC(refState.getForces()[i], Vec3(io.force[4*i], io.force[4*i+1], io.force[4*i+2]), 1e-4);
+        ASSERT_EQUAL_VEC(refState.getForces()[i], Vec3(io.force[4*i], io.force[4*i+1], io.force[4*i+2]), 1e-3);
 }

 int main(int argc, char* argv[]) {