Merge remote-tracking branch 'upstream/master'

047934e2 · Rafal P. Wiewiora · ce3a5dc0 · d12c9bd1 · 047934e2 · 047934e2
Commit 047934e2 authored Mar 01, 2017 by Rafal P. Wiewiora
20 changed files
--- a/platforms/cpu/src/CpuCustomManyParticleForce.cpp
+++ b/platforms/cpu/src/CpuCustomManyParticleForce.cpp
-/* Portions copyright (c) 2009-2014 Stanford University and Simbios.
+/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
 * Contributors: Peter Eastman
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -37,16 +37,6 @@
 using namespace OpenMM;
 using namespace std;
-class CpuCustomManyParticleForce::ComputeForceTask : public ThreadPool::Task {
-public:
-    ComputeForceTask(CpuCustomManyParticleForce& owner) : owner(owner) {
-    }
-    void execute(ThreadPool& threads, int threadIndex) {
-        owner.threadComputeForce(threads, threadIndex);
-    }
-    CpuCustomManyParticleForce& owner;
-};
 CpuCustomManyParticleForce::CpuCustomManyParticleForce(const CustomManyParticleForce& force, ThreadPool& threads) :
            threads(threads), useCutoff(false), usePeriodic(false), neighborList(NULL) {
    numParticles = force.getNumParticles();
@@ -98,7 +88,7 @@ CpuCustomManyParticleForce::~CpuCustomManyParticleForce() {
        delete threadData[i];
 }
-void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, RealOpenMM** particleParameters,
+void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, double** particleParameters,
                                                  const map<string, double>& globalParameters, vector<AlignedArray<float> >& threadForce,
                                                  bool includeForces, bool includeEnergy, double& energy) {
    // Record the parameters for the threads.
@@ -141,8 +131,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, RealOpe
    // Signal the threads to start running and wait for them to finish.
-    ComputeForceTask task(*this);
+    threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
-    threads.execute(task);
    threads.waitForThreads();
    // Combine the energies from all the threads.
@@ -191,14 +180,14 @@ void CpuCustomManyParticleForce::threadComputeForce(ThreadPool& threads, int thr
    }
 }
-void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) {
+void CpuCustomManyParticleForce::setUseCutoff(double distance) {
    useCutoff = true;
    cutoffDistance = distance;
    if (neighborList == NULL)
        neighborList = new CpuNeighborList(4);
 }
-void CpuCustomManyParticleForce::setPeriodic(RealVec* periodicBoxVectors) {
+void CpuCustomManyParticleForce::setPeriodic(Vec3* periodicBoxVectors) {
    assert(useCutoff);
    assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
    assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance);
@@ -220,7 +209,7 @@ void CpuCustomManyParticleForce::setPeriodic(RealVec* periodicBoxVectors) {
 }
 void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availableParticles, vector<int>& particleSet, int loopIndex, int startIndex,
-                                                          RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
+                                                          double** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
    int numParticles = availableParticles.size();
    double cutoff2 = cutoffDistance*cutoffDistance;
    int checkRange = (centralParticleMode ? 1 : loopIndex);
@@ -254,7 +243,7 @@ void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availablePart
    }
 }
-void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
+void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, double** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
    // Select the ordering to use for the particles.
    vector<int>& permutedParticles = data.permutedParticles;

--- a/platforms/cpu/src/CpuCustomNonbondedForce.cpp
+++ b/platforms/cpu/src/CpuCustomNonbondedForce.cpp
-/* Portions copyright (c) 2009-2016 Stanford University and Simbios.
+/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
 * Contributors: Peter Eastman
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -33,16 +33,6 @@
 using namespace OpenMM;
 using namespace std;
-class CpuCustomNonbondedForce::ComputeForceTask : public ThreadPool::Task {
-public:
-    ComputeForceTask(CpuCustomNonbondedForce& owner) : owner(owner) {
-    }
-    void execute(ThreadPool& threads, int threadIndex) {
-        owner.threadComputeForce(threads, threadIndex);
-    }
-    CpuCustomNonbondedForce& owner;
-};
 CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression,
            const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) :
            energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) {
@@ -70,7 +60,7 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression
 CpuCustomNonbondedForce::CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression,
            const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions,
            const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, ThreadPool& threads) :
-            cutoff(false), useSwitch(false), periodic(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) {
+            cutoff(false), useSwitch(false), periodic(false), useInteractionGroups(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) {
    for (int i = 0; i < threads.getNumThreads(); i++)
        threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames, energyParamDerivExpressions));
 }
@@ -80,13 +70,14 @@ CpuCustomNonbondedForce::~CpuCustomNonbondedForce() {
        delete threadData[i];
 }
-void CpuCustomNonbondedForce::setUseCutoff(RealOpenMM distance, const CpuNeighborList& neighbors) {
+void CpuCustomNonbondedForce::setUseCutoff(double distance, const CpuNeighborList& neighbors) {
    cutoff = true;
    cutoffDistance = distance;
    neighborList = &neighbors;
  }
 void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, set<int> > >& groups) {
+    useInteractionGroups = true;
    for (int group = 0; group < (int) groups.size(); group++) {
        const set<int>& set1 = groups[group].first;
        const set<int>& set2 = groups[group].second;
@@ -102,12 +93,12 @@ void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, s
    }
 }
-void CpuCustomNonbondedForce::setUseSwitchingFunction(RealOpenMM distance) {
+void CpuCustomNonbondedForce::setUseSwitchingFunction(double distance) {
    useSwitch = true;
    switchingDistance = distance;
 }
-void CpuCustomNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
+void CpuCustomNonbondedForce::setPeriodic(Vec3* periodicBoxVectors) {
    assert(cutoff);
    assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
    assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance);
@@ -129,9 +120,9 @@ void CpuCustomNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
 }
-void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, vector<RealVec>& atomCoordinates, RealOpenMM** atomParameters,
+void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, vector<Vec3>& atomCoordinates, double** atomParameters,
-                                             RealOpenMM* fixedParameters, const map<string, double>& globalParameters,
+                                               double* fixedParameters, const map<string, double>& globalParameters,
-                                             vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) {
+                                               vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) {
    // Record the parameters for the threads.
    this->numberOfAtoms = numberOfAtoms;
@@ -149,8 +140,7 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v
    // Signal the threads to start running and wait for them to finish.
-    ComputeForceTask task(*this);
+    threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
-    threads.execute(task);
    threads.waitForThreads();
    // Combine the energies from all the threads.
@@ -183,7 +173,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
        data.energyParamDerivs[i] = 0.0;
    fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
    fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
-    if (groupInteractions.size() > 0) {
+    if (useInteractionGroups) {
        // The user has specified interaction groups, so compute only the requested interactions.
        while (true) {

--- a/platforms/cpu/src/CpuGBSAOBCForce.cpp
+++ b/platforms/cpu/src/CpuGBSAOBCForce.cpp
+/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
-/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -37,16 +36,6 @@ const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 4096;
 const float CpuGBSAOBCForce::TABLE_MIN = 0.25f;
 const float CpuGBSAOBCForce::TABLE_MAX = 1.5f;
-class CpuGBSAOBCForce::ComputeTask : public ThreadPool::Task {
-public:
-    ComputeTask(CpuGBSAOBCForce& owner) : owner(owner) {
-    }
-    void execute(ThreadPool& threads, int threadIndex) {
-        owner.threadComputeForce(threads, threadIndex);
-    }
-    CpuGBSAOBCForce& owner;
-};
 CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) {
    logDX = (TABLE_MAX-TABLE_MIN)/NUM_TABLE_POINTS;
    logDXInv = 1.0f/logDX;
@@ -89,6 +78,10 @@ void CpuGBSAOBCForce::setParticleParameters(const std::vector<std::pair<float, f
    particleParams = params;
    bornRadii.resize(params.size()+3);
    obcChain.resize(params.size()+3);
+    for (int i = bornRadii.size()-3; i < bornRadii.size(); i++) {
+        bornRadii[i] = 0;
+        obcChain[i] = 0;
+    }
 }
 void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
@@ -107,9 +100,8 @@ void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<Align
    // Signal the threads to start running and wait for them to finish.
-    ComputeTask task(*this);
    gmx_atomic_set(&counter, 0);
-    threads.execute(task);
+    threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
    threads.waitForThreads(); // Compute Born radii
    gmx_atomic_set(&counter, 0);
    threads.resumeThreads();

--- a/platforms/cpu/src/CpuGayBerneForce.cpp
+++ b/platforms/cpu/src/CpuGayBerneForce.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2016 Stanford University and the Authors.           *
+ * Portions copyright (c) 2016-2017 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -44,17 +44,6 @@
 using namespace OpenMM;
 using namespace std;
-class CpuGayBerneForce::ComputeTask : public ThreadPool::Task {
-public:
-    ComputeTask(CpuGayBerneForce& owner, CpuNeighborList* neighborList) : owner(owner), neighborList(neighborList) {
-    }
-    void execute(ThreadPool& threads, int threadIndex) {
-        owner.threadComputeForce(threads, threadIndex, neighborList);
-    }
-    CpuGayBerneForce& owner;
-    CpuNeighborList* neighborList;
-};
 CpuGayBerneForce::CpuGayBerneForce(const GayBerneForce& force) {
    // Record the force parameters.
@@ -111,7 +100,7 @@ const vector<set<int> >& CpuGayBerneForce::getExclusions() const {
    return particleExclusions;
 }
-RealOpenMM CpuGayBerneForce::calculateForce(const vector<RealVec>& positions, std::vector<RealVec>& forces, std::vector<AlignedArray<float> >& threadForce, RealVec* boxVectors, CpuPlatform::PlatformData& data) {
+double CpuGayBerneForce::calculateForce(const vector<Vec3>& positions, std::vector<Vec3>& forces, std::vector<AlignedArray<float> >& threadForce, Vec3* boxVectors, CpuPlatform::PlatformData& data) {
    if (nonbondedMethod == GayBerneForce::CutoffPeriodic) {
        double minAllowedSize = 1.999999*cutoffDistance;
        if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
@@ -137,8 +126,7 @@ RealOpenMM CpuGayBerneForce::calculateForce(const vector<RealVec>& positions, st
    // Signal the threads to compute the pairwise interactions.
-    ComputeTask task(*this, data.neighborList);
+    threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex, data.neighborList); });
-    threads.execute(task);
    threads.waitForThreads();
    // Signal the threads to compute exceptions.
@@ -164,10 +152,10 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
    int numThreads = threads.getNumThreads();
    threadEnergy[threadIndex] = 0;
    float* forces = &(*threadForce)[threadIndex][0];
-    vector<RealVec>& torques = threadTorque[threadIndex];
+    vector<Vec3>& torques = threadTorque[threadIndex];
    torques.resize(numParticles);
    for (int i = 0; i < numParticles; i++)
-        torques[i] = RealVec();
+        torques[i] = Vec3();
    double energy = 0.0;
    // Compute this thread's subset of interactions.
@@ -184,8 +172,8 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
                    continue;
                if (particleExclusions[i].find(j) != particleExclusions[i].end())
                    continue; // This interaction will be handled by an exception.
-                RealOpenMM sigma = particles[i].sigmaOver2+particles[j].sigmaOver2;
+                double sigma = particles[i].sigmaOver2+particles[j].sigmaOver2;
-                RealOpenMM epsilon = particles[i].sqrtEpsilon*particles[j].sqrtEpsilon;
+                double epsilon = particles[i].sqrtEpsilon*particles[j].sqrtEpsilon;
                energy += computeOneInteraction(i, j, sigma, epsilon, positions, forces, torques, boxVectors);
            }
        }
@@ -208,8 +196,8 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
                        int second = blockAtom[k];
                        if (particles[second].sqrtEpsilon == 0.0f)
                            continue;
-                        RealOpenMM sigma = particles[first].sigmaOver2+particles[second].sigmaOver2;
+                        double sigma = particles[first].sigmaOver2+particles[second].sigmaOver2;
-                        RealOpenMM epsilon = particles[first].sqrtEpsilon*particles[second].sqrtEpsilon;
+                        double epsilon = particles[first].sqrtEpsilon*particles[second].sqrtEpsilon;
                        energy += computeOneInteraction(first, second, sigma, epsilon, positions, forces, torques, boxVectors);
                    }
                }
@@ -235,39 +223,39 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
    threadEnergy[threadIndex] = energy;
 }
-void CpuGayBerneForce::computeEllipsoidFrames(const vector<RealVec>& positions) {
+void CpuGayBerneForce::computeEllipsoidFrames(const vector<Vec3>& positions) {
    int numParticles = particles.size();
    for (int particle = 0; particle < numParticles; particle++) {
        ParticleInfo& p = particles[particle];
        // Compute the local coordinate system of the ellipsoid;
-        RealVec xdir, ydir, zdir;
+        Vec3 xdir, ydir, zdir;
        if (p.xparticle == -1) {
-            xdir = RealVec(1, 0, 0);
+            xdir = Vec3(1, 0, 0);
-            ydir = RealVec(0, 1, 0);
+            ydir = Vec3(0, 1, 0);
        }
        else {
            xdir = positions[particle]-positions[p.xparticle];
-            xdir /= SQRT(xdir.dot(xdir));
+            xdir /= sqrt(xdir.dot(xdir));
            if (p.yparticle == -1) {
                if (xdir[1] > -0.5 && xdir[1] < 0.5)
-                    ydir = RealVec(0, 1, 0);
+                    ydir = Vec3(0, 1, 0);
                else
-                    ydir = RealVec(1, 0, 0);
+                    ydir = Vec3(1, 0, 0);
            }
            else
                ydir = positions[particle]-positions[p.yparticle];
            ydir -= xdir*(xdir.dot(ydir));
-            ydir /= SQRT(ydir.dot(ydir));
+            ydir /= sqrt(ydir.dot(ydir));
        }
        zdir = xdir.cross(ydir);
        // Compute matrices we will need later.
-        RealOpenMM (&a)[3][3] = A[particle].v;
+        double (&a)[3][3] = A[particle].v;
-        RealOpenMM (&b)[3][3] = B[particle].v;
+        double (&b)[3][3] = B[particle].v;
-        RealOpenMM (&g)[3][3] = G[particle].v;
+        double (&g)[3][3] = G[particle].v;
        a[0][0] = xdir[0];
        a[0][1] = xdir[1];
        a[0][2] = xdir[2];
@@ -277,8 +265,8 @@ void CpuGayBerneForce::computeEllipsoidFrames(const vector<RealVec>& positions)
        a[2][0] = zdir[0];
        a[2][1] = zdir[1];
        a[2][2] = zdir[2];
-        RealVec r2(p.rx*p.rx, p.ry*p.ry, p.rz*p.rz);
+        Vec3 r2(p.rx*p.rx, p.ry*p.ry, p.rz*p.rz);
-        RealVec e2(1/sqrt(p.ex), 1/sqrt(p.ey), 1/sqrt(p.ez));
+        Vec3 e2(1/sqrt(p.ex), 1/sqrt(p.ey), 1/sqrt(p.ez));
        for (int i = 0; i < 3; i++)
            for (int j = 0; j < 3; j++) {
                b[i][j] = 0;
@@ -291,33 +279,33 @@ void CpuGayBerneForce::computeEllipsoidFrames(const vector<RealVec>& positions)
    }
 }
-void CpuGayBerneForce::applyTorques(const vector<RealVec>& positions, vector<RealVec>& forces) {
+void CpuGayBerneForce::applyTorques(const vector<Vec3>& positions, vector<Vec3>& forces) {
    int numParticles = particles.size();
    int numThreads = threadTorque.size();
    for (int particle = 0; particle < numParticles; particle++) {
        ParticleInfo& p = particles[particle];
-        RealVec pos = positions[particle];
+        Vec3 pos = positions[particle];
        if (p.xparticle != -1) {
            // Add up the torques from the individual threads.
-            RealVec torque;
+            Vec3 torque;
            for (int i = 0; i < numThreads; i++)
                torque += threadTorque[i][particle];
            // Apply a force to the x particle.
-            RealVec dx = positions[p.xparticle]-pos;
+            Vec3 dx = positions[p.xparticle]-pos;
            double dx2 = dx.dot(dx);
-            RealVec f = torque.cross(dx)/dx2;
+            Vec3 f = torque.cross(dx)/dx2;
            forces[p.xparticle] += f;
            forces[particle] -= f;
            if (p.yparticle != -1) {
                // Apply a force to the y particle.  This is based on the component of the torque
                // that was not already applied to the x particle.
-                RealVec dy = positions[p.yparticle]-pos;
+                Vec3 dy = positions[p.yparticle]-pos;
                double dy2 = dy.dot(dy);
-                RealVec torque2 = dx*(torque.dot(dx)/dx2);
+                Vec3 torque2 = dx*(torque.dot(dx)/dx2);
                f = torque2.cross(dy)/dy2;
                forces[p.yparticle] += f;
                forces[particle] -= f;
@@ -326,27 +314,27 @@ void CpuGayBerneForce::applyTorques(const vector<RealVec>& positions, vector<Rea
    }
 }
-RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2, RealOpenMM sigma, RealOpenMM epsilon, const RealVec* positions,
+double CpuGayBerneForce::computeOneInteraction(int particle1, int particle2, double sigma, double epsilon, const Vec3* positions,
-        float* forces, vector<RealVec>& torques, const RealVec* boxVectors) {
+        float* forces, vector<Vec3>& torques, const Vec3* boxVectors) {
    // Compute the displacement and check against the cutoff.
-    RealOpenMM deltaR[ReferenceForce::LastDeltaRIndex];
+    double deltaR[ReferenceForce::LastDeltaRIndex];
    if (nonbondedMethod == GayBerneForce::CutoffPeriodic)
        ReferenceForce::getDeltaRPeriodic(positions[particle2], positions[particle1], boxVectors, deltaR);
    else
        ReferenceForce::getDeltaR(positions[particle2], positions[particle1], deltaR);
-    RealOpenMM r = deltaR[ReferenceForce::RIndex];
+    double r = deltaR[ReferenceForce::RIndex];
    if (nonbondedMethod != GayBerneForce::NoCutoff && r >= cutoffDistance)
        return 0;
-    RealOpenMM rInv = 1/r;
+    double rInv = 1/r;
-    RealVec dr(deltaR[ReferenceForce::XIndex], deltaR[ReferenceForce::YIndex], deltaR[ReferenceForce::ZIndex]);
+    Vec3 dr(deltaR[ReferenceForce::XIndex], deltaR[ReferenceForce::YIndex], deltaR[ReferenceForce::ZIndex]);
-    RealVec drUnit = dr*rInv;
+    Vec3 drUnit = dr*rInv;
    // Compute the switching function.
-    RealOpenMM switchValue = 1, switchDeriv = 0;
+    double switchValue = 1, switchDeriv = 0;
    if (useSwitchingFunction && r > switchingDistance) {
-        RealOpenMM t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
+        double t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
        switchValue = 1+t*t*t*(-10+t*(15-t*6));
        switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance);
    }
@@ -354,11 +342,11 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2,
    // Interactions between two point particles can be computed more easily.
    if (particles[particle1].isPointParticle && particles[particle2].isPointParticle) {
-        RealOpenMM sig = sigma*rInv;
+        double sig = sigma*rInv;
-        RealOpenMM sig2 = sig*sig;
+        double sig2 = sig*sig;
-        RealOpenMM sig6 = sig2*sig2*sig2;
+        double sig6 = sig2*sig2*sig2;
-        RealOpenMM energy = 4*epsilon*(sig6-1)*sig6;
+        double energy = 4*epsilon*(sig6-1)*sig6;
-        RealVec force = drUnit*(switchValue*4*epsilon*(12*sig6 - 6)*sig6*rInv - energy*switchDeriv);
+        Vec3 force = drUnit*(switchValue*4*epsilon*(12*sig6 - 6)*sig6*rInv - energy*switchDeriv);
        forces[4*particle1] += force[0];
        forces[4*particle1+1] += force[1];
        forces[4*particle1+2] += force[2];
@@ -374,31 +362,31 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2,
    Matrix G12 = G[particle1]+G[particle2];
    Matrix B12inv = B12.inverse();
    Matrix G12inv = G12.inverse();
-    RealOpenMM detG12 = G12.determinant();
+    double detG12 = G12.determinant();
    // Estimate the distance between the ellipsoids and compute the first terms needed for the energy.
-    RealOpenMM sigma12 = 1/SQRT(0.5*drUnit.dot(G12inv*drUnit));
+    double sigma12 = 1/sqrt(0.5*drUnit.dot(G12inv*drUnit));
-    RealOpenMM h12 = r - sigma12;
+    double h12 = r - sigma12;
-    RealOpenMM rho = sigma/(h12+sigma);
+    double rho = sigma/(h12+sigma);
-    RealOpenMM rho2 = rho*rho;
+    double rho2 = rho*rho;
-    RealOpenMM rho6 = rho2*rho2*rho2;
+    double rho6 = rho2*rho2*rho2;
-    RealOpenMM u = 4*epsilon*(rho6*rho6-rho6);
+    double u = 4*epsilon*(rho6*rho6-rho6);
-    RealOpenMM eta = SQRT(2*s[particle1]*s[particle2]/detG12);
+    double eta = sqrt(2*s[particle1]*s[particle2]/detG12);
-    RealOpenMM chi = 2*drUnit.dot(B12inv*drUnit);
+    double chi = 2*drUnit.dot(B12inv*drUnit);
    chi *= chi;
-    RealOpenMM energy = u*eta*chi;
+    double energy = u*eta*chi;
    // Compute the terms needed for the force.
-    RealVec kappa = G12inv*dr;
+    Vec3 kappa = G12inv*dr;
-    RealVec iota = B12inv*dr;
+    Vec3 iota = B12inv*dr;
-    RealOpenMM rInv2 = rInv*rInv;
+    double rInv2 = rInv*rInv;
-    RealOpenMM dUSLJdr = 24*epsilon*(2*rho6-1)*rho6*rho/sigma;
+    double dUSLJdr = 24*epsilon*(2*rho6-1)*rho6*rho/sigma;
-    RealOpenMM temp = 0.5*sigma12*sigma12*sigma12*rInv2;
+    double temp = 0.5*sigma12*sigma12*sigma12*rInv2;
-    RealVec dudr = (drUnit + (kappa-drUnit*kappa.dot(drUnit))*temp)*dUSLJdr;
+    Vec3 dudr = (drUnit + (kappa-drUnit*kappa.dot(drUnit))*temp)*dUSLJdr;
-    RealVec dchidr = (iota-drUnit*iota.dot(drUnit))*(-8*rInv2*SQRT(chi));
+    Vec3 dchidr = (iota-drUnit*iota.dot(drUnit))*(-8*rInv2*sqrt(chi));
-    RealVec force = (dchidr*u + dudr*chi)*(eta*switchValue) - drUnit*(energy*switchDeriv);
+    Vec3 force = (dchidr*u + dudr*chi)*(eta*switchValue) - drUnit*(energy*switchDeriv);
    forces[4*particle1] += force[0];
    forces[4*particle1+1] += force[1];
    forces[4*particle1+2] += force[2];
@@ -413,13 +401,13 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2,
        ParticleInfo& p = particles[particle];
        if (p.isPointParticle)
            continue;
-        RealVec dudq = (kappa*G[particle]).cross(kappa*(temp*dUSLJdr));
+        Vec3 dudq = (kappa*G[particle]).cross(kappa*(temp*dUSLJdr));
-        RealVec dchidq = (iota*B[particle]).cross(iota)*(-4*rInv2);
+        Vec3 dchidq = (iota*B[particle]).cross(iota)*(-4*rInv2);
-        RealOpenMM (&g12)[3][3] = G12.v;
+        double (&g12)[3][3] = G12.v;
-        RealOpenMM (&a)[3][3] = A[particle].v;
+        double (&a)[3][3] = A[particle].v;
-        RealVec scale = RealVec(p.rx*p.rx, p.ry*p.ry, p.rz*p.rz)*(-0.5*eta/detG12);
+        Vec3 scale = Vec3(p.rx*p.rx, p.ry*p.ry, p.rz*p.rz)*(-0.5*eta/detG12);
        Matrix D;
-        RealOpenMM (&d)[3][3] = D.v;
+        double (&d)[3][3] = D.v;
        d[0][0] = scale[0]*(2*a[0][0]*(g12[1][1]*g12[2][2] - g12[1][2]*g12[2][1]) +
                              a[0][2]*(g12[1][2]*g12[0][1] + g12[1][0]*g12[2][1] - g12[1][1]*(g12[0][2] + g12[2][0])) +
                              a[0][1]*(g12[0][2]*g12[2][1] + g12[2][0]*g12[1][2] - g12[2][2]*(g12[0][1] + g12[1][0])));
@@ -447,10 +435,10 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2,
        d[2][2] = scale[2]*(  a[2][0]*(g12[0][1]*g12[1][2] + g12[2][1]*g12[1][0] - g12[1][1]*(g12[0][2] + g12[2][0])) +
                              a[2][1]*(g12[1][0]*g12[0][2] + g12[2][0]*g12[0][1] - g12[0][0]*(g12[1][2] + g12[2][1])) +
                            2*a[2][2]*(g12[1][1]*g12[0][0] - g12[1][0]*g12[0][1]));
-        RealVec detadq;
+        Vec3 detadq;
        for (int i = 0; i < 3; i++)
-            detadq += RealVec(a[i][0], a[i][1], a[i][2]).cross(RealVec(d[i][0], d[i][1], d[i][2]));
+            detadq += Vec3(a[i][0], a[i][1], a[i][2]).cross(Vec3(d[i][0], d[i][1], d[i][2]));
-        RealVec torque = (dchidq*(u*eta) + detadq*(u*chi) + dudq*(eta*chi))*switchValue;
+        Vec3 torque = (dchidq*(u*eta) + detadq*(u*chi) + dudq*(eta*chi))*switchValue;
        torques[particle] -= torque;
    }
    return switchValue*energy;

--- a/platforms/cpu/src/CpuKernels.cpp
+++ b/platforms/cpu/src/CpuKernels.cpp
--- a/platforms/cpu/src/CpuLangevinDynamics.cpp
+++ b/platforms/cpu/src/CpuLangevinDynamics.cpp
-/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
+/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
 * Authors: Peter Eastman
 * Contributors: 
 *
@@ -29,45 +29,15 @@
 using namespace OpenMM;
 using namespace std;
-class CpuLangevinDynamics::Update1Task : public ThreadPool::Task {
+CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, double deltaT, double friction, double temperature, ThreadPool& threads, CpuRandom& random) : 
-public:
+           ReferenceStochasticDynamics(numberOfAtoms, deltaT, friction, temperature), threads(threads), random(random) {
-    Update1Task(CpuLangevinDynamics& owner) : owner(owner) {
-    }
-    void execute(ThreadPool& threads, int threadIndex) {
-        owner.threadUpdate1(threadIndex);
-    }
-    CpuLangevinDynamics& owner;
-};
-class CpuLangevinDynamics::Update2Task : public ThreadPool::Task {
-public:
-    Update2Task(CpuLangevinDynamics& owner) : owner(owner) {
-    }
-    void execute(ThreadPool& threads, int threadIndex) {
-        owner.threadUpdate2(threadIndex);
-    }
-    CpuLangevinDynamics& owner;
-};
-class CpuLangevinDynamics::Update3Task : public ThreadPool::Task {
-public:
-    Update3Task(CpuLangevinDynamics& owner) : owner(owner) {
-    }
-    void execute(ThreadPool& threads, int threadIndex) {
-        owner.threadUpdate3(threadIndex);
-    }
-    CpuLangevinDynamics& owner;
-};
-CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM tau, RealOpenMM temperature, ThreadPool& threads, CpuRandom& random) : 
-           ReferenceStochasticDynamics(numberOfAtoms, deltaT, tau, temperature), threads(threads), random(random) {
 }
 CpuLangevinDynamics::~CpuLangevinDynamics() {
 }
-void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<RealVec>& atomCoordinates, vector<RealVec>& velocities,
+void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<Vec3>& atomCoordinates, vector<Vec3>& velocities,
-                                      vector<RealVec>& forces, vector<RealOpenMM>& inverseMasses, vector<RealVec>& xPrime) {
+                                      vector<Vec3>& forces, vector<double>& inverseMasses, vector<Vec3>& xPrime) {
    // Record the parameters for the threads.
    this->numberOfAtoms = numberOfAtoms;
@@ -79,13 +49,12 @@ void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<RealVec>& atomCo
    // Signal the threads to start running and wait for them to finish.
-    Update1Task task(*this);
+    threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate1(threadIndex); });
-    threads.execute(task);
    threads.waitForThreads();
 }
-void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<RealVec>& atomCoordinates, vector<RealVec>& velocities,
+void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<Vec3>& atomCoordinates, vector<Vec3>& velocities,
-                                      vector<RealVec>& forces, vector<RealOpenMM>& inverseMasses, vector<RealVec>& xPrime) {
+                                      vector<Vec3>& forces, vector<double>& inverseMasses, vector<Vec3>& xPrime) {
    // Record the parameters for the threads.
    this->numberOfAtoms = numberOfAtoms;
@@ -97,13 +66,12 @@ void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<RealVec>& atomCo
    // Signal the threads to start running and wait for them to finish.
-    Update2Task task(*this);
+    threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate2(threadIndex); });
-    threads.execute(task);
    threads.waitForThreads();
 }
-void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCoordinates, vector<RealVec>& velocities,
+void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<Vec3>& atomCoordinates, vector<Vec3>& velocities,
-                                       vector<RealOpenMM>& inverseMasses, vector<RealVec>& xPrime) {
+                                       vector<double>& inverseMasses, vector<Vec3>& xPrime) {
    // Record the parameters for the threads.
    this->numberOfAtoms = numberOfAtoms;
@@ -114,44 +82,44 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCo
    // Signal the threads to start running and wait for them to finish.
-    Update3Task task(*this);
+    threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate3(threadIndex); });
-    threads.execute(task);
    threads.waitForThreads();
 }
 // Per-thread worker for the first Langevin update step: for this thread's slice of atoms,
 // rescales each velocity and adds a force term and a Gaussian-noise term.
 // threadIndex selects the slice [start, end) and is also passed to random.getGaussianRandom().
 void CpuLangevinDynamics::threadUpdate1(int threadIndex) {
-    const RealOpenMM tau = getTau();
+    double dt = getDeltaT();
-    const RealOpenMM vscale = EXP(-getDeltaT()/tau);
+    double friction = getFriction();
-    const RealOpenMM fscale = (1-vscale)*tau;
+    const double vscale = exp(-dt*friction);
-    const RealOpenMM kT = BOLTZ*getTemperature();
    // With zero friction, vscale is 1 and (1-vscale)/friction would be 0/0, so dt is used directly.
+    const double fscale = (friction == 0 ? dt : (1-vscale)/friction);
-    const RealOpenMM noisescale = SQRT(2*kT/tau)*SQRT(0.5*(1-vscale*vscale)*tau);
+    const double kT = BOLTZ*getTemperature();
+    const double noisescale = sqrt(kT*(1-vscale*vscale));
    // Split the atoms into contiguous index ranges, one per thread in the pool.
    int start = threadIndex*numberOfAtoms/threads.getNumThreads();
    int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads();
    for (int i = start; i < end; i++) {
        // Atoms whose inverse mass is zero are skipped, so their velocities are left unchanged here.
        if (inverseMasses[i] != 0.0) {
-            RealOpenMM sqrtInvMass = SQRT(inverseMasses[i]);
+            double sqrtInvMass = sqrt(inverseMasses[i]);
-            RealVec noise(random.getGaussianRandom(threadIndex), random.getGaussianRandom(threadIndex), random.getGaussianRandom(threadIndex));
+            Vec3 noise(random.getGaussianRandom(threadIndex), random.getGaussianRandom(threadIndex), random.getGaussianRandom(threadIndex));
            // The force term scales with inverse mass; the noise term scales with its square root.
            velocities[i]  = velocities[i]*vscale + forces[i]*(fscale*inverseMasses[i]) + noise*(noisescale*sqrtInvMass);
        }
   }
 }
 // Per-thread worker for the second update step: for this thread's slice of atoms,
 // writes xPrime[i] = atomCoordinates[i] + velocities[i]*dt.
 // threadIndex selects the slice [start, end) of atom indices handled by this call.
 void CpuLangevinDynamics::threadUpdate2(int threadIndex) {
-    const RealOpenMM dt = getDeltaT();
+    const double dt = getDeltaT();
    // Split the atoms into contiguous index ranges, one per thread in the pool.
    int start = threadIndex*numberOfAtoms/threads.getNumThreads();
    int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads();
    for (int i = start; i < end; i++) {
        // Atoms whose inverse mass is zero are skipped; xPrime[i] is not written for them here.
        if (inverseMasses[i] != 0.0) {
            // NOTE(review): sqrtInvMass is computed but not read anywhere in this function as shown;
            // it looks like a leftover that could be removed - confirm before deleting.
-            RealOpenMM sqrtInvMass = SQRT(inverseMasses[i]);
+            double sqrtInvMass = sqrt(inverseMasses[i]);
            xPrime[i] = atomCoordinates[i]+velocities[i]*dt;
        }
   }
 }
 void CpuLangevinDynamics::threadUpdate3(int threadIndex) {
-   const RealOpenMM invStepSize = 1.0/getDeltaT();
+   const double invStepSize = 1.0/getDeltaT();
    int start = threadIndex*numberOfAtoms/threads.getNumThreads();
    int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads();

--- a/platforms/cpu/src/CpuNeighborList.cpp
+++ b/platforms/cpu/src/CpuNeighborList.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2016 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2017 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -59,7 +59,7 @@ public:
 */
 class CpuNeighborList::Voxels {
 public:
-    Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const RealVec* boxVectors, bool usePeriodic) :
+    Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const Vec3* boxVectors, bool usePeriodic) :
            blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), usePeriodic(usePeriodic) {
        for (int i = 0; i < 3; i++)
            for (int j = 0; j < 3; j++)
@@ -409,21 +409,11 @@ private:
    vector<vector<vector<pair<float, int> > > > bins;
 };
-class CpuNeighborList::ThreadTask : public ThreadPool::Task {
-public:
-    ThreadTask(CpuNeighborList& owner) : owner(owner) {
-    }
-    void execute(ThreadPool& threads, int threadIndex) {
-        owner.threadComputeNeighborList(threads, threadIndex);
-    }
-    CpuNeighborList& owner;
-};
 // Constructs a neighbor list, storing blockSize in the member of the same name.
 // The constructor body does no other work; computeNeighborList() reads blockSize
 // to derive the number of atom blocks.
 CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) {
 }
 void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const vector<set<int> >& exclusions,
-            const RealVec* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads) {
+            const Vec3* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads) {
    int numBlocks = (numAtoms+blockSize-1)/blockSize;
    blockNeighbors.resize(numBlocks);
    blockExclusions.resize(numBlocks);
@@ -460,8 +450,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
    // Sort the atoms based on a Hilbert curve.
    atomBins.resize(numAtoms);
-    ThreadTask task(*this);
+    threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeNeighborList(threads, threadIndex); });
-    threads.execute(task);
    threads.waitForThreads();
    sort(atomBins.begin(), atomBins.end());

--- a/platforms/cpu/src/CpuNonbondedForce.cpp
+++ b/platforms/cpu/src/CpuNonbondedForce.cpp
--- a/platforms/cpu/src/CpuNonbondedForceVec4.cpp
+++ b/platforms/cpu/src/CpuNonbondedForceVec4.cpp
--- a/platforms/cpu/src/CpuNonbondedForceVec8.cpp
+++ b/platforms/cpu/src/CpuNonbondedForceVec8.cpp
--- a/platforms/cpu/src/CpuPlatform.cpp
+++ b/platforms/cpu/src/CpuPlatform.cpp
@@ -127,6 +127,8 @@ void CpuPlatform::contextDestroyed(ContextImpl& context) const {
    PlatformData* data = contextData[&context];
    delete data;
    contextData.erase(&context);
+    ReferencePlatform::PlatformData* refPlatformData = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
+    delete refPlatformData;
 }
 CpuPlatform::PlatformData& CpuPlatform::getPlatformData(ContextImpl& context) {

--- a/platforms/cpu/src/CpuSETTLE.cpp
+++ b/platforms/cpu/src/CpuSETTLE.cpp
--- a/platforms/cpu/staticTarget/CMakeLists.txt
+++ b/platforms/cpu/staticTarget/CMakeLists.txt
@@ -16,7 +16,6 @@ ENDFOREACH(file)
 ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
 TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME}_static ${PTHREADS_LIB_STATIC})
-#-DPTW32_STATIC_LIB only works for the windows pthreads.
+SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_STATIC_LIBRARY")
-SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB")
 INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${STATIC_TARGET})
--- a/platforms/cpu/tests/TestCpuDispersionPME.cpp
+++ b/platforms/cpu/tests/TestCpuDispersionPME.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2017 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+#include "CpuTests.h"
+#include "TestDispersionPME.h"
+void runPlatformTests() {  // Empty body: this file defines no platform-specific test cases of its own.
+}
--- a/platforms/cpu/tests/TestCpuNeighborList.cpp
+++ b/platforms/cpu/tests/TestCpuNeighborList.cpp
--- a/platforms/cuda/include/CudaContext.h
+++ b/platforms/cuda/include/CudaContext.h
--- a/platforms/cuda/include/CudaKernels.h
+++ b/platforms/cuda/include/CudaKernels.h
--- a/platforms/cuda/include/CudaNonbondedUtilities.h
+++ b/platforms/cuda/include/CudaNonbondedUtilities.h
--- a/platforms/cuda/include/CudaParallelKernels.h
+++ b/platforms/cuda/include/CudaParallelKernels.h
--- a/platforms/cuda/src/CudaContext.cpp
+++ b/platforms/cuda/src/CudaContext.cpp