Merge pull request #2133 from peastman/atomic

Replaced gmx_atomic with C++ atomic

Merge pull request #2133 from peastman/atomic
Replaced gmx_atomic with C++ atomic
fdb501e3 · peastman · GitHub · e72a4e8c · 72bfef12 · e72a4e8c
Unverified commit fdb501e3, authored Jul 25, 2018 by peastman; committed by GitHub on Jul 25, 2018.
18 changed files
--- a/openmmapi/include/openmm/internal/gmx_atomic.h
+++ b/openmmapi/include/openmm/internal/gmx_atomic.h
--- a/platforms/cpu/include/CpuCustomGBForce.h
+++ b/platforms/cpu/include/CpuCustomGBForce.h
@@ -31,6 +31,7 @@
 #include "openmm/internal/CompiledExpressionSet.h"
 #include "openmm/internal/ThreadPool.h"
 #include "openmm/internal/vectorize.h"
+#include <atomic>
 #include <map>
 #include <set>
 #include <vector>
@@ -63,7 +64,7 @@ private:
    const std::map<std::string, double>* globalParameters;
    std::vector<AlignedArray<float> >* threadForce;
    bool includeForce, includeEnergy;
-    void* atomicCounter;
+    std::atomic<int> atomicCounter;
    /**
     * This routine contains the code executed by each thread.

--- a/platforms/cpu/include/CpuCustomManyParticleForce.h
+++ b/platforms/cpu/include/CpuCustomManyParticleForce.h
@@ -34,6 +34,7 @@
 #include "openmm/internal/vectorize.h"
 #include "lepton/CompiledExpression.h"
 #include "lepton/ParsedExpression.h"
+#include <atomic>
 #include <map>
 #include <set>
 #include <utility>
@@ -69,7 +70,7 @@ private:
    const std::map<std::string, double>* globalParameters;
    std::vector<AlignedArray<float> >* threadForce;
    bool includeForces, includeEnergy;
-    void* atomicCounter;
+    std::atomic<int> atomicCounter;
    /**
     * This routine contains the code executed by each thread.

--- a/platforms/cpu/include/CpuCustomNonbondedForce.h
+++ b/platforms/cpu/include/CpuCustomNonbondedForce.h
@@ -30,6 +30,7 @@
 #include "openmm/internal/CompiledExpressionSet.h"
 #include "openmm/internal/ThreadPool.h"
 #include "openmm/internal/vectorize.h"
+#include <atomic>
 #include <map>
 #include <set>
 #include <utility>
@@ -147,7 +148,7 @@ private:
    const std::map<std::string, double>* globalParameters;
    std::vector<AlignedArray<float> >* threadForce;
    bool includeForce, includeEnergy;
-    void* atomicCounter;
+    std::atomic<int> atomicCounter;
    /**
     * This routine contains the code executed by each thread.

--- a/platforms/cpu/include/CpuGBSAOBCForce.h
+++ b/platforms/cpu/include/CpuGBSAOBCForce.h
-/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
+/* Portions copyright (c) 2006-2018 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -28,6 +28,7 @@
 #include "AlignedArray.h"
 #include "openmm/internal/ThreadPool.h"
 #include "openmm/internal/vectorize.h"
+#include <atomic>
 #include <set>
 #include <utility>
 #include <vector>
@@ -112,7 +113,7 @@ private:
    float const* posq;
    std::vector<AlignedArray<float> >* threadForce;
    bool includeEnergy;
-    void* atomicCounter;
+    std::atomic<int> atomicCounter;
    static const int NUM_TABLE_POINTS;
    static const float TABLE_MIN;

--- a/platforms/cpu/include/CpuGayBerneForce.h
+++ b/platforms/cpu/include/CpuGayBerneForce.h
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2016-2017 Stanford University and the Authors.      *
+ * Portions copyright (c) 2016-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -91,7 +91,7 @@ private:
    Vec3 const* positions;
    std::vector<AlignedArray<float> >* threadForce;
    Vec3* boxVectors;
-    void* atomicCounter;
+    std::atomic<int> atomicCounter;
    void computeEllipsoidFrames(const std::vector<Vec3>& positions);

--- a/platforms/cpu/include/CpuNeighborList.h
+++ b/platforms/cpu/include/CpuNeighborList.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2017 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -35,8 +35,8 @@
 #include "AlignedArray.h"
 #include "openmm/Vec3.h"
 #include "windowsExportCpu.h"
-#include "openmm/internal/gmx_atomic.h"
 #include "openmm/internal/ThreadPool.h"
+#include <atomic>
 #include <set>
 #include <utility>
 #include <vector>
@@ -75,7 +75,7 @@ private:
    int numAtoms;
    bool usePeriodic;
    float maxDistance;
-    gmx_atomic_t atomicCounter;
+    std::atomic<int> atomicCounter;
 };
 } // namespace OpenMM

--- a/platforms/cpu/include/CpuNonbondedForce.h
+++ b/platforms/cpu/include/CpuNonbondedForce.h
-/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
+/* Portions copyright (c) 2006-2018 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -30,6 +30,7 @@
 #include "ReferencePairIxn.h"
 #include "openmm/internal/ThreadPool.h"
 #include "openmm/internal/vectorize.h"
+#include <atomic>
 #include <set>
 #include <utility>
 #include <vector>
@@ -200,7 +201,7 @@ protected:
        bool includeEnergy;
        float inverseRcut6;
        float inverseRcut6Expterm;
-        void* atomicCounter;
+        std::atomic<int> atomicCounter;
        static const float TWO_OVER_SQRT_PI;
        static const int NUM_TABLE_POINTS;

--- a/platforms/cpu/src/CpuCustomGBForce.cpp
+++ b/platforms/cpu/src/CpuCustomGBForce.cpp
@@ -28,7 +28,6 @@
 #include "SimTKOpenMMUtilities.h"
 #include "ReferenceForce.h"
 #include "CpuCustomGBForce.h"
-#include "openmm/internal/gmx_atomic.h"
 using namespace OpenMM;
 using namespace std;
@@ -191,13 +190,11 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, vector<vecto
    this->includeForce = includeForce;
    this->includeEnergy = includeEnergy;
    threadEnergy.resize(threads.getNumThreads());
-    gmx_atomic_t counter;
-    this->atomicCounter = &counter;
    // Calculate the first computed value.
    auto task = [&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); };
-    gmx_atomic_set(&counter, 0);
+    atomicCounter = 0;
    threads.execute(task);
    threads.waitForThreads();
@@ -217,7 +214,7 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, vector<vecto
    // Calculate the energy terms.
    for (int i = 0; i < (int) threadData[0]->energyExpressions.size(); i++) {
-        gmx_atomic_set(&counter, 0);
+        atomicCounter = 0;
        threads.execute(task);
        threads.waitForThreads();
    }
@@ -229,7 +226,7 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, vector<vecto
    // Apply the chain rule to evaluate forces.
-    gmx_atomic_set(&counter, 0);
+    atomicCounter = 0;
    threads.resumeThreads();
    threads.waitForThreads();
@@ -361,7 +358,7 @@ void CpuCustomGBForce::calculateParticlePairValue(int index, ThreadData& data, i
        // Loop over all pairs in the neighbor list.
        while (true) {
-            int blockIndex = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int blockIndex = atomicCounter++;
            if (blockIndex >= neighborList->getNumBlocks())
                break;
            const int blockSize = neighborList->getBlockSize();
@@ -386,7 +383,7 @@ void CpuCustomGBForce::calculateParticlePairValue(int index, ThreadData& data, i
        // Perform an O(N^2) loop over all atom pairs.
        while (true) {
-            int i = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int i = atomicCounter++;
            if (i >= numAtoms)
                break;
            for (int j = i+1; j < numAtoms; j++) {
@@ -456,7 +453,7 @@ void CpuCustomGBForce::calculateParticlePairEnergyTerm(int index, ThreadData& da
        // Loop over all pairs in the neighbor list.
        while (true) {
-            int blockIndex = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int blockIndex = atomicCounter++;
            if (blockIndex >= neighborList->getNumBlocks())
                break;
            const int blockSize = neighborList->getBlockSize();
@@ -480,7 +477,7 @@ void CpuCustomGBForce::calculateParticlePairEnergyTerm(int index, ThreadData& da
        // Perform an O(N^2) loop over all atom pairs.
        while (true) {
-            int i = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int i = atomicCounter++;
            if (i >= numAtoms)
                break;
            for (int j = i+1; j < numAtoms; j++) {
@@ -543,7 +540,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
        // Loop over all pairs in the neighbor list.
        while (true) {
-            int blockIndex = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int blockIndex = atomicCounter++;
            if (blockIndex >= neighborList->getNumBlocks())
                break;
            const int blockSize = neighborList->getBlockSize();
@@ -567,7 +564,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
        // Perform an O(N^2) loop over all atom pairs.
        while (true) {
-            int i = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int i = atomicCounter++;
            if (i >= numAtoms)
                break;
            for (int j = i+1; j < numAtoms; j++) {

--- a/platforms/cpu/src/CpuCustomManyParticleForce.cpp
+++ b/platforms/cpu/src/CpuCustomManyParticleForce.cpp
@@ -32,7 +32,6 @@
 #include "ReferenceTabulatedFunction.h"
 #include "openmm/internal/CustomManyParticleForceImpl.h"
 #include "lepton/CustomFunction.h"
-#include "openmm/internal/gmx_atomic.h"
 using namespace OpenMM;
 using namespace std;
@@ -99,9 +98,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, vector<
    this->threadForce = &threadForce;
    this->includeForces = includeForces;
    this->includeEnergy = includeEnergy;
-    gmx_atomic_t counter;
+    atomicCounter = 0;
-    gmx_atomic_set(&counter, 0);
-    this->atomicCounter = &counter;
    if (useCutoff) {
        // Construct a neighbor list.  We use CpuNeighborList to do this, but then copy the result
        // into a new data structure.  This is needed because in UniqueCentralParticle mode, the
@@ -156,7 +153,7 @@ void CpuCustomManyParticleForce::threadComputeForce(ThreadPool& threads, int thr
        // Loop over interactions from the neighbor list.
        while (true) {
-            int i = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int i = atomicCounter++;
            if (i >= numParticles)
                break;
            particleIndices[0] = i;
@@ -170,7 +167,7 @@ void CpuCustomManyParticleForce::threadComputeForce(ThreadPool& threads, int thr
        for (int i = 0; i < numParticles; i++)
            particles[i] = i;
        while (true) {
-            int i = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int i = atomicCounter++;
            if (i >= numParticles)
                break;
            particleIndices[0] = i;

--- a/platforms/cpu/src/CpuCustomNonbondedForce.cpp
+++ b/platforms/cpu/src/CpuCustomNonbondedForce.cpp
-/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
+/* Portions copyright (c) 2009-2018 Stanford University and Simbios.
 * Contributors: Peter Eastman
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -28,7 +28,6 @@
 #include "SimTKOpenMMUtilities.h"
 #include "ReferenceForce.h"
 #include "CpuCustomNonbondedForce.h"
-#include "openmm/internal/gmx_atomic.h"
 using namespace OpenMM;
 using namespace std;
@@ -134,9 +133,7 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v
    this->includeForce = includeForce;
    this->includeEnergy = includeEnergy;
    threadEnergy.resize(threads.getNumThreads());
-    gmx_atomic_t counter;
+    atomicCounter = 0;
-    gmx_atomic_set(&counter, 0);
-    this->atomicCounter = &counter;
    // Signal the threads to start running and wait for them to finish.
@@ -177,7 +174,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
        // The user has specified interaction groups, so compute only the requested interactions.
        while (true) {
-            int i = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int i = atomicCounter++;
            if (i >= groupInteractions.size())
                break;
            int atom1 = groupInteractions[i].first;
@@ -193,7 +190,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
        // We are using a cutoff, so get the interactions from the neighbor list.
        while (true) {
-            int blockIndex = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int blockIndex = atomicCounter++;
            if (blockIndex >= neighborList->getNumBlocks())
                break;
            const int blockSize = neighborList->getBlockSize();
@@ -219,7 +216,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
        // Every particle interacts with every other one.
        while (true) {
-            int ii = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int ii = atomicCounter++;
            if (ii >= numberOfAtoms)
                break;
            for (int jj = ii+1; jj < numberOfAtoms; jj++) {

--- a/platforms/cpu/src/CpuGBSAOBCForce.cpp
+++ b/platforms/cpu/src/CpuGBSAOBCForce.cpp
-/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
+/* Portions copyright (c) 2006-2018 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -24,7 +24,6 @@
 #include "CpuGBSAOBCForce.h"
 #include "SimTKOpenMMRealType.h"
 #include "openmm/internal/vectorize.h"
-#include "openmm/internal/gmx_atomic.h"
 #include <algorithm>
 #include <cmath>
 #include <cstdlib>
@@ -95,21 +94,19 @@ void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<Align
    threadBornForces.resize(numThreads);
    for (int i = 0; i < numThreads; i++)
        threadBornForces[i].resize(particleParams.size()+3);
-    gmx_atomic_t counter;
-    this->atomicCounter = &counter;
    // Signal the threads to start running and wait for them to finish.
-    gmx_atomic_set(&counter, 0);
+    atomicCounter = 0;
    threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
    threads.waitForThreads(); // Compute Born radii
-    gmx_atomic_set(&counter, 0);
+    atomicCounter = 0;
    threads.resumeThreads();
    threads.waitForThreads(); // Compute surface area term
-    gmx_atomic_set(&counter, 0);
+    atomicCounter = 0;
    threads.resumeThreads();
    threads.waitForThreads(); // First loop
-    gmx_atomic_set(&counter, 0);
+    atomicCounter = 0;
    threads.resumeThreads();
    threads.waitForThreads(); // Second loop
@@ -138,7 +135,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
    // Calculate Born radii
    while (true) {
-        int blockStart = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 4);
+        int blockStart = atomicCounter.fetch_add(4);
        if (blockStart >= numParticles)
            break;
        int numInBlock = min(4, numParticles-blockStart);
@@ -215,7 +212,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
    for (int i = 0; i < numParticles; i++)
        bornForces[i] = 0.0f;
    while (true) {
-        int atomI = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+        int atomI = atomicCounter++;
        if (atomI >= numParticles)
            break;
        if (bornRadii[atomI] > 0) {
@@ -240,7 +237,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
    else
        preFactor = 0.0f;
    while (true) {
-        int blockStart = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 4);
+        int blockStart = atomicCounter.fetch_add(4);
        if (blockStart >= numParticles)
            break;
        int numInBlock = min(4, numParticles-blockStart);
@@ -318,7 +315,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
    // Second loop of Born energy computation.
    while (true) {
-        int blockStart = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 4);
+        int blockStart = atomicCounter.fetch_add(4);
        if (blockStart >= numParticles)
            break;
        fvec4 bornForce(0.0f);

--- a/platforms/cpu/src/CpuGayBerneForce.cpp
+++ b/platforms/cpu/src/CpuGayBerneForce.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2016-2017 Stanford University and the Authors.      *
+ * Portions copyright (c) 2016-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -37,7 +37,6 @@
 #include "ReferenceForce.h"
 #include "openmm/OpenMMException.h"
 #include "openmm/GayBerneForce.h"
-#include "openmm/internal/gmx_atomic.h"
 #include <algorithm>
 #include <cmath>
@@ -120,9 +119,7 @@ double CpuGayBerneForce::calculateForce(const vector<Vec3>& positions, std::vect
    this->boxVectors = boxVectors;
    threadEnergy.resize(numThreads);
    threadTorque.resize(numThreads);
-    gmx_atomic_t counter;
+    atomicCounter = 0;
-    gmx_atomic_set(&counter, 0);
-    this->atomicCounter = &counter;
    // Signal the threads to compute the pairwise interactions.
@@ -131,7 +128,7 @@ double CpuGayBerneForce::calculateForce(const vector<Vec3>& positions, std::vect
    // Signal the threads to compute exceptions.
-    gmx_atomic_set(&counter, 0);
+    atomicCounter = 0;
    threads.resumeThreads();
    threads.waitForThreads();
@@ -162,7 +159,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
    if (neighborList == NULL) {
        while (true) {
-            int i = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int i = atomicCounter++;
            if (i >= numParticles)
                break;
            if (particles[i].sqrtEpsilon == 0.0f)
@@ -180,7 +177,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
    }
    else {
        while (true) {
-            int blockIndex = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int blockIndex = atomicCounter++;
            if (blockIndex >= neighborList->getNumBlocks())
                break;
            const int blockSize = neighborList->getBlockSize();
@@ -211,7 +208,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
    int numExceptions = exceptions.size();
    const int groupSize = max(1, numExceptions/(10*numThreads));
    while (true) {
-        int start = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), groupSize);
+        int start = atomicCounter.fetch_add(groupSize);
        if (start >= numExceptions)
            break;
        int end = min(start+groupSize, numExceptions);

--- a/platforms/cpu/src/CpuNeighborList.cpp
+++ b/platforms/cpu/src/CpuNeighborList.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2017 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -476,7 +476,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
    // Signal the threads to start running and wait for them to finish.
-    gmx_atomic_set(&atomicCounter, 0);
+    atomicCounter = 0;
    threads.resumeThreads();
    threads.waitForThreads();
@@ -538,7 +538,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
    vector<float> blockAtomX(blockSize), blockAtomY(blockSize), blockAtomZ(blockSize);
    vector<VoxelIndex> atomVoxelIndex;
    while (true) {
-        int i = gmx_atomic_fetch_add(&atomicCounter, 1);
+        int i = atomicCounter++;
        if (i >= numBlocks)
            break;

--- a/platforms/cpu/src/CpuNonbondedForce.cpp
+++ b/platforms/cpu/src/CpuNonbondedForce.cpp
-/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
+/* Portions copyright (c) 2006-2018 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -28,7 +28,6 @@
 #include "CpuNonbondedForce.h"
 #include "ReferenceForce.h"
 #include "ReferencePME.h"
-#include "openmm/internal/gmx_atomic.h"
 #include <algorithm>
 #include <iostream>
@@ -389,9 +388,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
    this->threadForce = &threadForce;
    includeEnergy = (totalEnergy != NULL);
    threadEnergy.resize(threads.getNumThreads());
-    gmx_atomic_t counter;
+    atomicCounter = 0;
-    gmx_atomic_set(&counter, 0);
-    this->atomicCounter = &counter;
    // Signal the threads to start running and wait for them to finish.
@@ -401,7 +398,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
    // Signal the threads to subtract the exclusions.
    if (ewald || pme) {
-        gmx_atomic_set(&counter, 0);
+        atomicCounter = 0;
        threads.resumeThreads();
        threads.waitForThreads();
    }
@@ -429,7 +426,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
    if (ewald || pme || ljpme) {
        // Compute the interactions from the neighbor list.
        while (true) {
-            int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int nextBlock = atomicCounter++;
            if (nextBlock >= neighborList->getNumBlocks())
                break;
            calculateBlockEwaldIxn(nextBlock, forces, energyPtr, boxSize, invBoxSize);
@@ -440,7 +437,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
        threads.syncThreads();
        const int groupSize = max(1, numberOfAtoms/(10*numThreads));
        while (true) {
-            int start = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), groupSize);
+            int start = atomicCounter.fetch_add(groupSize);
            if (start >= numberOfAtoms)
                break;
            int end = min(start+groupSize, numberOfAtoms);
@@ -490,7 +487,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
        // Compute the interactions from the neighbor list.
        while (true) {
-            int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int nextBlock = atomicCounter++;
            if (nextBlock >= neighborList->getNumBlocks())
                break;
            calculateBlockIxn(nextBlock, forces, energyPtr, boxSize, invBoxSize);
@@ -500,7 +497,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
        // Loop over all atom pairs
        while (true) {
-            int i = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
+            int i = atomicCounter++;
            if (i >= numberOfAtoms)
                break;
            for (int j = i+1; j < numberOfAtoms; j++)

--- a/platforms/cpu/src/CpuSETTLE.cpp
+++ b/platforms/cpu/src/CpuSETTLE.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2017 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -30,7 +30,7 @@
 * -------------------------------------------------------------------------- */
 #include "CpuSETTLE.h"
-#include "openmm/internal/gmx_atomic.h"
+#include <atomic>
 using namespace OpenMM;
 using namespace std;
@@ -61,11 +61,11 @@ CpuSETTLE::~CpuSETTLE() {
 }
 void CpuSETTLE::apply(vector<OpenMM::Vec3>& atomCoordinates, vector<OpenMM::Vec3>& atomCoordinatesP, vector<double>& inverseMasses, double tolerance) {
-    gmx_atomic_t atomicCounter;
+    atomic<int> atomicCounter;
-    gmx_atomic_set(&atomicCounter, 0);
+    atomicCounter = 0;
    threads.execute([&] (ThreadPool& threads, int threadIndex) {
        while (true) {
-            int index = gmx_atomic_fetch_add(&atomicCounter, 1);
+            int index = atomicCounter++;
            if (index >= threadSettle.size())
                break;
            threadSettle[index]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
@@ -75,11 +75,11 @@ void CpuSETTLE::apply(vector<OpenMM::Vec3>& atomCoordinates, vector<OpenMM::Vec3
 }
 void CpuSETTLE::applyToVelocities(vector<OpenMM::Vec3>& atomCoordinates, vector<OpenMM::Vec3>& velocities, vector<double>& inverseMasses, double tolerance) {
-    gmx_atomic_t atomicCounter;
+    atomic<int> atomicCounter;
-    gmx_atomic_set(&atomicCounter, 0);
+    atomicCounter = 0;
    threads.execute([&] (ThreadPool& threads, int threadIndex) {
        while (true) {
-            int index = gmx_atomic_fetch_add(&atomicCounter, 1);
+            int index = atomicCounter++;
            if (index >= threadSettle.size())
                break;
            threadSettle[index]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);

--- a/plugins/cpupme/src/CpuPmeKernels.cpp
+++ b/plugins/cpupme/src/CpuPmeKernels.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2017 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -52,7 +52,7 @@ bool CpuCalcDispersionPmeReciprocalForceKernel::hasInitializedThreads = false;
 int CpuCalcDispersionPmeReciprocalForceKernel::numThreads = 0;
 static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors,
-        gmx_atomic_t& atomicCounter, const float epsilonFactor, int threadIndex, int numThreads, bool deterministic) {
+        atomic<int>& atomicCounter, const float epsilonFactor, int threadIndex, int numThreads, bool deterministic) {
    float temp[4];
    fvec4 boxSize((float) periodicBoxVectors[0][0], (float) periodicBoxVectors[1][1], (float) periodicBoxVectors[2][2], 0);
    fvec4 invBoxSize((float) recipBoxVectors[0][0], (float) recipBoxVectors[1][1], (float) recipBoxVectors[2][2], 0);
@@ -69,7 +69,7 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
    int i = threadIndex;
    while (true) {
        if (!deterministic)
-            i = gmx_atomic_fetch_add(&atomicCounter, 1);
+            i = atomicCounter++;
        if (i >= numParticles)
            break;
@@ -310,7 +310,7 @@ static void reciprocalConvolution(int start, int end, fftwf_complex* grid, vecto
    }
 }
-static void interpolateForces(float* posq, float* force, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors, gmx_atomic_t& atomicCounter, const float epsilonFactor) {
+static void interpolateForces(float* posq, float* force, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors, atomic<int>& atomicCounter, const float epsilonFactor) {
    fvec4 boxSize((float) periodicBoxVectors[0][0], (float) periodicBoxVectors[1][1], (float) periodicBoxVectors[2][2], 0);
    fvec4 invBoxSize((float) recipBoxVectors[0][0], (float) recipBoxVectors[1][1], (float) recipBoxVectors[2][2], 0);
    fvec4 recipBoxVec0((float) recipBoxVectors[0][0], (float) recipBoxVectors[0][1], (float) recipBoxVectors[0][2], 0);
@@ -321,7 +321,7 @@ static void interpolateForces(float* posq, float* force, float* grid, int gridx,
    fvec4 one(1);
    fvec4 scale(1.0f/(PME_ORDER-1));
    while (true) {
-        int i = gmx_atomic_fetch_add(&atomicCounter, 1);
+        int i = atomicCounter++;
        if (i >= numParticles)
            break;
@@ -545,7 +545,7 @@ void CpuCalcPmeReciprocalForceKernel::runMainThread() {
        if (isDeleted)
            break;
        posq = io->getPosq();
-        gmx_atomic_set(&atomicCounter, 0);
+        atomicCounter = 0;
        threads.execute([&] (ThreadPool& threads, int threadIndex) { runWorkerThread(threads, threadIndex); }); // Signal threads to perform charge spreading.
        threads.waitForThreads();
        threads.resumeThreads(); // Signal threads to sum the charge grids.
@@ -564,7 +564,7 @@ void CpuCalcPmeReciprocalForceKernel::runMainThread() {
        threads.resumeThreads(); // Signal threads to perform reciprocal convolution.
        threads.waitForThreads();
        fftwf_execute_dft_c2r(backwardFFT, complexGrid, realGrid);
-        gmx_atomic_set(&atomicCounter, 0);
+        atomicCounter = 0;
        threads.resumeThreads(); // Signal threads to interpolate forces.
        threads.waitForThreads();
        isFinished = true;
@@ -837,7 +837,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runMainThread() {
            break;
        posq = io->getPosq();
        ComputeTask task(*this);
-        gmx_atomic_set(&atomicCounter, 0);
+        atomicCounter = 0;
        threads.execute(task); // Signal threads to perform charge spreading.
        threads.waitForThreads();
        threads.resumeThreads(); // Signal threads to sum the charge grids.
@@ -856,7 +856,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runMainThread() {
        threads.resumeThreads(); // Signal threads to perform reciprocal convolution.
        threads.waitForThreads();
        fftwf_execute_dft_c2r(backwardFFT, complexGrid, realGrid);
-        gmx_atomic_set(&atomicCounter, 0);
+        atomicCounter = 0;
        threads.resumeThreads(); // Signal threads to interpolate forces.
        threads.waitForThreads();
        isFinished = true;

--- a/plugins/cpupme/src/CpuPmeKernels.h
+++ b/plugins/cpupme/src/CpuPmeKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2017 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -36,8 +36,8 @@
 #include "internal/windowsExportPme.h"
 #include "openmm/kernels.h"
 #include "openmm/Vec3.h"
-#include "openmm/internal/gmx_atomic.h"
 #include "openmm/internal/ThreadPool.h"
+#include <atomic>
 #include <fftw3.h>
 #include <pthread.h>
 #include <vector>
@@ -132,7 +132,7 @@ private:
    float* posq;
    Vec3 periodicBoxVectors[3], recipBoxVectors[3];
    bool includeEnergy;
-    gmx_atomic_t atomicCounter;
+    std::atomic<int> atomicCounter;
 };
@@ -226,7 +226,7 @@ private:
    float* posq;
    Vec3 periodicBoxVectors[3], recipBoxVectors[3];
    bool includeEnergy;
-    gmx_atomic_t atomicCounter;
+    std::atomic<int> atomicCounter;
 };
 } // namespace OpenMM