Fixed errors on Windows

c66766a8 · peastman · 6f7dee30 · c66766a8 · c66766a8 · c66766a8
Commit c66766a8 authored Sep 03, 2014 by peastman
3 changed files
--- a/openmmapi/include/openmm/internal/vectorize_sse.h
+++ b/openmmapi/include/openmm/internal/vectorize_sse.h
-#ifndef OPENMM_VECTORIZE_SSE_H_
-#define OPENMM_VECTORIZE_SSE_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2013 Stanford University and the Authors.           *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include <smmintrin.h>
-#include "hardware.h"
-
-// This file defines classes and functions to simplify vectorizing code with SSE.
-
-/**
- * Determine whether ivec4 and fvec4 are supported on this processor.
- */
-static bool isVec4Supported() {
-    int cpuInfo[4];
-    cpuid(cpuInfo, 0);
-    if (cpuInfo[0] >= 1) {
-        cpuid(cpuInfo, 1);
-        return ((cpuInfo[2] & ((int) 1 << 19)) != 0);
-    }
-    return false;
-}
-
-class ivec4;
-
-/**
- * A four element vector of floats.
- */
-class fvec4 {
-public:
-    __m128 val;
-    
-    fvec4() {}
-    fvec4(float v) : val(_mm_set1_ps(v)) {}
-    fvec4(float v1, float v2, float v3, float v4) : val(_mm_set_ps(v4, v3, v2, v1)) {}
-    fvec4(__m128 v) : val(v) {}
-    fvec4(const float* v) : val(_mm_loadu_ps(v)) {}
-    operator __m128() const {
-        return val;
-    }
-    float operator[](int i) const {
-        float result[4];
-        store(result);
-        return result[i];
-    }
-    void store(float* v) const {
-        _mm_storeu_ps(v, val);
-    }
-    fvec4 operator+(const fvec4& other) const {
-        return _mm_add_ps(val, other);
-    }
-    fvec4 operator-(const fvec4& other) const {
-        return _mm_sub_ps(val, other);
-    }
-    fvec4 operator*(const fvec4& other) const {
-        return _mm_mul_ps(val, other);
-    }
-    fvec4 operator/(const fvec4& other) const {
-        return _mm_div_ps(val, other);
-    }
-    void operator+=(const fvec4& other) {
-        val = _mm_add_ps(val, other);
-    }
-    void operator-=(const fvec4& other) {
-        val = _mm_sub_ps(val, other);
-    }
-    void operator*=(const fvec4& other) {
-        val = _mm_mul_ps(val, other);
-    }
-    void operator/=(const fvec4& other) {
-        val = _mm_div_ps(val, other);
-    }
-    fvec4 operator-() const {
-        return _mm_sub_ps(_mm_set1_ps(0.0f), val);
-    }
-    fvec4 operator&(const fvec4& other) const {
-        return _mm_and_ps(val, other);
-    }
-    fvec4 operator|(const fvec4& other) const {
-        return _mm_or_ps(val, other);
-    }
-    fvec4 operator==(const fvec4& other) const {
-        return _mm_cmpeq_ps(val, other);
-    }
-    fvec4 operator!=(const fvec4& other) const {
-        return _mm_cmpneq_ps(val, other);
-    }
-    fvec4 operator>(const fvec4& other) const {
-        return _mm_cmpgt_ps(val, other);
-    }
-    fvec4 operator<(const fvec4& other) const {
-        return _mm_cmplt_ps(val, other);
-    }
-    fvec4 operator>=(const fvec4& other) const {
-        return _mm_cmpge_ps(val, other);
-    }
-    fvec4 operator<=(const fvec4& other) const {
-        return _mm_cmple_ps(val, other);
-    }
-    operator ivec4() const;
-};
-
-/**
- * A four element vector of ints.
- */
-class ivec4 {
-public:
-    __m128i val;
-    
-    ivec4() {}
-    ivec4(int v) : val(_mm_set1_epi32(v)) {}
-    ivec4(int v1, int v2, int v3, int v4) : val(_mm_set_epi32(v4, v3, v2, v1)) {}
-    ivec4(__m128i v) : val(v) {}
-    ivec4(const int* v) : val(_mm_loadu_si128((const __m128i*) v)) {}
-    operator __m128i() const {
-        return val;
-    }
-    int operator[](int i) const {
-        int result[4];
-        store(result);
-        return result[i];
-    }
-    void store(int* v) const {
-        _mm_storeu_si128((__m128i*) v, val);
-    }
-    ivec4 operator+(const ivec4& other) const {
-        return _mm_add_epi32(val, other);
-    }
-    ivec4 operator-(const ivec4& other) const {
-        return _mm_sub_epi32(val, other);
-    }
-    ivec4 operator*(const ivec4& other) const {
-        return _mm_mullo_epi32(val, other);
-    }
-    void operator+=(const ivec4& other) {
-        val = _mm_add_epi32(val, other);
-    }
-    void operator-=(const ivec4& other) {
-        val = _mm_sub_epi32(val, other);
-    }
-    void operator*=(const ivec4& other) {
-        val = _mm_mullo_epi32(val, other);
-    }
-    ivec4 operator-() const {
-        return _mm_sub_epi32(_mm_set1_epi32(0), val);
-    }
-    ivec4 operator&(const ivec4& other) const {
-        return _mm_and_si128(val, other);
-    }
-    ivec4 operator|(const ivec4& other) const {
-        return _mm_or_si128(val, other);
-    }
-    ivec4 operator==(const ivec4& other) const {
-        return _mm_cmpeq_epi32(val, other);
-    }
-    ivec4 operator!=(const ivec4& other) const {
-        return _mm_xor_si128(*this==other, _mm_set1_epi32(0xFFFFFFFF));
-    }
-    ivec4 operator>(const ivec4& other) const {
-        return _mm_cmpgt_epi32(val, other);
-    }
-    ivec4 operator<(const ivec4& other) const {
-        return _mm_cmplt_epi32(val, other);
-    }
-    ivec4 operator>=(const ivec4& other) const {
-        return _mm_xor_si128(_mm_cmplt_epi32(val, other), _mm_set1_epi32(0xFFFFFFFF));
-    }
-    ivec4 operator<=(const ivec4& other) const {
-        return _mm_xor_si128(_mm_cmpgt_epi32(val, other), _mm_set1_epi32(0xFFFFFFFF));
-    }
-    operator fvec4() const;
-};
-
-// Conversion operators.
-
-inline fvec4::operator ivec4() const {
-    return _mm_cvttps_epi32(val);
-}
-
-inline ivec4::operator fvec4() const {
-    return _mm_cvtepi32_ps(val);
-}
-
-// Functions that operate on fvec4s.
-
-static inline fvec4 floor(const fvec4& v) {
-    return fvec4(_mm_floor_ps(v.val));
-}
-
-static inline fvec4 ceil(const fvec4& v) {
-    return fvec4(_mm_ceil_ps(v.val));
-}
-
-static inline fvec4 round(const fvec4& v) {
-    return fvec4(_mm_round_ps(v.val, _MM_FROUND_TO_NEAREST_INT));
-}
-
-static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
-    return fvec4(_mm_min_ps(v1.val, v2.val));
-}
-
-static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
-    return fvec4(_mm_max_ps(v1.val, v2.val));
-}
-
-static inline fvec4 abs(const fvec4& v) {
-    static const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
-    return fvec4(_mm_and_ps(v.val, mask));
-}
-
-static inline fvec4 sqrt(const fvec4& v) {
-    return fvec4(_mm_sqrt_ps(v.val));
-}
-
-static inline float dot3(const fvec4& v1, const fvec4& v2) {
-    return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0x71));
-}
-
-static inline float dot4(const fvec4& v1, const fvec4& v2) {
-    return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0xF1));
-}
-
-static inline fvec4 cross(const fvec4& v1, const fvec4& v2) {
-    fvec4 temp = _mm_mul_ps(v1, _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1))) -
-                 _mm_mul_ps(v2, _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1)));
-    return _mm_shuffle_ps(temp, temp, _MM_SHUFFLE(3, 0, 2, 1));
-}
-
-static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
-    _MM_TRANSPOSE4_PS(v1, v2, v3, v4);
-}
-
-// Functions that operate on ivec4s.
-
-static inline ivec4 min(const ivec4& v1, const ivec4& v2) {
-    return ivec4(_mm_min_epi32(v1.val, v2.val));
-}
-
-static inline ivec4 max(const ivec4& v1, const ivec4& v2) {
-    return ivec4(_mm_max_epi32(v1.val, v2.val));
-}
-
-static inline ivec4 abs(const ivec4& v) {
-    return ivec4(_mm_abs_epi32(v.val));
-}
-
-static inline bool any(const ivec4& v) {
-    return !_mm_test_all_zeros(v, _mm_set1_epi32(0xFFFFFFFF));
-}
-
-// Mathematical operators involving a scalar and a vector.
-
-static inline fvec4 operator+(float v1, const fvec4& v2) {
-    return fvec4(v1)+v2;
-}
-
-static inline fvec4 operator-(float v1, const fvec4& v2) {
-    return fvec4(v1)-v2;
-}
-
-static inline fvec4 operator*(float v1, const fvec4& v2) {
-    return fvec4(v1)*v2;
-}
-
-static inline fvec4 operator/(float v1, const fvec4& v2) {
-    return fvec4(v1)/v2;
-}
-
-// Operations for blending fvec4s based on an ivec4.
-
-static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const ivec4& mask) {
-    return fvec4(_mm_blendv_ps(v1.val, v2.val, _mm_castsi128_ps(mask.val)));
-}
-
-#endif /*OPENMM_VECTORIZE_SSE_H_*/
-
+#ifndef OPENMM_VECTORIZE_SSE_H_
+#define OPENMM_VECTORIZE_SSE_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include <smmintrin.h>
+#include "hardware.h"
+
+// This file defines classes and functions to simplify vectorizing code with SSE.
+
+/**
+ * Determine whether ivec4 and fvec4 are supported on this processor.
+ *
+ * Calls cpuid() (not defined in this file; presumably declared in hardware.h)
+ * with function 0 and only proceeds if the first returned word reports that
+ * function 1 is available. It then tests bit 19 of the third word returned by
+ * function 1. Assuming cpuid() fills cpuInfo in EAX, EBX, ECX, EDX order, that
+ * bit is the x86 SSE4.1 feature flag.
+ *
+ * @return true if the feature bit is set; false if it is clear or if CPUID
+ *         function 1 is not reported as available
+ */
+static bool isVec4Supported() {
+    int cpuInfo[4];
+    cpuid(cpuInfo, 0);
+    if (cpuInfo[0] >= 1) {
+        cpuid(cpuInfo, 1);
+        return ((cpuInfo[2] & ((int) 1 << 19)) != 0);
+    }
+    return false;
+}
+
+class ivec4;
+
+/**
+ * A four element vector of floats.
+ *
+ * Thin wrapper around an SSE __m128 value. Arithmetic operators act on all
+ * four elements at once. Comparison operators do not return bool: they return
+ * an fvec4 holding the result of the matching _mm_cmp*_ps intrinsic (every bit
+ * of an element set where the comparison holds, every bit clear where it does
+ * not), which can be combined with operator& and operator|.
+ */
+class fvec4 {
+public:
+    __m128 val; // the wrapped SSE value
+    
+    fvec4() {} // leaves val uninitialized
+    fvec4(float v) : val(_mm_set1_ps(v)) {} // copies v into all four elements
+    fvec4(float v1, float v2, float v3, float v4) : val(_mm_set_ps(v4, v3, v2, v1)) {} // v1 becomes element 0; _mm_set_ps takes the highest element first
+    fvec4(__m128 v) : val(v) {}
+    fvec4(const float* v) : val(_mm_loadu_ps(v)) {} // reads four floats starting at v; unaligned load
+    operator __m128() const {
+        return val;
+    }
+    float operator[](int i) const { // read-only element access via a temporary array; i is not range checked
+        float result[4];
+        store(result);
+        return result[i];
+    }
+    void store(float* v) const { // writes four floats starting at v; unaligned store
+        _mm_storeu_ps(v, val);
+    }
+    fvec4 operator+(const fvec4& other) const {
+        return _mm_add_ps(val, other);
+    }
+    fvec4 operator-(const fvec4& other) const {
+        return _mm_sub_ps(val, other);
+    }
+    fvec4 operator*(const fvec4& other) const {
+        return _mm_mul_ps(val, other);
+    }
+    fvec4 operator/(const fvec4& other) const {
+        return _mm_div_ps(val, other);
+    }
+    void operator+=(const fvec4& other) {
+        val = _mm_add_ps(val, other);
+    }
+    void operator-=(const fvec4& other) {
+        val = _mm_sub_ps(val, other);
+    }
+    void operator*=(const fvec4& other) {
+        val = _mm_mul_ps(val, other);
+    }
+    void operator/=(const fvec4& other) {
+        val = _mm_div_ps(val, other);
+    }
+    fvec4 operator-() const {
+        return _mm_sub_ps(_mm_set1_ps(0.0f), val); // negation computed as 0 - val
+    }
+    fvec4 operator&(const fvec4& other) const { // bitwise AND of the raw bits
+        return _mm_and_ps(val, other);
+    }
+    fvec4 operator|(const fvec4& other) const { // bitwise OR of the raw bits
+        return _mm_or_ps(val, other);
+    }
+    fvec4 operator==(const fvec4& other) const { // per-element mask, not a bool
+        return _mm_cmpeq_ps(val, other);
+    }
+    fvec4 operator!=(const fvec4& other) const { // per-element mask, not a bool
+        return _mm_cmpneq_ps(val, other);
+    }
+    fvec4 operator>(const fvec4& other) const { // per-element mask, not a bool
+        return _mm_cmpgt_ps(val, other);
+    }
+    fvec4 operator<(const fvec4& other) const { // per-element mask, not a bool
+        return _mm_cmplt_ps(val, other);
+    }
+    fvec4 operator>=(const fvec4& other) const { // per-element mask, not a bool
+        return _mm_cmpge_ps(val, other);
+    }
+    fvec4 operator<=(const fvec4& other) const { // per-element mask, not a bool
+        return _mm_cmple_ps(val, other);
+    }
+    operator ivec4() const; // defined out of line because ivec4 is only forward declared here
+};
+
+/**
+ * A four element vector of ints.
+ *
+ * Thin wrapper around an SSE __m128i value treated as four 32 bit integers.
+ * Comparison operators do not return bool: they return an ivec4 in which each
+ * element has every bit set where the comparison holds and is zero where it
+ * does not. There is no division operator.
+ */
+class ivec4 {
+public:
+    __m128i val; // the wrapped SSE value
+    
+    ivec4() {} // leaves val uninitialized
+    ivec4(int v) : val(_mm_set1_epi32(v)) {} // copies v into all four elements
+    ivec4(int v1, int v2, int v3, int v4) : val(_mm_set_epi32(v4, v3, v2, v1)) {} // v1 becomes element 0; _mm_set_epi32 takes the highest element first
+    ivec4(__m128i v) : val(v) {}
+    ivec4(const int* v) : val(_mm_loadu_si128((const __m128i*) v)) {} // reads four ints starting at v; unaligned load
+    operator __m128i() const {
+        return val;
+    }
+    int operator[](int i) const { // read-only element access via a temporary array; i is not range checked
+        int result[4];
+        store(result);
+        return result[i];
+    }
+    void store(int* v) const { // writes four ints starting at v; unaligned store
+        _mm_storeu_si128((__m128i*) v, val);
+    }
+    ivec4 operator+(const ivec4& other) const {
+        return _mm_add_epi32(val, other);
+    }
+    ivec4 operator-(const ivec4& other) const {
+        return _mm_sub_epi32(val, other);
+    }
+    ivec4 operator*(const ivec4& other) const {
+        return _mm_mullo_epi32(val, other); // keeps the low 32 bits of each product
+    }
+    void operator+=(const ivec4& other) {
+        val = _mm_add_epi32(val, other);
+    }
+    void operator-=(const ivec4& other) {
+        val = _mm_sub_epi32(val, other);
+    }
+    void operator*=(const ivec4& other) {
+        val = _mm_mullo_epi32(val, other); // keeps the low 32 bits of each product
+    }
+    ivec4 operator-() const {
+        return _mm_sub_epi32(_mm_set1_epi32(0), val); // negation computed as 0 - val
+    }
+    ivec4 operator&(const ivec4& other) const {
+        return _mm_and_si128(val, other);
+    }
+    ivec4 operator|(const ivec4& other) const {
+        return _mm_or_si128(val, other);
+    }
+    ivec4 operator==(const ivec4& other) const { // per-element mask, not a bool
+        return _mm_cmpeq_epi32(val, other);
+    }
+    ivec4 operator!=(const ivec4& other) const {
+        return _mm_xor_si128(*this==other, _mm_set1_epi32(0xFFFFFFFF)); // XOR with all ones inverts the == mask
+    }
+    ivec4 operator>(const ivec4& other) const { // per-element mask, not a bool
+        return _mm_cmpgt_epi32(val, other);
+    }
+    ivec4 operator<(const ivec4& other) const { // per-element mask, not a bool
+        return _mm_cmplt_epi32(val, other);
+    }
+    ivec4 operator>=(const ivec4& other) const {
+        return _mm_xor_si128(_mm_cmplt_epi32(val, other), _mm_set1_epi32(0xFFFFFFFF)); // computed as NOT (val < other)
+    }
+    ivec4 operator<=(const ivec4& other) const {
+        return _mm_xor_si128(_mm_cmpgt_epi32(val, other), _mm_set1_epi32(0xFFFFFFFF)); // computed as NOT (val > other)
+    }
+    operator fvec4() const; // defined out of line, after both classes
+};
+
+// Conversion operators.
+
+inline fvec4::operator ivec4() const { // float -> int conversion of all four elements
+    return _mm_cvttps_epi32(val); // the "tt" variant truncates toward zero rather than rounding
+}
+
+inline ivec4::operator fvec4() const { // int -> float conversion of all four elements
+    return _mm_cvtepi32_ps(val);
+}
+
+// Functions that operate on fvec4s.
+
+static inline fvec4 floor(const fvec4& v) { // rounds each element down, toward negative infinity
+    return fvec4(_mm_floor_ps(v.val));
+}
+
+static inline fvec4 ceil(const fvec4& v) { // rounds each element up, toward positive infinity
+    return fvec4(_mm_ceil_ps(v.val));
+}
+
+static inline fvec4 round(const fvec4& v) { // rounds each element to the nearest integer value
+    return fvec4(_mm_round_ps(v.val, _MM_FROUND_TO_NEAREST_INT));
+}
+
+static inline fvec4 min(const fvec4& v1, const fvec4& v2) { // element-wise minimum of v1 and v2
+    return fvec4(_mm_min_ps(v1.val, v2.val));
+}
+
+static inline fvec4 max(const fvec4& v1, const fvec4& v2) { // element-wise maximum of v1 and v2
+    return fvec4(_mm_max_ps(v1.val, v2.val));
+}
+
+static inline fvec4 abs(const fvec4& v) { // element-wise absolute value
+    static const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)); // every bit set except the sign bit of each element
+    return fvec4(_mm_and_ps(v.val, mask)); // ANDing with the mask clears each sign bit
+}
+
+static inline fvec4 sqrt(const fvec4& v) { // element-wise square root
+    return fvec4(_mm_sqrt_ps(v.val));
+}
+
+static inline float dot3(const fvec4& v1, const fvec4& v2) { // dot product of the first three elements; the fourth is ignored
+    return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0x71)); // 0x7_: multiply elements 0-2; 0x_1: write the sum to element 0
+}
+
+static inline float dot4(const fvec4& v1, const fvec4& v2) { // dot product of all four elements
+    return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0xF1)); // 0xF_: multiply elements 0-3; 0x_1: write the sum to element 0
+}
+
+static inline fvec4 cross(const fvec4& v1, const fvec4& v2) { // 3D cross product of elements 0-2 of v1 and v2
+    fvec4 temp = fvec4(_mm_mul_ps(v1, _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1)))) - // _MM_SHUFFLE(3, 0, 2, 1) reorders (x, y, z, w) to (y, z, x, w)
+                 fvec4(_mm_mul_ps(v2, _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1)))); // both products are wrapped in fvec4 so the subtraction uses fvec4::operator-; presumably raw __m128 subtraction does not compile on Windows
+    return _mm_shuffle_ps(temp, temp, _MM_SHUFFLE(3, 0, 2, 1)); // temp holds the result rotated as (z, x, y); this shuffle restores (x, y, z)
+}
+
+static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) { // treats v1..v4 as the rows of a 4x4 matrix and transposes it in place
+    _MM_TRANSPOSE4_PS(v1, v2, v3, v4);
+}
+
+// Functions that operate on ivec4s.
+
+static inline ivec4 min(const ivec4& v1, const ivec4& v2) { // element-wise minimum of signed 32 bit ints
+    return ivec4(_mm_min_epi32(v1.val, v2.val));
+}
+
+static inline ivec4 max(const ivec4& v1, const ivec4& v2) { // element-wise maximum of signed 32 bit ints
+    return ivec4(_mm_max_epi32(v1.val, v2.val));
+}
+
+static inline ivec4 abs(const ivec4& v) { // element-wise absolute value
+    return ivec4(_mm_abs_epi32(v.val));
+}
+
+static inline bool any(const ivec4& v) { // true if any bit of v is set, e.g. if any element of a comparison mask is true
+    return !_mm_test_all_zeros(v, _mm_set1_epi32(0xFFFFFFFF)); // testing against an all-ones mask checks every bit of v
+}
+
+// Mathematical operators involving a scalar and a vector.
+
+static inline fvec4 operator+(float v1, const fvec4& v2) { // scalar + vector: v1 is broadcast to all four elements first
+    return fvec4(v1)+v2;
+}
+
+static inline fvec4 operator-(float v1, const fvec4& v2) { // scalar - vector: v1 is broadcast to all four elements first
+    return fvec4(v1)-v2;
+}
+
+static inline fvec4 operator*(float v1, const fvec4& v2) { // scalar * vector: v1 is broadcast to all four elements first
+    return fvec4(v1)*v2;
+}
+
+static inline fvec4 operator/(float v1, const fvec4& v2) { // scalar / vector: v1 is broadcast to all four elements first
+    return fvec4(v1)/v2;
+}
+
+// Operations for blending fvec4s based on an ivec4.
+
+static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const ivec4& mask) { // per element: takes v2 where mask selects it, otherwise v1
+    return fvec4(_mm_blendv_ps(v1.val, v2.val, _mm_castsi128_ps(mask.val))); // _mm_blendv_ps selects on the top (sign) bit of each mask element only
+}
+
+#endif /*OPENMM_VECTORIZE_SSE_H_*/
+
--- a/platforms/cpu/include/CpuCustomManyParticleForce.h
+++ b/platforms/cpu/include/CpuCustomManyParticleForce.h
-
-/* Portions copyright (c) 2009-2014 Stanford University and Simbios.
- * Contributors: Peter Eastman
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef OPENMM_CPU_CUSTOM_MANY_PARTICLE_FORCE_H__
-#define OPENMM_CPU_CUSTOM_MANY_PARTICLE_FORCE_H__
-
-#include "ReferenceForce.h"
-#include "ReferenceBondIxn.h"
-#include "CompiledExpressionSet.h"
-#include "CpuNeighborList.h"
-#include "openmm/CustomManyParticleForce.h"
-#include "openmm/internal/ThreadPool.h"
-#include "openmm/internal/vectorize.h"
-#include "lepton/CompiledExpression.h"
-#include "lepton/ParsedExpression.h"
-#include <map>
-#include <set>
-#include <utility>
-#include <vector>
-
-namespace OpenMM {
-
-class CpuCustomManyParticleForce {
-private:
-
-    class ParticleTermInfo;
-    class DistanceTermInfo;
-    class AngleTermInfo;
-    class DihedralTermInfo;
-    class ComputeForceTask;
-    class ThreadData;
-    int numParticles, numParticlesPerSet, numPerParticleParameters, numTypes;
-    bool useCutoff, usePeriodic, centralParticleMode;
-    RealOpenMM cutoffDistance;
-    RealOpenMM periodicBoxSize[3];
-    CpuNeighborList* neighborList;
-    ThreadPool& threads;
-    std::vector<std::set<int> > exclusions;
-    std::vector<int> particleTypes;
-    std::vector<int> orderIndex;
-    std::vector<std::vector<int> > particleOrder;
-    std::vector<std::vector<int> > particleNeighbors;
-    std::vector<ThreadData*> threadData;
-    // The following variables are used to make information accessible to the individual threads.
-    float* posq;
-    RealOpenMM** particleParameters;        
-    const std::map<std::string, double>* globalParameters;
-    std::vector<AlignedArray<float> >* threadForce;
-    bool includeForces, includeEnergy;
-    void* atomicCounter;
-
-    /**
-     * This routine contains the code executed by each thread.
-     */
-    void threadComputeForce(ThreadPool& threads, int threadIndex);
-
-    /**
-     * This is called recursively to loop over all possible combination of a set of particles and evaluate the
-     * interaction for each one.
-     */
-    void loopOverInteractions(std::vector<int>& availableParticles, std::vector<int>& particleSet, int loopIndex, int startIndex,
-                              RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize);
-
-    /**---------------------------------------------------------------------------------------
-
-       Calculate custom interaction for one set of particles
-
-       @param particleSet        the indices of the particles
-       @param posq               atom coordinates in float format
-       @param particleParameters particle parameter values (particleParameters[particleIndex][parameterIndex])
-       @param forces             force array (forces added)
-       @param totalEnergy        total energy
-
-       --------------------------------------------------------------------------------------- */
-
-    /**
-     * Calculate the interaction for one set of particles
-     * 
-     * @param particleSet        the indices of the particles
-     * @param particleParameters particle parameter values (particleParameters[particleIndex][parameterIndex])
-     * @param data               information and workspace for the current thread
-     * @param boxSize            the size of the periodic box
-     * @param invBoxSize         the inverse size of the periodic box
-     */
-    void calculateOneIxn(std::vector<int>& particleSet, RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize);
-
-    /**
-     * Compute the displacement and squared distance between two points, optionally using
-     * periodic boundary conditions.
-     */
-    void computeDelta(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, const fvec4& boxSize, const fvec4& invBoxSize) const;
-    
-    static float computeAngle(const fvec4& vi, const fvec4& vj, float v2i, float v2j, float sign);
-    
-    static float getDihedralAngleBetweenThreeVectors(const fvec4& v1, const fvec4& v2, const fvec4& v3, fvec4& cross1, fvec4& cross2, const fvec4& signVector);
-
-public:
-    /**
-     * Create a new CpuCustomManyParticleForce.
-     *
-     * @param force      the CustomManyParticleForce to create it for
-     * @param threads    the thread pool to use
-     */
-    CpuCustomManyParticleForce(const OpenMM::CustomManyParticleForce& force, ThreadPool& threads);
-
-    ~CpuCustomManyParticleForce();
-
-    /**
-     * Set the force to use a cutoff.
-     * 
-     * @param distance   the cutoff distance
-     */
-    void setUseCutoff(RealOpenMM distance);
-
-    /**
-     * Set the force to use periodic boundary conditions.  This requires that a cutoff has
-     * already been set, and the smallest side of the periodic box is at least twice the cutoff
-     * distance.
-     * 
-     * @param boxSize    the X, Y, and Z widths of the periodic box
-     */
-    void setPeriodic(OpenMM::RealVec& boxSize);
-
-    /**
-     * Calculate the interaction.
-     * 
-     * @param posq               atom coordinates in float format
-     * @param particleParameters particle parameter values (particleParameters[particleIndex][parameterIndex])
-     * @param globalParameters   the values of global parameters
-     * @param threadForce        the collection of arrays for each thread to add forces to
-     * @param includeForce       whether to compute forces
-     * @param includeEnergy      whether to compute energy
-     * @param energy             the total energy is added to this
-     */
-    void calculateIxn(AlignedArray<float>& posq, RealOpenMM** particleParameters, const std::map<std::string, double>& globalParameters,
-                      std::vector<AlignedArray<float> >& threadForce, bool includeForces, bool includeEnergy, double& energy);
-};
-
-class CpuCustomManyParticleForce::ParticleTermInfo {
-public:
-    std::string name;
-    int atom, component, variableIndex;
-    Lepton::CompiledExpression forceExpression;
-    ParticleTermInfo(const std::string& name, int atom, int component, const Lepton::CompiledExpression& forceExpression, ThreadData& data);
-};
-
-class CpuCustomManyParticleForce::DistanceTermInfo {
-public:
-    std::string name;
-    int p1, p2, variableIndex;
-    Lepton::CompiledExpression forceExpression;
-    int delta;
-    float deltaSign;
-    DistanceTermInfo(const std::string& name, const std::vector<int>& atoms, const Lepton::CompiledExpression& forceExpression, ThreadData& data);
-};
-
-class CpuCustomManyParticleForce::AngleTermInfo {
-public:
-    std::string name;
-    int p1, p2, p3, variableIndex;
-    Lepton::CompiledExpression forceExpression;
-    int delta1, delta2;
-    float delta1Sign, delta2Sign;
-    AngleTermInfo(const std::string& name, const std::vector<int>& atoms, const Lepton::CompiledExpression& forceExpression, ThreadData& data);
-};
-
-class CpuCustomManyParticleForce::DihedralTermInfo {
-public:
-    std::string name;
-    int p1, p2, p3, p4, variableIndex;
-    Lepton::CompiledExpression forceExpression;
-    int delta1, delta2, delta3;
-    mutable fvec4 cross1, cross2;
-    DihedralTermInfo(const std::string& name, const std::vector<int>& atoms, const Lepton::CompiledExpression& forceExpression, ThreadData& data);
-};
-
-class CpuCustomManyParticleForce::ThreadData {
-public:
-    CompiledExpressionSet expressionSet;
-    Lepton::CompiledExpression energyExpression;
-    std::vector<std::vector<int> > particleParamIndices;
-    std::vector<int> permutedParticles;
-    std::vector<std::pair<int, int> > deltaPairs;
-    std::vector<ParticleTermInfo> particleTerms;
-    std::vector<DistanceTermInfo> distanceTerms;
-    std::vector<AngleTermInfo> angleTerms;
-    std::vector<DihedralTermInfo> dihedralTerms;
-    AlignedArray<fvec4> delta;
-    std::vector<float> normDelta;
-    std::vector<float> norm2Delta;
-    AlignedArray<fvec4> f;
-    double energy;
-    ThreadData(const CustomManyParticleForce& force, Lepton::ParsedExpression& energyExpr,
-            std::map<std::string, std::vector<int> >& distances, std::map<std::string, std::vector<int> >& angles, std::map<std::string, std::vector<int> >& dihedrals);
-    /**
-     * Request a pair of particles whose distance or displacement vector is needed in the computation.
-     */
-    void requestDeltaPair(int p1, int p2, int& pairIndex, float& pairSign, bool allowReversed);
-};
-
-} // namespace OpenMM
-
-#endif // OPENMM_CPU_CUSTOM_MANY_PARTICLE_FORCE_H__
+
+/* Portions copyright (c) 2009-2014 Stanford University and Simbios.
+ * Contributors: Peter Eastman
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject
+ * to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef OPENMM_CPU_CUSTOM_MANY_PARTICLE_FORCE_H__
+#define OPENMM_CPU_CUSTOM_MANY_PARTICLE_FORCE_H__
+
+#include "ReferenceForce.h"
+#include "ReferenceBondIxn.h"
+#include "CompiledExpressionSet.h"
+#include "CpuNeighborList.h"
+#include "openmm/CustomManyParticleForce.h"
+#include "openmm/internal/ThreadPool.h"
+#include "openmm/internal/vectorize.h"
+#include "lepton/CompiledExpression.h"
+#include "lepton/ParsedExpression.h"
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
+namespace OpenMM {
+
+/**
+ * Computes the interaction defined by a CustomManyParticleForce on the CPU, dividing the
+ * work among the threads of a ThreadPool.
+ */
+class CpuCustomManyParticleForce {
+private:
+
+    // Nested helper types; only forward declared here.
+    class ParticleTermInfo;
+    class DistanceTermInfo;
+    class AngleTermInfo;
+    class DihedralTermInfo;
+    class ComputeForceTask;
+    class ThreadData;
+    int numParticles, numParticlesPerSet, numPerParticleParameters, numTypes;
+    bool useCutoff, usePeriodic, centralParticleMode;
+    RealOpenMM cutoffDistance;
+    RealOpenMM periodicBoxSize[3];
+    // Created by setUseCutoff() and deleted by the destructor.
+    CpuNeighborList* neighborList;
+    ThreadPool& threads;
+    // exclusions[i] is the set of particles that must not interact with particle i.
+    std::vector<std::set<int> > exclusions;
+    // Type filter tables filled in by CustomManyParticleForceImpl::buildFilterArrays().
+    std::vector<int> particleTypes;
+    std::vector<int> orderIndex;
+    std::vector<std::vector<int> > particleOrder;
+    // particleNeighbors[i] lists the neighbors of particle i; rebuilt by calculateIxn() when a cutoff is used.
+    std::vector<std::vector<int> > particleNeighbors;
+    // One workspace per thread; the objects are deleted by the destructor.
+    std::vector<ThreadData*> threadData;
+    // The following variables are used to make information accessible to the individual threads.
+    float* posq;
+    RealOpenMM** particleParameters;        
+    const std::map<std::string, double>* globalParameters;
+    std::vector<AlignedArray<float> >* threadForce;
+    bool includeForces, includeEnergy;
+    // Points to the atomic counter the threads increment to claim the next first particle.
+    void* atomicCounter;
+
+    /**
+     * This routine contains the code executed by each thread.
+     *
+     * @param threads      the thread pool the calling thread belongs to
+     * @param threadIndex  the index of the calling thread, used to select its force array and workspace
+     */
+    void threadComputeForce(ThreadPool& threads, int threadIndex);
+
+    /**
+     * This is called recursively to loop over all possible combinations of a set of particles and evaluate the
+     * interaction for each one.
+     *
+     * @param availableParticles the candidate particles to choose from
+     * @param particleSet        the set being built; entries before loopIndex have already been chosen
+     * @param loopIndex          the position in particleSet to fill at this recursion level
+     * @param startIndex         the index in availableParticles at which to start looking for candidates
+     * @param particleParameters particle parameter values (particleParameters[particleIndex][parameterIndex])
+     * @param forces             force array (forces added)
+     * @param data               information and workspace for the current thread
+     * @param boxSize            the size of the periodic box
+     * @param invBoxSize         the inverse size of the periodic box
+     */
+    void loopOverInteractions(std::vector<int>& availableParticles, std::vector<int>& particleSet, int loopIndex, int startIndex,
+                              RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize);
+
+    /**
+     * Calculate the interaction for one set of particles
+     * 
+     * @param particleSet        the indices of the particles
+     * @param particleParameters particle parameter values (particleParameters[particleIndex][parameterIndex])
+     * @param forces             force array (forces added)
+     * @param data               information and workspace for the current thread
+     * @param boxSize            the size of the periodic box
+     * @param invBoxSize         the inverse size of the periodic box
+     */
+    void calculateOneIxn(std::vector<int>& particleSet, RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize);
+
+    /**
+     * Compute the displacement and squared distance between two points, optionally using
+     * periodic boundary conditions.
+     *
+     * @param posI        the first point
+     * @param posJ        the second point
+     * @param deltaR      on exit, the displacement from posI to posJ
+     * @param r2          on exit, the squared length of deltaR
+     * @param boxSize     the size of the periodic box
+     * @param invBoxSize  the inverse size of the periodic box
+     */
+    void computeDelta(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, const fvec4& boxSize, const fvec4& invBoxSize) const;
+    
+    /**
+     * Compute the angle between two vectors.
+     *
+     * @param vi    the first vector
+     * @param vj    the second vector
+     * @param v2i   the squared length of vi
+     * @param v2j   the squared length of vj
+     * @param sign  a factor the dot product of vi and vj is multiplied by before the angle is computed
+     * @return the angle, in radians
+     */
+    static float computeAngle(const fvec4& vi, const fvec4& vj, float v2i, float v2j, float sign);
+    
+    /**
+     * Compute the dihedral angle defined by three vectors.
+     *
+     * @param v1          the first vector
+     * @param v2          the second vector
+     * @param v3          the third vector
+     * @param cross1      on exit, the cross product of v1 and v2
+     * @param cross2      on exit, the cross product of v2 and v3
+     * @param signVector  the angle is negated if the dot product of this vector with cross2 is negative
+     * @return the angle between cross1 and cross2, with its sign selected by signVector
+     */
+    static float getDihedralAngleBetweenThreeVectors(const fvec4& v1, const fvec4& v2, const fvec4& v3, fvec4& cross1, fvec4& cross2, const fvec4& signVector);
+
+public:
+    /**
+     * Create a new CpuCustomManyParticleForce.
+     *
+     * @param force      the CustomManyParticleForce to create it for
+     * @param threads    the thread pool to use
+     */
+    CpuCustomManyParticleForce(const OpenMM::CustomManyParticleForce& force, ThreadPool& threads);
+
+    /**
+     * Release the neighbor list and the per-thread workspaces.
+     */
+    ~CpuCustomManyParticleForce();
+
+    /**
+     * Set the force to use a cutoff.
+     * 
+     * @param distance   the cutoff distance
+     */
+    void setUseCutoff(RealOpenMM distance);
+
+    /**
+     * Set the force to use periodic boundary conditions.  This requires that a cutoff has
+     * already been set, and the smallest side of the periodic box is at least twice the cutoff
+     * distance.
+     * 
+     * @param boxSize    the X, Y, and Z widths of the periodic box
+     */
+    void setPeriodic(OpenMM::RealVec& boxSize);
+
+    /**
+     * Calculate the interaction.
+     * 
+     * @param posq               atom coordinates in float format
+     * @param particleParameters particle parameter values (particleParameters[particleIndex][parameterIndex])
+     * @param globalParameters   the values of global parameters
+     * @param threadForce        the collection of arrays for each thread to add forces to
+     * @param includeForces      whether to compute forces
+     * @param includeEnergy      whether to compute energy
+     * @param energy             the total energy is added to this
+     */
+    void calculateIxn(AlignedArray<float>& posq, RealOpenMM** particleParameters, const std::map<std::string, double>& globalParameters,
+                      std::vector<AlignedArray<float> >& threadForce, bool includeForces, bool includeEnergy, double& energy);
+};
+
+/**
+ * Describes one particle coordinate the energy expression can depend on, together with the
+ * expression used to compute the force it produces.
+ */
+class CpuCustomManyParticleForce::ParticleTermInfo {
+public:
+    // Name of the variable in the energy expression.
+    std::string name;
+    // atom: position of the particle within the set; component: coordinate index (0, 1 or 2);
+    // variableIndex: index of the variable in the thread's CompiledExpressionSet.
+    int atom, component, variableIndex;
+    Lepton::CompiledExpression forceExpression;
+    /**
+     * @param name             the name of the variable
+     * @param atom             the position of the particle within the set
+     * @param component        the coordinate component
+     * @param forceExpression  the expression evaluated to obtain the force for this variable
+     * @param data             the thread workspace whose expression set is used to look up the variable index
+     */
+    ParticleTermInfo(const std::string& name, int atom, int component, const Lepton::CompiledExpression& forceExpression, ThreadData& data);
+};
+
+/**
+ * Describes one distance variable the energy expression depends on, together with the
+ * expression used to compute the force it produces.
+ */
+class CpuCustomManyParticleForce::DistanceTermInfo {
+public:
+    // Name of the variable in the energy expression.
+    std::string name;
+    // p1, p2: positions within the set of the two particles; variableIndex: index of the
+    // variable in the thread's CompiledExpressionSet.
+    int p1, p2, variableIndex;
+    Lepton::CompiledExpression forceExpression;
+    // Index of the displacement pair requested from ThreadData for (p1, p2).
+    int delta;
+    // Sign reported by ThreadData::requestDeltaPair(): 1, or -1 if the stored pair is reversed.
+    float deltaSign;
+    /**
+     * @param name             the name of the variable
+     * @param atoms            the positions within the set of the two particles
+     * @param forceExpression  the expression evaluated to obtain the force for this variable
+     * @param data             the thread workspace the variable index and displacement pair are obtained from
+     */
+    DistanceTermInfo(const std::string& name, const std::vector<int>& atoms, const Lepton::CompiledExpression& forceExpression, ThreadData& data);
+};
+
+/**
+ * Describes one angle variable the energy expression depends on, together with the
+ * expression used to compute the force it produces.
+ */
+class CpuCustomManyParticleForce::AngleTermInfo {
+public:
+    // Name of the variable in the energy expression.
+    std::string name;
+    // p1, p2, p3: positions within the set of the three particles; variableIndex: index of
+    // the variable in the thread's CompiledExpressionSet.
+    int p1, p2, p3, variableIndex;
+    Lepton::CompiledExpression forceExpression;
+    // Indices of the displacement pairs requested from ThreadData for (p1, p2) and (p3, p2).
+    int delta1, delta2;
+    // Signs reported by ThreadData::requestDeltaPair() for delta1 and delta2 (1 or -1).
+    float delta1Sign, delta2Sign;
+    /**
+     * @param name             the name of the variable
+     * @param atoms            the positions within the set of the three particles
+     * @param forceExpression  the expression evaluated to obtain the force for this variable
+     * @param data             the thread workspace the variable index and displacement pairs are obtained from
+     */
+    AngleTermInfo(const std::string& name, const std::vector<int>& atoms, const Lepton::CompiledExpression& forceExpression, ThreadData& data);
+};
+
+/**
+ * Describes one dihedral variable the energy expression depends on, together with the
+ * expression used to compute the force it produces.
+ */
+class CpuCustomManyParticleForce::DihedralTermInfo {
+public:
+    // Name of the variable in the energy expression.
+    std::string name;
+    // p1..p4: positions within the set of the four particles; variableIndex: index of the
+    // variable in the thread's CompiledExpressionSet.
+    int p1, p2, p3, p4, variableIndex;
+    Lepton::CompiledExpression forceExpression;
+    // Indices of the displacement pairs requested from ThreadData for (p2, p1), (p2, p3) and (p4, p3).
+    int delta1, delta2, delta3;
+    /**
+     * @param name             the name of the variable
+     * @param atoms            the positions within the set of the four particles
+     * @param forceExpression  the expression evaluated to obtain the force for this variable
+     * @param data             the thread workspace the variable index and displacement pairs are obtained from
+     */
+    DihedralTermInfo(const std::string& name, const std::vector<int>& atoms, const Lepton::CompiledExpression& forceExpression, ThreadData& data);
+};
+
+/**
+ * Holds the compiled expressions and scratch storage used by a single thread.
+ */
+class CpuCustomManyParticleForce::ThreadData {
+public:
+    // The set through which variables shared by the energy and force expressions are assigned.
+    CompiledExpressionSet expressionSet;
+    Lepton::CompiledExpression energyExpression;
+    // particleParamIndices[i][j] is the variable index of per-particle parameter j for particle i of the set.
+    std::vector<std::vector<int> > particleParamIndices;
+    // The particles of the set currently being evaluated, in the order the expression expects.
+    std::vector<int> permutedParticles;
+    // Pairs of positions within the set whose displacement is needed; filled by requestDeltaPair().
+    std::vector<std::pair<int, int> > deltaPairs;
+    std::vector<ParticleTermInfo> particleTerms;
+    std::vector<DistanceTermInfo> distanceTerms;
+    std::vector<AngleTermInfo> angleTerms;
+    std::vector<DihedralTermInfo> dihedralTerms;
+    // delta holds one displacement per entry of deltaPairs.
+    // NOTE(review): cross1 and cross2 are presumably scratch space for the cross products of
+    // dihedral terms; their use is not visible in this header - confirm against the .cpp.
+    AlignedArray<fvec4> delta, cross1, cross2;
+    // Length and squared length of each entry of delta.
+    std::vector<float> normDelta;
+    std::vector<float> norm2Delta;
+    // Forces on the particles of the set currently being evaluated.
+    AlignedArray<fvec4> f;
+    // Energy accumulated by this thread.
+    double energy;
+    /**
+     * @param force       the CustomManyParticleForce being evaluated
+     * @param energyExpr  the parsed energy expression
+     * @param distances   the distance variables, mapping name to the particles involved
+     * @param angles      the angle variables, mapping name to the particles involved
+     * @param dihedrals   the dihedral variables, mapping name to the particles involved
+     */
+    ThreadData(const CustomManyParticleForce& force, Lepton::ParsedExpression& energyExpr,
+            std::map<std::string, std::vector<int> >& distances, std::map<std::string, std::vector<int> >& angles, std::map<std::string, std::vector<int> >& dihedrals);
+    /**
+     * Request a pair of particles whose distance or displacement vector is needed in the computation.
+     *
+     * @param p1             the first particle of the pair
+     * @param p2             the second particle of the pair
+     * @param pairIndex      on exit, the index of the pair in deltaPairs
+     * @param pairSign       on exit, 1, or -1 if an existing pair in the opposite order is being reused
+     * @param allowReversed  whether an existing pair in the opposite order may be reused
+     */
+    void requestDeltaPair(int p1, int p2, int& pairIndex, float& pairSign, bool allowReversed);
+};
+
+} // namespace OpenMM
+
+#endif // OPENMM_CPU_CUSTOM_MANY_PARTICLE_FORCE_H__
--- a/platforms/cpu/src/CpuCustomManyParticleForce.cpp
+++ b/platforms/cpu/src/CpuCustomManyParticleForce.cpp
-
-/* Portions copyright (c) 2009-2014 Stanford University and Simbios.
- * Contributors: Peter Eastman
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <string.h>
-#include <sstream>
-#include <utility>
-
-#include "SimTKOpenMMCommon.h"
-#include "SimTKOpenMMLog.h"
-#include "SimTKOpenMMUtilities.h"
-#include "ReferenceForce.h"
-#include "CpuCustomManyParticleForce.h"
-#include "ReferenceTabulatedFunction.h"
-#include "openmm/internal/CustomManyParticleForceImpl.h"
-#include "lepton/CustomFunction.h"
-#include "gmx_atomic.h"
-
-using namespace OpenMM;
-using namespace std;
-
-/**
- * Thread pool task that forwards execution on each thread to
- * CpuCustomManyParticleForce::threadComputeForce().
- */
-class CpuCustomManyParticleForce::ComputeForceTask : public ThreadPool::Task {
-public:
-    ComputeForceTask(CpuCustomManyParticleForce& owner) : owner(owner) {
-    }
-    void execute(ThreadPool& threads, int threadIndex) {
-        owner.threadComputeForce(threads, threadIndex);
-    }
-    // The force object whose threadComputeForce() is invoked.
-    CpuCustomManyParticleForce& owner;
-};
-
-/**
- * Create a new CpuCustomManyParticleForce: copy the settings of the CustomManyParticleForce,
- * build one ThreadData per thread, and record exclusions and type filters.
- *
- * @param force      the CustomManyParticleForce to create it for
- * @param threads    the thread pool to use
- */
-CpuCustomManyParticleForce::CpuCustomManyParticleForce(const CustomManyParticleForce& force, ThreadPool& threads) :
-            threads(threads), useCutoff(false), usePeriodic(false), neighborList(NULL) {
-    numParticles = force.getNumParticles();
-    numParticlesPerSet = force.getNumParticlesPerSet();
-    numPerParticleParameters = force.getNumPerParticleParameters();
-    centralParticleMode = (force.getPermutationMode() == CustomManyParticleForce::UniqueCentralParticle);
-    
-    // Create custom functions for the tabulated functions.
-
-    map<string, Lepton::CustomFunction*> functions;
-    for (int i = 0; i < (int) force.getNumTabulatedFunctions(); i++)
-        functions[force.getTabulatedFunctionName(i)] = createReferenceTabulatedFunction(force.getTabulatedFunction(i));
-
-    // Parse the expression and create the objects used to calculate the interaction.
-
-    map<string, vector<int> > distances;
-    map<string, vector<int> > angles;
-    map<string, vector<int> > dihedrals;
-    Lepton::ParsedExpression energyExpr = CustomManyParticleForceImpl::prepareExpression(force, functions, distances, angles, dihedrals);
-    // Each thread gets its own ThreadData built from the same parsed expression.
-    for (int i = 0; i < threads.getNumThreads(); i++)
-        threadData.push_back(new ThreadData(force, energyExpr, distances, angles, dihedrals));
-    if (force.getNonbondedMethod() != CustomManyParticleForce::NoCutoff)
-        setUseCutoff(force.getCutoffDistance());
-
-    // Delete the custom functions.
-
-    for (map<string, Lepton::CustomFunction*>::iterator iter = functions.begin(); iter != functions.end(); iter++)
-        delete iter->second;
-    
-    // Record exclusions.
-    
-    exclusions.resize(force.getNumParticles());
-    for (int i = 0; i < (int) force.getNumExclusions(); i++) {
-        int p1, p2;
-        force.getExclusionParticles(i, p1, p2);
-        // Exclusions are stored symmetrically so they can be looked up from either particle.
-        exclusions[p1].insert(p2);
-        exclusions[p2].insert(p1);
-    }
-    
-    // Record information about type filters.
-    
-    CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypes, orderIndex, particleOrder);
-}
-
-/**
- * Delete the neighbor list, if one was created, and the per-thread workspaces.
- */
-CpuCustomManyParticleForce::~CpuCustomManyParticleForce() {
-    if (neighborList != NULL)
-        delete neighborList;
-    for (int i = 0; i < (int) threadData.size(); i++)
-        delete threadData[i];
-}
-
-/**
- * Calculate the interaction.  The arguments are stored in member variables so the threads can
- * read them, the neighbor list is rebuilt if a cutoff is in use, and the work is then run on
- * the thread pool.
- *
- * @param posq               atom coordinates in float format
- * @param particleParameters particle parameter values (particleParameters[particleIndex][parameterIndex])
- * @param globalParameters   the values of global parameters
- * @param threadForce        the collection of arrays for each thread to add forces to
- * @param includeForces      whether to compute forces
- * @param includeEnergy      whether to compute energy
- * @param energy             the total energy is added to this
- */
-void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, RealOpenMM** particleParameters,
-                                                  const map<string, double>& globalParameters, vector<AlignedArray<float> >& threadForce,
-                                                  bool includeForces, bool includeEnergy, double& energy) {
-    // Record the parameters for the threads.
-    
-    this->posq = &posq[0];
-    this->particleParameters = particleParameters;
-    this->globalParameters = &globalParameters;
-    this->threadForce = &threadForce;
-    this->includeForces = includeForces;
-    this->includeEnergy = includeEnergy;
-    // The counter is a local variable, so atomicCounter is only valid for the duration of this call.
-    gmx_atomic_t counter;
-    gmx_atomic_set(&counter, 0);
-    this->atomicCounter = &counter;
-    if (useCutoff) {
-        // Construct a neighbor list.  We use CpuNeighborList to do this, but then copy the result
-        // into a new data structure.  This is needed because in UniqueCentralParticle mode,
-        // the neighbor list needs to include symmetric pairs.
-        
-        particleNeighbors.resize(numParticles);
-        for (int i = 0; i < numParticles; i++)
-            particleNeighbors[i].clear();
-        float boxSizeFloat[] = {(float) periodicBoxSize[0], (float) periodicBoxSize[1], (float) periodicBoxSize[2]};
-        neighborList->computeNeighborList(numParticles, posq, exclusions, boxSizeFloat, usePeriodic, cutoffDistance, threads);
-        for (int blockIndex = 0; blockIndex < neighborList->getNumBlocks(); blockIndex++) {
-            const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
-            // Note: this local shadows the member variable of the same name.
-            const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex);
-            int numNeighbors = neighbors.size();
-            // Each block holds four atoms; bit i of exclusions[j] marks neighbor j as excluded for atom i of the block.
-            for (int i = 0; i < 4; i++) {
-                int p1 = neighborList->getSortedAtoms()[4*blockIndex+i];
-                for (int j = 0; j < numNeighbors; j++) {
-                    if ((exclusions[j] & (1<<i)) == 0) {
-                        int p2 = neighbors[j];
-                        particleNeighbors[p1].push_back(p2);
-                        if (centralParticleMode)
-                            particleNeighbors[p2].push_back(p1);
-                    }
-                }
-            }
-        }
-    }
-    
-    // Signal the threads to start running and wait for them to finish.
-    
-    ComputeForceTask task(*this);
-    threads.execute(task);
-    threads.waitForThreads();
-    
-    // Combine the energies from all the threads.
-    
-    if (includeEnergy) {
-        int numThreads = threads.getNumThreads();
-        for (int i = 0; i < numThreads; i++)
-            energy += threadData[i]->energy;
-    }
-}
-
-/**
- * This routine contains the code executed by each thread.  A thread repeatedly claims the next
- * first particle from the shared atomic counter and evaluates every particle set that starts
- * with it, until all particles have been claimed.
- *
- * @param threads      the thread pool the calling thread belongs to
- * @param threadIndex  the index of the calling thread, used to select its force array and workspace
- */
-void CpuCustomManyParticleForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
-    vector<int> particleIndices(numParticlesPerSet);
-    fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
-    fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0);
-    float* forces = &(*threadForce)[threadIndex][0];
-    ThreadData& data = *threadData[threadIndex];
-    // Reset the per-thread energy; calculateIxn() sums these values after all threads finish.
-    data.energy = 0;
-    for (map<string, double>::const_iterator iter = globalParameters->begin(); iter != globalParameters->end(); ++iter)
-        data.expressionSet.setVariable(data.expressionSet.getVariableIndex(iter->first), iter->second);
-    if (useCutoff) {
-        // Loop over interactions from the neighbor list.
-        
-        while (true) {
-            // Atomically claim the next first particle so that no two threads process the same one.
-            int i = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
-            if (i >= numParticles)
-                break;
-            particleIndices[0] = i;
-            loopOverInteractions(particleNeighbors[i], particleIndices, 1, 0, particleParameters, forces, data, boxSize, invBoxSize);
-        }
-    }
-    else {
-        // Loop over all possible sets of particles.
-        
-        vector<int> particles(numParticles);
-        for (int i = 0; i < numParticles; i++)
-            particles[i] = i;
-        while (true) {
-            int i = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
-            if (i >= numParticles)
-                break;
-            particleIndices[0] = i;
-            // In central particle mode every other particle is a candidate; otherwise only later ones are.
-            int startIndex = (centralParticleMode ? 0 : i+1);
-            loopOverInteractions(particles, particleIndices, 1, startIndex, particleParameters, forces, data, boxSize, invBoxSize);
-        }
-    }
-}
-
-/**
- * Set the force to use a cutoff, creating the neighbor list on the first call.
- *
- * @param distance   the cutoff distance
- */
-void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) {
-    useCutoff = true;
-    cutoffDistance = distance;
-    if (neighborList == NULL)
-        neighborList = new CpuNeighborList(4);
-}
-
-/**
- * Set the force to use periodic boundary conditions.  A cutoff must already have been set and
- * every side of the box must be at least twice the cutoff distance; both conditions are
- * checked only with assert().
- *
- * @param boxSize    the X, Y, and Z widths of the periodic box
- */
-void CpuCustomManyParticleForce::setPeriodic(RealVec& boxSize) {
-    assert(useCutoff);
-    assert(boxSize[0] >= 2.0*cutoffDistance);
-    assert(boxSize[1] >= 2.0*cutoffDistance);
-    assert(boxSize[2] >= 2.0*cutoffDistance);
-    usePeriodic = true;
-    periodicBoxSize[0] = boxSize[0];
-    periodicBoxSize[1] = boxSize[1];
-    periodicBoxSize[2] = boxSize[2];
-}
-
-/**
- * This is called recursively to loop over all possible combinations of a set of particles and
- * evaluate the interaction for each one.
- *
- * @param availableParticles the candidate particles to choose from
- * @param particleSet        the set being built; entries before loopIndex have already been chosen
- * @param loopIndex          the position in particleSet to fill at this recursion level
- * @param startIndex         the index in availableParticles at which to start looking for candidates
- * @param particleParameters particle parameter values (particleParameters[particleIndex][parameterIndex])
- * @param forces             force array (forces added)
- * @param data               information and workspace for the current thread
- * @param boxSize            the size of the periodic box
- * @param invBoxSize         the inverse size of the periodic box
- */
-void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availableParticles, vector<int>& particleSet, int loopIndex, int startIndex,
-                                                          RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
-    // Note: this local shadows the member variable and counts the candidates, not all particles.
-    int numParticles = availableParticles.size();
-    double cutoff2 = cutoffDistance*cutoffDistance;
-    // In central particle mode only the distance to the first particle of the set is checked;
-    // otherwise the distance to every particle chosen so far is checked.
-    int checkRange = (centralParticleMode ? 1 : loopIndex);
-    for (int i = startIndex; i < numParticles; i++) {
-        int particle = availableParticles[i];
-        
-        // Check whether this particle can actually participate in interactions with the others found so far.
-        
-        bool include = true;
-        if (useCutoff) {
-            fvec4 deltaR;
-            fvec4 pos1(posq+4*particle);
-            float r2;
-            for (int j = 0; j < checkRange && include; j++) {
-                fvec4 pos2(posq+4*particleSet[j]);
-                computeDelta(pos1, pos2, deltaR, r2, boxSize, invBoxSize);
-                include &= (r2 < cutoff2);
-            }
-        }
-        for (int j = 0; j < loopIndex && include; j++)
-            include &= (exclusions[particle].find(particleSet[j]) == exclusions[particle].end());
-        if (include) {
-            // Never add the first particle of the set a second time.
-            if (loopIndex > 0 && availableParticles[i] == particleSet[0])
-                continue;
-            particleSet[loopIndex] = availableParticles[i];
-            // Evaluate the interaction once the set is complete; otherwise pick the next particle
-            // from the candidates that follow this one.
-            if (loopIndex == numParticlesPerSet-1)
-                calculateOneIxn(particleSet, particleParameters, forces, data, boxSize, invBoxSize);
-            else
-                loopOverInteractions(availableParticles, particleSet, loopIndex+1, i+1, particleParameters, forces, data, boxSize, invBoxSize);
-        }
-    }
-}
-
-/**
- * Calculate the interaction for one set of particles.  The particles are put into the order
- * required by the type filters, the variables of the energy expression are set, and the
- * resulting forces and energy are accumulated if they were requested.
- *
- * @param particleSet        the indices of the particles
- * @param particleParameters particle parameter values (particleParameters[particleIndex][parameterIndex])
- * @param forces             force array (forces added)
- * @param data               information and workspace for the current thread
- * @param boxSize            the size of the periodic box
- * @param invBoxSize         the inverse size of the periodic box
- */
-void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
-    // Select the ordering to use for the particles.
-    
-    vector<int>& permutedParticles = data.permutedParticles;
-    if (particleOrder.size() == 1) {
-        // There are no filters, so we don't need to worry about ordering.
-        
-        permutedParticles = particleSet;
-    }
-    else {
-        // Combine the particle types into a single index used to look up the ordering.
-        int index = 0;
-        for (int i = numParticlesPerSet-1; i >= 0; i--)
-            index = particleTypes[particleSet[i]]+numTypes*index;
-        int order = orderIndex[index];
-        // An order of -1 means this set is skipped entirely.
-        if (order == -1)
-            return;
-        for (int i = 0; i < numParticlesPerSet; i++)
-            permutedParticles[i] = particleSet[particleOrder[order][i]];
-    }
-
-    // Record per-particle parameters.
-    
-    CompiledExpressionSet& expressionSet = data.expressionSet;
-    for (int i = 0; i < numParticlesPerSet; i++)
-        for (int j = 0; j < numPerParticleParameters; j++)
-            expressionSet.setVariable(data.particleParamIndices[i][j], particleParameters[permutedParticles[i]][j]);
-    
-    // Compute inter-particle deltas.
-    
-    int numDeltas = data.deltaPairs.size();
-    AlignedArray<fvec4>& delta = data.delta;
-    vector<float>& normDelta = data.normDelta;
-    vector<float>& norm2Delta = data.norm2Delta;
-    for (int i = 0; i < numDeltas; i++) {
-        int p1 = permutedParticles[data.deltaPairs[i].first];
-        int p2 = permutedParticles[data.deltaPairs[i].second];
-        computeDelta(fvec4(posq+4*p1), fvec4(posq+4*p2), delta[i], norm2Delta[i], boxSize, invBoxSize);
-        normDelta[i] = sqrtf(norm2Delta[i]);
-    }
-    
-    // Compute all of the variables the energy can depend on.
-
-    for (int i = 0; i < (int) data.particleTerms.size(); i++) {
-        const ParticleTermInfo& term = data.particleTerms[i];
-        expressionSet.setVariable(term.variableIndex, posq[4*permutedParticles[term.atom]+term.component]);
-    }
-    for (int i = 0; i < (int) data.distanceTerms.size(); i++) {
-        const DistanceTermInfo& term = data.distanceTerms[i];
-        expressionSet.setVariable(term.variableIndex, normDelta[term.delta]);
-    }
-    for (int i = 0; i < (int) data.angleTerms.size(); i++) {
-        const AngleTermInfo& term = data.angleTerms[i];
-        expressionSet.setVariable(term.variableIndex, computeAngle(delta[term.delta1], delta[term.delta2], norm2Delta[term.delta1], norm2Delta[term.delta2], term.delta1Sign*term.delta2Sign));
-    }
-    for (int i = 0; i < (int) data.dihedralTerms.size(); i++) {
-        const DihedralTermInfo& term = data.dihedralTerms[i];
-        // NOTE(review): term.cross1 and term.cross2 are written here through a const reference, so
-        // they must be mutable members of DihedralTermInfo - confirm against the matching header.
-        expressionSet.setVariable(term.variableIndex, getDihedralAngleBetweenThreeVectors(delta[term.delta1], delta[term.delta2], delta[term.delta3], term.cross1, term.cross2, delta[term.delta1]));
-    }
-    
-    if (includeForces) {
-        // Apply forces based on individual particle coordinates.
-
-        AlignedArray<fvec4>& f = data.f;
-        for (int i = 0; i < numParticlesPerSet; i++)
-            f[i] = fvec4(0.0f);
-        for (int i = 0; i < (int) data.particleTerms.size(); i++) {
-            const ParticleTermInfo& term = data.particleTerms[i];
-            // Modify a single component of the vector by going through a temporary array.
-            float temp[4];
-            f[term.atom].store(temp);
-            temp[term.component] -= term.forceExpression.evaluate();
-            f[term.atom] = fvec4(temp);
-        }
-
-        // Apply forces based on distances.
-
-        for (int i = 0; i < (int) data.distanceTerms.size(); i++) {
-            const DistanceTermInfo& term = data.distanceTerms[i];
-            float dEdR = (float) (term.forceExpression.evaluate()*term.deltaSign/(normDelta[term.delta]));
-            fvec4 force = -dEdR*delta[term.delta];
-            f[term.p1] -= force;
-            f[term.p2] += force;
-        }
-
-        // Apply forces based on angles.
-
-        for (int i = 0; i < (int) data.angleTerms.size(); i++) {
-            const AngleTermInfo& term = data.angleTerms[i];
-            float dEdTheta = (float) term.forceExpression.evaluate();
-            fvec4 thetaCross = cross(delta[term.delta1], delta[term.delta2]);
-            float lengthThetaCross = sqrtf(dot3(thetaCross, thetaCross));
-            // Clamp the length so the divisions below cannot divide by zero.
-            if (lengthThetaCross < 1.0e-6f)
-                lengthThetaCross = 1.0e-6f;
-            float termA = dEdTheta*term.delta2Sign/(norm2Delta[term.delta1]*lengthThetaCross);
-            float termC = -dEdTheta*term.delta1Sign/(norm2Delta[term.delta2]*lengthThetaCross);
-            fvec4 deltaCross1 = cross(delta[term.delta1], thetaCross);
-            fvec4 deltaCross2 = cross(delta[term.delta2], thetaCross);
-            fvec4 force1 = termA*deltaCross1;
-            fvec4 force3 = termC*deltaCross2;
-            // The three forces sum to zero.
-            fvec4 force2 = -(force1+force3);
-            f[term.p1] += force1;
-            f[term.p2] += force2;
-            f[term.p3] += force3;
-        }
-
-        // Apply forces based on dihedrals.
-
-        for (int i = 0; i < (int) data.dihedralTerms.size(); i++) {
-            const DihedralTermInfo& term = data.dihedralTerms[i];
-            float dEdTheta = (float) term.forceExpression.evaluate();
-            // cross1 and cross2 were stored when the dihedral angle was computed above.
-            float normCross1 = dot3(term.cross1, term.cross1);
-            float normBC = normDelta[term.delta2];
-            float forceFactors[4];
-            forceFactors[0] = (-dEdTheta*normBC)/normCross1;
-            float normCross2 = dot3(term.cross2, term.cross2);
-            forceFactors[3] = (dEdTheta*normBC)/normCross2;
-            forceFactors[1] = dot3(delta[term.delta1], delta[term.delta2]);
-            forceFactors[1] /= norm2Delta[term.delta2];
-            forceFactors[2] = dot3(delta[term.delta3], delta[term.delta2]);
-            forceFactors[2] /= norm2Delta[term.delta2];
-            fvec4 force1 = forceFactors[0]*term.cross1;
-            fvec4 force4 = forceFactors[3]*term.cross2;
-            fvec4 s = forceFactors[1]*force1 - forceFactors[2]*force4;
-            f[term.p1] += force1;
-            f[term.p2] -= force1-s;
-            f[term.p3] -= force4+s;
-            f[term.p4] += force4;
-        }
-
-        // Store the forces.
-
-        for (int i = 0; i < numParticlesPerSet; i++) {
-            int index = permutedParticles[i];
-            (fvec4(forces+4*index)+f[i]).store(forces+4*index);
-        }
-    }
-
-    // Add the energy
-
-    if (includeEnergy)
-        data.energy += data.energyExpression.evaluate();
-}
-
-/**
- * Compute the displacement and squared distance between two points, optionally using
- * periodic boundary conditions.
- *
- * @param posI        the first point
- * @param posJ        the second point
- * @param deltaR      on exit, the displacement from posI to posJ
- * @param r2          on exit, the squared length of deltaR
- * @param boxSize     the size of the periodic box
- * @param invBoxSize  the inverse size of the periodic box
- */
-void CpuCustomManyParticleForce::computeDelta(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, const fvec4& boxSize, const fvec4& invBoxSize) const {
-    deltaR = posJ-posI;
-    if (usePeriodic) {
-        // Subtract the whole number of box lengths nearest to the displacement.
-        fvec4 base = round(deltaR*invBoxSize)*boxSize;
-        deltaR = deltaR-base;
-    }
-    r2 = dot3(deltaR, deltaR);
-}
-
-/**
- * Compute the angle between two vectors.
- *
- * @param vi    the first vector
- * @param vj    the second vector
- * @param v2i   the squared length of vi
- * @param v2j   the squared length of vj
- * @param sign  a factor the dot product of vi and vj is multiplied by before the angle is computed
- * @return the angle, in radians
- */
-float CpuCustomManyParticleForce::computeAngle(const fvec4& vi, const fvec4& vj, float v2i, float v2j, float sign) {
-    float dot = dot3(vi, vj)*sign;
-    float cosine = dot/sqrtf(v2i*v2j);
-    if (cosine > 0.99f || cosine < -0.99f) {
-        // We're close to the singularity in acos(), so take the cross product and use asin() instead.
-
-        fvec4 cross12 = cross(vi, vj);
-        float scale = v2i*v2j;
-        float angle = asinf(sqrtf(dot3(cross12, cross12)/scale));
-        // asin() only yields angles up to pi/2, so use the supplement when the cosine is negative.
-        if (cosine < 0.0f)
-            angle = (float) (M_PI-angle);
-        return angle;
-    }
-    return acosf(cosine);
-}
-
-/**
- * Compute the dihedral angle defined by three vectors.
- *
- * @param v1          the first vector
- * @param v2          the second vector
- * @param v3          the third vector
- * @param cross1      on exit, the cross product of v1 and v2
- * @param cross2      on exit, the cross product of v2 and v3
- * @param signVector  the angle is negated if the dot product of this vector with cross2 is negative
- * @return the angle between cross1 and cross2, with its sign selected by signVector
- */
-float CpuCustomManyParticleForce::getDihedralAngleBetweenThreeVectors(const fvec4& v1, const fvec4& v2, const fvec4& v3, fvec4& cross1, fvec4& cross2, const fvec4& signVector) {
-    cross1 = cross(v1, v2);
-    cross2 = cross(v2, v3);
-    float angle = computeAngle(cross1, cross2, dot3(cross1, cross1), dot3(cross2, cross2), 1.0f);
-    float dotProduct = dot3(signVector, cross2);
-    if (dotProduct < 0) 
-        angle = -angle;
-    return angle;
-}
-
-/**
- * Record a particle coordinate term and look up the index of its variable in the thread's
- * expression set.
- */
-CpuCustomManyParticleForce::ParticleTermInfo::ParticleTermInfo(const string& name, int atom, int component, const Lepton::CompiledExpression& forceExpression, ThreadData& data) :
-        name(name), atom(atom), component(component), forceExpression(forceExpression) {
-    variableIndex = data.expressionSet.getVariableIndex(name);
-}
-
-/**
- * Record a distance term, look up the index of its variable, and request the displacement
- * between its two particles.  A pair stored in the opposite order may be reused.
- */
-CpuCustomManyParticleForce::DistanceTermInfo::DistanceTermInfo(const string& name, const vector<int>& atoms, const Lepton::CompiledExpression& forceExpression, ThreadData& data) :
-        name(name), p1(atoms[0]), p2(atoms[1]), forceExpression(forceExpression) {
-    variableIndex = data.expressionSet.getVariableIndex(name);
-    data.requestDeltaPair(p1, p2, delta, deltaSign, true);
-}
-
-/**
- * Record an angle term, look up the index of its variable, and request the displacements
- * for the pairs (p1, p2) and (p3, p2).  Pairs stored in the opposite order may be reused.
- */
-CpuCustomManyParticleForce::AngleTermInfo::AngleTermInfo(const string& name, const vector<int>& atoms, const Lepton::CompiledExpression& forceExpression, ThreadData& data) :
-        name(name), p1(atoms[0]), p2(atoms[1]), p3(atoms[2]), forceExpression(forceExpression) {
-    variableIndex = data.expressionSet.getVariableIndex(name);
-    data.requestDeltaPair(p1, p2,delta1, delta1Sign, true);
-    data.requestDeltaPair(p3, p2, delta2, delta2Sign, true);
-}
-
-/**
- * Record a dihedral term, look up the index of its variable, and request the displacements
- * for the pairs (p2, p1), (p2, p3) and (p4, p3).  Reversed pairs are not accepted, so the
- * sign reported for each pair is always 1 and is discarded.
- */
-CpuCustomManyParticleForce::DihedralTermInfo::DihedralTermInfo(const string& name, const vector<int>& atoms, const Lepton::CompiledExpression& forceExpression, ThreadData& data) :
-        name(name), p1(atoms[0]), p2(atoms[1]), p3(atoms[2]), p4(atoms[3]), forceExpression(forceExpression) {
-    variableIndex = data.expressionSet.getVariableIndex(name);
-    float sign;
-    data.requestDeltaPair(p2, p1, delta1, sign, false);
-    data.requestDeltaPair(p2, p3, delta2, sign, false);
-    data.requestDeltaPair(p4, p3, delta3, sign, false);
-}
-
-/**
- * Build the compiled energy expression and one compiled force expression for every variable
- * the energy can depend on, and size the scratch arrays.  The energy member is not
- * initialized here; threadComputeForce() resets it before each evaluation.
- *
- * @param force       the CustomManyParticleForce being evaluated
- * @param energyExpr  the parsed energy expression
- * @param distances   the distance variables, mapping name to the particles involved
- * @param angles      the angle variables, mapping name to the particles involved
- * @param dihedrals   the dihedral variables, mapping name to the particles involved
- */
-CpuCustomManyParticleForce::ThreadData::ThreadData(const CustomManyParticleForce& force, Lepton::ParsedExpression& energyExpr,
-            map<string, vector<int> >& distances, map<string, vector<int> >& angles, map<string, vector<int> >& dihedrals) {
-    int numParticlesPerSet = force.getNumParticlesPerSet();
-    int numPerParticleParameters = force.getNumPerParticleParameters();
-    particleParamIndices.resize(numParticlesPerSet);
-    permutedParticles.resize(numParticlesPerSet);
-    f.resize(numParticlesPerSet);
-    energyExpression = energyExpr.createCompiledExpression();
-    expressionSet.registerExpression(energyExpression);
-
-    // Differentiate the energy to get expressions for the force.
-
-    for (int i = 0; i < numParticlesPerSet; i++) {
-        // Coordinate variables are named x1, y1, z1, x2, ... with particle numbers starting at 1.
-        stringstream xname, yname, zname;
-        xname << 'x' << (i+1);
-        yname << 'y' << (i+1);
-        zname << 'z' << (i+1);
-        particleTerms.push_back(CpuCustomManyParticleForce::ParticleTermInfo(xname.str(), i, 0, energyExpr.differentiate(xname.str()).optimize().createCompiledExpression(), *this));
-        particleTerms.push_back(CpuCustomManyParticleForce::ParticleTermInfo(yname.str(), i, 1, energyExpr.differentiate(yname.str()).optimize().createCompiledExpression(), *this));
-        particleTerms.push_back(CpuCustomManyParticleForce::ParticleTermInfo(zname.str(), i, 2, energyExpr.differentiate(zname.str()).optimize().createCompiledExpression(), *this));
-        // Per-particle parameters use the parameter name followed by the particle number.
-        for (int j = 0; j < numPerParticleParameters; j++) {
-            stringstream paramname;
-            paramname << force.getPerParticleParameterName(j) << (i+1);
-            particleParamIndices[i].push_back(expressionSet.getVariableIndex(paramname.str()));
-        }
-    }
-    // NOTE(review): dihedrals are processed first, presumably so the pairs they need in a fixed
-    // order already exist when distance and angle terms (which accept reversed pairs) are added.
-    for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter)
-        dihedralTerms.push_back(CpuCustomManyParticleForce::DihedralTermInfo(iter->first, iter->second, energyExpr.differentiate(iter->first).optimize().createCompiledExpression(), *this));
-    for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter)
-        distanceTerms.push_back(CpuCustomManyParticleForce::DistanceTermInfo(iter->first, iter->second, energyExpr.differentiate(iter->first).optimize().createCompiledExpression(), *this));
-    for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter)
-        angleTerms.push_back(CpuCustomManyParticleForce::AngleTermInfo(iter->first, iter->second, energyExpr.differentiate(iter->first).optimize().createCompiledExpression(), *this));
-    // Register the force expressions only after all terms have been added to the vectors.
-    for (int i = 0; i < particleTerms.size(); i++)
-        expressionSet.registerExpression(particleTerms[i].forceExpression);
-    for (int i = 0; i < distanceTerms.size(); i++)
-        expressionSet.registerExpression(distanceTerms[i].forceExpression);
-    for (int i = 0; i < angleTerms.size(); i++)
-        expressionSet.registerExpression(angleTerms[i].forceExpression);
-    for (int i = 0; i < dihedralTerms.size(); i++)
-        expressionSet.registerExpression(dihedralTerms[i].forceExpression);
-    // Every term has requested its pairs by now, so the number of deltas is final.
-    int numDeltas = deltaPairs.size();
-    delta.resize(numDeltas);
-    normDelta.resize(numDeltas);
-    norm2Delta.resize(numDeltas);
-}
-
-void CpuCustomManyParticleForce::ThreadData::requestDeltaPair(int p1, int p2, int& pairIndex, float& pairSign, bool allowReversed) {
-    for (int i = 0; i < (int) deltaPairs.size(); i++) {
-        if (deltaPairs[i].first == p1 && deltaPairs[i].second == p2) {
-            pairIndex = i;
-            pairSign = 1;
-            return;
-        }
-        if (deltaPairs[i].first == p2 && deltaPairs[i].second == p1 && allowReversed) {
-            pairIndex = i;
-            pairSign = -1;
-            return;
-        }
-    }
-    pairIndex = deltaPairs.size();
-    pairSign = 1;
-    deltaPairs.push_back(make_pair(p1, p2));
-}
+
+/* Portions copyright (c) 2009-2014 Stanford University and Simbios.
+ * Contributors: Peter Eastman
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject
+ * to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <string.h>
+#include <sstream>
+#include <utility>
+
+#include "SimTKOpenMMCommon.h"
+#include "SimTKOpenMMLog.h"
+#include "SimTKOpenMMUtilities.h"
+#include "ReferenceForce.h"
+#include "CpuCustomManyParticleForce.h"
+#include "ReferenceTabulatedFunction.h"
+#include "openmm/internal/CustomManyParticleForceImpl.h"
+#include "lepton/CustomFunction.h"
+#include "gmx_atomic.h"
+
+using namespace OpenMM;
+using namespace std;
+
+/**
+ * Thread pool task that hands each worker thread over to
+ * CpuCustomManyParticleForce::threadComputeForce() on the owning force object.
+ */
+class CpuCustomManyParticleForce::ComputeForceTask : public ThreadPool::Task {
+public:
+    ComputeForceTask(CpuCustomManyParticleForce& owner)
+        : owner(owner) {
+    }
+
+    // Forward the call, together with the index of the executing thread, to the owner.
+    void execute(ThreadPool& threads, int threadIndex) {
+        owner.threadComputeForce(threads, threadIndex);
+    }
+
+    CpuCustomManyParticleForce& owner;
+};
+
+/**
+ * Create the CPU implementation of a CustomManyParticleForce.
+ *
+ * Copies the basic dimensions from the force, builds one ThreadData per thread
+ * of the pool, enables the cutoff when the force uses one, and records the
+ * exclusions and the type filter tables.
+ *
+ * @param force    the force definition to implement
+ * @param threads  the thread pool used to run the computation
+ */
+CpuCustomManyParticleForce::CpuCustomManyParticleForce(const CustomManyParticleForce& force, ThreadPool& threads) :
+            threads(threads), useCutoff(false), usePeriodic(false), neighborList(NULL) {
+    numParticles = force.getNumParticles();
+    numParticlesPerSet = force.getNumParticlesPerSet();
+    numPerParticleParameters = force.getNumPerParticleParameters();
+    centralParticleMode = (force.getPermutationMode() == CustomManyParticleForce::UniqueCentralParticle);
+
+    // Wrap every tabulated function of the force in a custom function, keyed by its name.
+
+    typedef map<string, Lepton::CustomFunction*> FunctionMap;
+    FunctionMap functions;
+    const int numFunctions = (int) force.getNumTabulatedFunctions();
+    for (int f = 0; f < numFunctions; f++) {
+        const string functionName = force.getTabulatedFunctionName(f);
+        functions[functionName] = createReferenceTabulatedFunction(force.getTabulatedFunction(f));
+    }
+
+    // Parse the energy expression once, then build a separate ThreadData for each thread.
+
+    map<string, vector<int> > distances, angles, dihedrals;
+    Lepton::ParsedExpression energyExpr = CustomManyParticleForceImpl::prepareExpression(force, functions, distances, angles, dihedrals);
+    const int numThreads = threads.getNumThreads();
+    for (int t = 0; t < numThreads; t++)
+        threadData.push_back(new ThreadData(force, energyExpr, distances, angles, dihedrals));
+    const bool hasCutoff = (force.getNonbondedMethod() != CustomManyParticleForce::NoCutoff);
+    if (hasCutoff)
+        setUseCutoff(force.getCutoffDistance());
+
+    // Release the custom functions that were created above.
+
+    for (FunctionMap::iterator entry = functions.begin(); entry != functions.end(); ++entry)
+        delete entry->second;
+
+    // Store every exclusion in both directions so it can be looked up from either particle.
+
+    exclusions.resize(force.getNumParticles());
+    const int numExclusions = (int) force.getNumExclusions();
+    for (int e = 0; e < numExclusions; e++) {
+        int first, second;
+        force.getExclusionParticles(e, first, second);
+        exclusions[first].insert(second);
+        exclusions[second].insert(first);
+    }
+
+    // Build the tables describing the type filters.
+
+    CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypes, orderIndex, particleOrder);
+}
+
+/**
+ * Release the neighbor list (if one was created) and the per-thread data objects.
+ */
+CpuCustomManyParticleForce::~CpuCustomManyParticleForce() {
+    // Deleting a null pointer is a no-op, so no explicit check is needed.
+    delete neighborList;
+    const int numThreadData = (int) threadData.size();
+    for (int t = 0; t < numThreadData; t++)
+        delete threadData[t];
+}
+
+/**
+ * Compute the interaction for all particle sets using the thread pool.
+ *
+ * @param posq                particle positions, four floats per particle
+ * @param particleParameters  per-particle parameter values, indexed [particle][parameter]
+ * @param globalParameters    values of the global parameters, keyed by name
+ * @param threadForce         one force buffer per thread
+ * @param includeForces       whether forces should be computed
+ * @param includeEnergy       whether the energy should be computed
+ * @param energy              the energy summed over all threads is added to this value
+ */
+void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, RealOpenMM** particleParameters,
+                                                  const map<string, double>& globalParameters, vector<AlignedArray<float> >& threadForce,
+                                                  bool includeForces, bool includeEnergy, double& energy) {
+    // Publish the inputs through member fields so that the worker threads can read them.
+
+    this->posq = &posq[0];
+    this->particleParameters = particleParameters;
+    this->globalParameters = &globalParameters;
+    this->threadForce = &threadForce;
+    this->includeForces = includeForces;
+    this->includeEnergy = includeEnergy;
+
+    // The threads claim particles to process by incrementing this shared counter.
+
+    gmx_atomic_t counter;
+    gmx_atomic_set(&counter, 0);
+    this->atomicCounter = &counter;
+
+    if (useCutoff) {
+        // Construct a neighbor list.  We use CpuNeighborList to do this, but then copy the result
+        // into a new data structure.  This is needed because in UniqueCentralParticle mode, the
+        // neighbor list needs to include symmetric pairs.
+
+        particleNeighbors.resize(numParticles);
+        for (int p = 0; p < numParticles; p++)
+            particleNeighbors[p].clear();
+        float boxSizeFloat[] = {(float) periodicBoxSize[0], (float) periodicBoxSize[1], (float) periodicBoxSize[2]};
+        neighborList->computeNeighborList(numParticles, posq, exclusions, boxSizeFloat, usePeriodic, cutoffDistance, threads);
+        for (int block = 0; block < neighborList->getNumBlocks(); block++) {
+            const vector<int>& blockNeighbors = neighborList->getBlockNeighbors(block);
+            const vector<char>& blockExclusions = neighborList->getBlockExclusions(block);
+            const int numNeighbors = blockNeighbors.size();
+            for (int lane = 0; lane < 4; lane++) {
+                // Bit "lane" of an exclusion flag tells whether the neighbor is excluded for this atom of the block.
+                const int laneMask = 1<<lane;
+                const int blockAtom = neighborList->getSortedAtoms()[4*block+lane];
+                for (int k = 0; k < numNeighbors; k++) {
+                    if ((blockExclusions[k] & laneMask) != 0)
+                        continue;
+                    const int neighbor = blockNeighbors[k];
+                    particleNeighbors[blockAtom].push_back(neighbor);
+                    if (centralParticleMode)
+                        particleNeighbors[neighbor].push_back(blockAtom);
+                }
+            }
+        }
+    }
+
+    // Signal the threads to start running and wait for them to finish.
+
+    ComputeForceTask task(*this);
+    threads.execute(task);
+    threads.waitForThreads();
+
+    // Combine the energies from all the threads.
+
+    if (!includeEnergy)
+        return;
+    const int numThreads = threads.getNumThreads();
+    for (int t = 0; t < numThreads; t++)
+        energy += threadData[t]->energy;
+}
+
+/**
+ * Body of the computation executed by each thread of the pool.
+ *
+ * Every thread repeatedly claims the next unprocessed first particle from the
+ * shared atomic counter and evaluates all particle sets starting with it.
+ *
+ * @param threads      the thread pool running the task (not used here)
+ * @param threadIndex  index of the calling thread; selects its ThreadData and force buffer
+ */
+void CpuCustomManyParticleForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
+    ThreadData& data = *threadData[threadIndex];
+    float* forces = &(*threadForce)[threadIndex][0];
+    fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
+    fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0);
+    vector<int> particleIndices(numParticlesPerSet);
+    data.energy = 0;
+
+    // Load the current global parameter values into this thread's expressions.
+
+    typedef map<string, double>::const_iterator ParameterIterator;
+    for (ParameterIterator param = globalParameters->begin(); param != globalParameters->end(); ++param)
+        data.expressionSet.setVariable(data.expressionSet.getVariableIndex(param->first), param->second);
+
+    gmx_atomic_t* counter = reinterpret_cast<gmx_atomic_t*>(atomicCounter);
+    if (useCutoff) {
+        // Loop over interactions from the neighbor list.
+
+        for (int first = gmx_atomic_fetch_add(counter, 1); first < numParticles; first = gmx_atomic_fetch_add(counter, 1)) {
+            particleIndices[0] = first;
+            loopOverInteractions(particleNeighbors[first], particleIndices, 1, 0, particleParameters, forces, data, boxSize, invBoxSize);
+        }
+        return;
+    }
+
+    // Loop over all possible sets of particles.
+
+    vector<int> particles(numParticles);
+    for (int p = 0; p < numParticles; p++)
+        particles[p] = p;
+    for (int first = gmx_atomic_fetch_add(counter, 1); first < numParticles; first = gmx_atomic_fetch_add(counter, 1)) {
+        particleIndices[0] = first;
+        const int startIndex = (centralParticleMode ? 0 : first+1);
+        loopOverInteractions(particles, particleIndices, 1, startIndex, particleParameters, forces, data, boxSize, invBoxSize);
+    }
+}
+
+/**
+ * Enable the cutoff and record its distance.
+ *
+ * @param distance  the cutoff distance
+ */
+void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) {
+    cutoffDistance = distance;
+    useCutoff = true;
+
+    // The neighbor list is created on the first call and reused afterwards.
+    if (neighborList != NULL)
+        return;
+    neighborList = new CpuNeighborList(4);
+}
+
+/**
+ * Enable periodic boundary conditions and record the box dimensions.
+ *
+ * A cutoff must already be in use, and every box dimension must be at least
+ * twice the cutoff distance; both conditions are checked with assertions.
+ *
+ * @param boxSize  the dimensions of the periodic box
+ */
+void CpuCustomManyParticleForce::setPeriodic(RealVec& boxSize) {
+    assert(useCutoff);
+    assert(boxSize[0] >= 2.0*cutoffDistance);
+    assert(boxSize[1] >= 2.0*cutoffDistance);
+    assert(boxSize[2] >= 2.0*cutoffDistance);
+    for (int axis = 0; axis < 3; axis++)
+        periodicBoxSize[axis] = boxSize[axis];
+    usePeriodic = true;
+}
+
+/**
+ * Recursively enumerate the particle sets that extend the partial set stored in
+ * particleSet[0..loopIndex-1] and evaluate every complete set.
+ *
+ * @param availableParticles  candidate particles for the remaining positions of the set
+ * @param particleSet         the set being built; entries below loopIndex are already chosen
+ * @param loopIndex           position in particleSet that this call fills in
+ * @param startIndex          first index into availableParticles to consider
+ * @param particleParameters  per-particle parameter values, passed on to calculateOneIxn()
+ * @param forces              force buffer of the calling thread, passed on to calculateOneIxn()
+ * @param data                per-thread data, passed on to calculateOneIxn()
+ * @param boxSize             periodic box dimensions
+ * @param invBoxSize          reciprocal box dimensions
+ */
+void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availableParticles, vector<int>& particleSet, int loopIndex, int startIndex,
+                                                          RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
+    const int numAvailable = (int) availableParticles.size();
+
+    // cutoffDistance is only assigned by setUseCutoff(), so it must not be read when no cutoff is in use.
+    const double cutoff2 = (useCutoff ? cutoffDistance*cutoffDistance : 0.0);
+
+    // In central particle mode only the distance to the first particle is checked;
+    // otherwise the distance to every particle chosen so far is checked.
+    const int checkRange = (centralParticleMode ? 1 : loopIndex);
+    for (int i = startIndex; i < numAvailable; i++) {
+        const int particle = availableParticles[i];
+
+        // The first particle of the set may not be selected a second time.
+
+        if (loopIndex > 0 && particle == particleSet[0])
+            continue;
+
+        // Check whether this particle can actually participate in interactions with the others found so far.
+
+        bool include = true;
+        if (useCutoff) {
+            fvec4 deltaR;
+            fvec4 pos1(posq+4*particle);
+            float r2;
+            for (int j = 0; j < checkRange && include; j++) {
+                fvec4 pos2(posq+4*particleSet[j]);
+                computeDelta(pos1, pos2, deltaR, r2, boxSize, invBoxSize);
+                include = (r2 < cutoff2);
+            }
+        }
+        for (int j = 0; j < loopIndex && include; j++)
+            include = (exclusions[particle].find(particleSet[j]) == exclusions[particle].end());
+        if (!include)
+            continue;
+
+        // Accept the particle, then either evaluate the completed set or recurse to fill the next position.
+
+        particleSet[loopIndex] = particle;
+        if (loopIndex == numParticlesPerSet-1)
+            calculateOneIxn(particleSet, particleParameters, forces, data, boxSize, invBoxSize);
+        else
+            loopOverInteractions(availableParticles, particleSet, loopIndex+1, i+1, particleParameters, forces, data, boxSize, invBoxSize);
+    }
+}
+
+/**
+ * Evaluate the interaction for a single set of particles.
+ *
+ * The particles are reordered when type filters are in use, the variables the
+ * energy expression can depend on (per-particle parameters, coordinates,
+ * distances, angles and dihedrals) are loaded into data.expressionSet, and
+ * then, depending on includeForces and includeEnergy, forces are accumulated
+ * into the forces array and the energy is added to data.energy.
+ *
+ * @param particleSet         indices of the particles forming this set
+ * @param particleParameters  per-particle parameter values, indexed [particle][parameter]
+ * @param forces              force buffer to accumulate into, four floats per particle
+ * @param data                storage and compiled expressions belonging to the calling thread
+ * @param boxSize             periodic box dimensions, passed to computeDelta()
+ * @param invBoxSize          reciprocal box dimensions, passed to computeDelta()
+ */
+void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
+    // Select the ordering to use for the particles.
+    
+    vector<int>& permutedParticles = data.permutedParticles;
+    if (particleOrder.size() == 1) {
+        // There are no filters, so we don't need to worry about ordering.
+        
+        permutedParticles = particleSet;
+    }
+    else {
+        // Combine the types of all particles in the set into a single number
+        // (base numTypes) and use it to look up which ordering applies.
+        int index = 0;
+        for (int i = numParticlesPerSet-1; i >= 0; i--)
+            index = particleTypes[particleSet[i]]+numTypes*index;
+        int order = orderIndex[index];
+        // An order of -1 means nothing is computed for this combination of types.
+        if (order == -1)
+            return;
+        for (int i = 0; i < numParticlesPerSet; i++)
+            permutedParticles[i] = particleSet[particleOrder[order][i]];
+    }
+
+    // Record per-particle parameters.
+    
+    CompiledExpressionSet& expressionSet = data.expressionSet;
+    for (int i = 0; i < numParticlesPerSet; i++)
+        for (int j = 0; j < numPerParticleParameters; j++)
+            expressionSet.setVariable(data.particleParamIndices[i][j], particleParameters[permutedParticles[i]][j]);
+    
+    // Compute inter-particle deltas.
+    
+    int numDeltas = data.deltaPairs.size();
+    AlignedArray<fvec4>& delta = data.delta;
+    AlignedArray<fvec4>& cross1 = data.cross1;
+    AlignedArray<fvec4>& cross2 = data.cross2;
+    vector<float>& normDelta = data.normDelta;
+    vector<float>& norm2Delta = data.norm2Delta;
+    for (int i = 0; i < numDeltas; i++) {
+        int p1 = permutedParticles[data.deltaPairs[i].first];
+        int p2 = permutedParticles[data.deltaPairs[i].second];
+        computeDelta(fvec4(posq+4*p1), fvec4(posq+4*p2), delta[i], norm2Delta[i], boxSize, invBoxSize);
+        normDelta[i] = sqrtf(norm2Delta[i]);
+    }
+    
+    // Compute all of the variables the energy can depend on.
+
+    for (int i = 0; i < (int) data.particleTerms.size(); i++) {
+        const ParticleTermInfo& term = data.particleTerms[i];
+        expressionSet.setVariable(term.variableIndex, posq[4*permutedParticles[term.atom]+term.component]);
+    }
+    for (int i = 0; i < (int) data.distanceTerms.size(); i++) {
+        const DistanceTermInfo& term = data.distanceTerms[i];
+        expressionSet.setVariable(term.variableIndex, normDelta[term.delta]);
+    }
+    for (int i = 0; i < (int) data.angleTerms.size(); i++) {
+        const AngleTermInfo& term = data.angleTerms[i];
+        expressionSet.setVariable(term.variableIndex, computeAngle(delta[term.delta1], delta[term.delta2], norm2Delta[term.delta1], norm2Delta[term.delta2], term.delta1Sign*term.delta2Sign));
+    }
+    for (int i = 0; i < (int) data.dihedralTerms.size(); i++) {
+        const DihedralTermInfo& term = data.dihedralTerms[i];
+        // cross1[i] and cross2[i] are filled in here and reused below when computing the dihedral forces.
+        expressionSet.setVariable(term.variableIndex, getDihedralAngleBetweenThreeVectors(delta[term.delta1], delta[term.delta2], delta[term.delta3], cross1[i], cross2[i], delta[term.delta1]));
+    }
+    
+    if (includeForces) {
+        // Apply forces based on individual particle coordinates.
+
+        AlignedArray<fvec4>& f = data.f;
+        for (int i = 0; i < numParticlesPerSet; i++)
+            f[i] = fvec4(0.0f);
+        for (int i = 0; i < (int) data.particleTerms.size(); i++) {
+            const ParticleTermInfo& term = data.particleTerms[i];
+            // Update a single component of the force by going through a temporary array.
+            float temp[4];
+            f[term.atom].store(temp);
+            temp[term.component] -= term.forceExpression.evaluate();
+            f[term.atom] = fvec4(temp);
+        }
+
+        // Apply forces based on distances.
+
+        for (int i = 0; i < (int) data.distanceTerms.size(); i++) {
+            const DistanceTermInfo& term = data.distanceTerms[i];
+            // Dividing by the distance here means the (unnormalized) delta vector can be used directly below.
+            float dEdR = (float) (term.forceExpression.evaluate()*term.deltaSign/(normDelta[term.delta]));
+            fvec4 force = -dEdR*delta[term.delta];
+            f[term.p1] -= force;
+            f[term.p2] += force;
+        }
+
+        // Apply forces based on angles.
+
+        for (int i = 0; i < (int) data.angleTerms.size(); i++) {
+            const AngleTermInfo& term = data.angleTerms[i];
+            float dEdTheta = (float) term.forceExpression.evaluate();
+            fvec4 thetaCross = cross(delta[term.delta1], delta[term.delta2]);
+            float lengthThetaCross = sqrtf(dot3(thetaCross, thetaCross));
+            // Impose a lower bound so the divisions below cannot divide by (nearly) zero.
+            if (lengthThetaCross < 1.0e-6f)
+                lengthThetaCross = 1.0e-6f;
+            float termA = dEdTheta*term.delta2Sign/(norm2Delta[term.delta1]*lengthThetaCross);
+            float termC = -dEdTheta*term.delta1Sign/(norm2Delta[term.delta2]*lengthThetaCross);
+            fvec4 deltaCross1 = cross(delta[term.delta1], thetaCross);
+            fvec4 deltaCross2 = cross(delta[term.delta2], thetaCross);
+            fvec4 force1 = termA*deltaCross1;
+            fvec4 force3 = termC*deltaCross2;
+            // The three forces of an angle term sum to zero.
+            fvec4 force2 = -(force1+force3);
+            f[term.p1] += force1;
+            f[term.p2] += force2;
+            f[term.p3] += force3;
+        }
+
+        // Apply forces based on dihedrals.
+
+        for (int i = 0; i < (int) data.dihedralTerms.size(); i++) {
+            const DihedralTermInfo& term = data.dihedralTerms[i];
+            float dEdTheta = (float) term.forceExpression.evaluate();
+            float normCross1 = dot3(cross1[i], cross1[i]);
+            float normBC = normDelta[term.delta2];
+            float forceFactors[4];
+            forceFactors[0] = (-dEdTheta*normBC)/normCross1;
+            float normCross2 = dot3(cross2[i], cross2[i]);
+            forceFactors[3] = (dEdTheta*normBC)/normCross2;
+            forceFactors[1] = dot3(delta[term.delta1], delta[term.delta2]);
+            forceFactors[1] /= norm2Delta[term.delta2];
+            forceFactors[2] = dot3(delta[term.delta3], delta[term.delta2]);
+            forceFactors[2] /= norm2Delta[term.delta2];
+            fvec4 force1 = forceFactors[0]*cross1[i];
+            fvec4 force4 = forceFactors[3]*cross2[i];
+            fvec4 s = forceFactors[1]*force1 - forceFactors[2]*force4;
+            f[term.p1] += force1;
+            f[term.p2] -= force1-s;
+            f[term.p3] -= force4+s;
+            f[term.p4] += force4;
+        }
+
+        // Store the forces.
+
+        for (int i = 0; i < numParticlesPerSet; i++) {
+            // f is indexed by position within the set; map it back to the actual particle index.
+            int index = permutedParticles[i];
+            (fvec4(forces+4*index)+f[i]).store(forces+4*index);
+        }
+    }
+
+    // Add the energy
+
+    if (includeEnergy)
+        data.energy += data.energyExpression.evaluate();
+}
+
+/**
+ * Compute the displacement from one position to another and its squared length.
+ *
+ * @param posI        the first position
+ * @param posJ        the second position
+ * @param deltaR      on exit, posJ-posI, wrapped by whole box lengths when periodic boundaries are in use
+ * @param r2          on exit, the squared length of the first three components of deltaR
+ * @param boxSize     periodic box dimensions
+ * @param invBoxSize  reciprocal box dimensions
+ */
+void CpuCustomManyParticleForce::computeDelta(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, const fvec4& boxSize, const fvec4& invBoxSize) const {
+    deltaR = posJ-posI;
+    if (usePeriodic) {
+        // Subtract the nearest whole number of box lengths along each axis.
+        const fvec4 wrap = round(deltaR*invBoxSize)*boxSize;
+        deltaR -= wrap;
+    }
+    r2 = dot3(deltaR, deltaR);
+}
+
+/**
+ * Compute the angle between two vectors.
+ *
+ * @param vi    the first vector
+ * @param vj    the second vector
+ * @param v2i   the squared length of vi
+ * @param v2j   the squared length of vj
+ * @param sign  factor applied to the dot product of vi and vj
+ * @return the angle in radians
+ */
+float CpuCustomManyParticleForce::computeAngle(const fvec4& vi, const fvec4& vj, float v2i, float v2j, float sign) {
+    const float lengthProduct2 = v2i*v2j;
+    const float signedDot = dot3(vi, vj)*sign;
+    const float cosine = signedDot/sqrtf(lengthProduct2);
+    const bool nearSingularity = (cosine > 0.99f || cosine < -0.99f);
+    if (!nearSingularity)
+        return acosf(cosine);
+
+    // We're close to the singularity in acos(), so take the cross product and use asin() instead.
+
+    const fvec4 cross12 = cross(vi, vj);
+    const float sineAngle = asinf(sqrtf(dot3(cross12, cross12)/lengthProduct2));
+    if (cosine < 0.0f)
+        return (float) (M_PI-sineAngle);
+    return sineAngle;
+}
+
+/**
+ * Compute the dihedral angle defined by three vectors.
+ *
+ * @param v1          the first vector
+ * @param v2          the second vector
+ * @param v3          the third vector
+ * @param cross1      on exit, the cross product of v1 and v2
+ * @param cross2      on exit, the cross product of v2 and v3
+ * @param signVector  the angle is negated when this vector's dot product with cross2 is negative
+ * @return the signed angle in radians
+ */
+float CpuCustomManyParticleForce::getDihedralAngleBetweenThreeVectors(const fvec4& v1, const fvec4& v2, const fvec4& v3, fvec4& cross1, fvec4& cross2, const fvec4& signVector) {
+    cross1 = cross(v1, v2);
+    cross2 = cross(v2, v3);
+    const float magnitude = computeAngle(cross1, cross2, dot3(cross1, cross1), dot3(cross2, cross2), 1.0f);
+    const bool negative = (dot3(signVector, cross2) < 0);
+    return (negative ? -magnitude : magnitude);
+}
+
+/**
+ * Describe a term that depends on one coordinate of one particle of the set.
+ *
+ * @param name             name of the coordinate variable in the expression
+ * @param atom             position of the particle within the set
+ * @param component        which coordinate component the variable refers to
+ * @param forceExpression  compiled derivative of the energy with respect to the variable
+ * @param data             the ThreadData whose expression set holds the variable
+ */
+CpuCustomManyParticleForce::ParticleTermInfo::ParticleTermInfo(const string& name, int atom, int component, const Lepton::CompiledExpression& forceExpression, ThreadData& data) :
+        name(name),
+        atom(atom),
+        component(component),
+        forceExpression(forceExpression) {
+    // Remember where this variable lives in the expression set.
+    CompiledExpressionSet& expressions = data.expressionSet;
+    variableIndex = expressions.getVariableIndex(name);
+}
+
+/**
+ * Describe a term that depends on the distance between two particles of the set.
+ *
+ * @param name             name of the distance variable in the expression
+ * @param atoms            positions within the set of the two particles involved
+ * @param forceExpression  compiled derivative of the energy with respect to the variable
+ * @param data             the ThreadData that holds the variable and the list of delta pairs
+ */
+CpuCustomManyParticleForce::DistanceTermInfo::DistanceTermInfo(const string& name, const vector<int>& atoms, const Lepton::CompiledExpression& forceExpression, ThreadData& data) :
+        name(name),
+        p1(atoms[0]),
+        p2(atoms[1]),
+        forceExpression(forceExpression) {
+    CompiledExpressionSet& expressions = data.expressionSet;
+    variableIndex = expressions.getVariableIndex(name);
+
+    // The displacement may be shared with another term, possibly with the opposite direction.
+    data.requestDeltaPair(p1, p2, delta, deltaSign, true);
+}
+
+/**
+ * Describe a term that depends on the angle formed by three particles of the set.
+ *
+ * @param name             name of the angle variable in the expression
+ * @param atoms            positions within the set of the three particles involved
+ * @param forceExpression  compiled derivative of the energy with respect to the variable
+ * @param data             the ThreadData that holds the variable and the list of delta pairs
+ */
+CpuCustomManyParticleForce::AngleTermInfo::AngleTermInfo(const string& name, const vector<int>& atoms, const Lepton::CompiledExpression& forceExpression, ThreadData& data) :
+        name(name),
+        p1(atoms[0]),
+        p2(atoms[1]),
+        p3(atoms[2]),
+        forceExpression(forceExpression) {
+    CompiledExpressionSet& expressions = data.expressionSet;
+    variableIndex = expressions.getVariableIndex(name);
+
+    // Request the two displacements used by the angle; either may be shared with another term in reversed direction.
+    data.requestDeltaPair(p1, p2, delta1, delta1Sign, true);
+    data.requestDeltaPair(p3, p2, delta2, delta2Sign, true);
+}
+
+/**
+ * Describe a term that depends on the dihedral angle formed by four particles of the set.
+ *
+ * @param name             name of the dihedral variable in the expression
+ * @param atoms            positions within the set of the four particles involved
+ * @param forceExpression  compiled derivative of the energy with respect to the variable
+ * @param data             the ThreadData that holds the variable and the list of delta pairs
+ */
+CpuCustomManyParticleForce::DihedralTermInfo::DihedralTermInfo(const string& name, const vector<int>& atoms, const Lepton::CompiledExpression& forceExpression, ThreadData& data) :
+        name(name),
+        p1(atoms[0]),
+        p2(atoms[1]),
+        p3(atoms[2]),
+        p4(atoms[3]),
+        forceExpression(forceExpression) {
+    CompiledExpressionSet& expressions = data.expressionSet;
+    variableIndex = expressions.getVariableIndex(name);
+
+    // Reversed pairs are not accepted here, so the returned sign is always 1 and is not stored.
+    float ignoredSign;
+    data.requestDeltaPair(p2, p1, delta1, ignoredSign, false);
+    data.requestDeltaPair(p2, p3, delta2, ignoredSign, false);
+    data.requestDeltaPair(p4, p3, delta3, ignoredSign, false);
+}
+
+/**
+ * Build the data used by a single thread: the compiled energy expression, the
+ * compiled derivative for every variable the energy can depend on, and the
+ * work arrays used while evaluating one particle set.
+ *
+ * @param force      the force definition being implemented
+ * @param energyExpr the parsed energy expression
+ * @param distances  for each distance variable name, the particles it involves
+ * @param angles     for each angle variable name, the particles it involves
+ * @param dihedrals  for each dihedral variable name, the particles it involves
+ */
+CpuCustomManyParticleForce::ThreadData::ThreadData(const CustomManyParticleForce& force, Lepton::ParsedExpression& energyExpr,
+            map<string, vector<int> >& distances, map<string, vector<int> >& angles, map<string, vector<int> >& dihedrals) {
+    int numParticlesPerSet = force.getNumParticlesPerSet();
+    int numPerParticleParameters = force.getNumPerParticleParameters();
+    particleParamIndices.resize(numParticlesPerSet);
+    permutedParticles.resize(numParticlesPerSet);
+    f.resize(numParticlesPerSet);
+    energyExpression = energyExpr.createCompiledExpression();
+    expressionSet.registerExpression(energyExpression);
+
+    // Differentiate the energy to get expressions for the force.
+
+    for (int i = 0; i < numParticlesPerSet; i++) {
+        // Coordinate and parameter variables carry the 1-based position of the particle as a suffix.
+        stringstream xname, yname, zname;
+        xname << 'x' << (i+1);
+        yname << 'y' << (i+1);
+        zname << 'z' << (i+1);
+        particleTerms.push_back(CpuCustomManyParticleForce::ParticleTermInfo(xname.str(), i, 0, energyExpr.differentiate(xname.str()).optimize().createCompiledExpression(), *this));
+        particleTerms.push_back(CpuCustomManyParticleForce::ParticleTermInfo(yname.str(), i, 1, energyExpr.differentiate(yname.str()).optimize().createCompiledExpression(), *this));
+        particleTerms.push_back(CpuCustomManyParticleForce::ParticleTermInfo(zname.str(), i, 2, energyExpr.differentiate(zname.str()).optimize().createCompiledExpression(), *this));
+        for (int j = 0; j < numPerParticleParameters; j++) {
+            stringstream paramname;
+            paramname << force.getPerParticleParameterName(j) << (i+1);
+            particleParamIndices[i].push_back(expressionSet.getVariableIndex(paramname.str()));
+        }
+    }
+    for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter)
+        dihedralTerms.push_back(CpuCustomManyParticleForce::DihedralTermInfo(iter->first, iter->second, energyExpr.differentiate(iter->first).optimize().createCompiledExpression(), *this));
+    for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter)
+        distanceTerms.push_back(CpuCustomManyParticleForce::DistanceTermInfo(iter->first, iter->second, energyExpr.differentiate(iter->first).optimize().createCompiledExpression(), *this));
+    for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter)
+        angleTerms.push_back(CpuCustomManyParticleForce::AngleTermInfo(iter->first, iter->second, energyExpr.differentiate(iter->first).optimize().createCompiledExpression(), *this));
+
+    // Register the force expressions only after all terms exist, so the vectors holding them no longer change.
+
+    for (int i = 0; i < (int) particleTerms.size(); i++)
+        expressionSet.registerExpression(particleTerms[i].forceExpression);
+    for (int i = 0; i < (int) distanceTerms.size(); i++)
+        expressionSet.registerExpression(distanceTerms[i].forceExpression);
+    for (int i = 0; i < (int) angleTerms.size(); i++)
+        expressionSet.registerExpression(angleTerms[i].forceExpression);
+    for (int i = 0; i < (int) dihedralTerms.size(); i++)
+        expressionSet.registerExpression(dihedralTerms[i].forceExpression);
+
+    // One entry per distinct delta pair requested by the terms above.
+
+    int numDeltas = deltaPairs.size();
+    delta.resize(numDeltas);
+    normDelta.resize(numDeltas);
+    norm2Delta.resize(numDeltas);
+
+    // The cross products are indexed by dihedral term, not by delta pair, so they
+    // need one entry per dihedral term.  Dihedral terms can share delta pairs, so
+    // the number of pairs is not an upper bound on the number of dihedral terms.
+
+    int numDihedrals = (int) dihedralTerms.size();
+    cross1.resize(numDihedrals);
+    cross2.resize(numDihedrals);
+}
+
+/**
+ * Find the entry of deltaPairs describing the displacement between two
+ * particles of the set, adding a new entry if none exists yet.
+ *
+ * @param p1             position within the set of the first particle
+ * @param p2             position within the set of the second particle
+ * @param pairIndex      on exit, the index of the entry in deltaPairs
+ * @param pairSign       on exit, -1 if the entry stores the pair in reversed order, otherwise 1
+ * @param allowReversed  whether an existing entry for (p2, p1) may be reused
+ */
+void CpuCustomManyParticleForce::ThreadData::requestDeltaPair(int p1, int p2, int& pairIndex, float& pairSign, bool allowReversed) {
+    const int numPairs = (int) deltaPairs.size();
+    for (int index = 0; index < numPairs; index++) {
+        const bool sameOrder = (deltaPairs[index].first == p1 && deltaPairs[index].second == p2);
+        const bool reversedOrder = (allowReversed && deltaPairs[index].first == p2 && deltaPairs[index].second == p1);
+        if (!sameOrder && !reversedOrder)
+            continue;
+        pairIndex = index;
+        pairSign = (sameOrder ? 1.0f : -1.0f);
+        return;
+    }
+
+    // No usable entry exists, so append the pair in the requested order.
+
+    pairIndex = numPairs;
+    pairSign = 1;
+    deltaPairs.push_back(make_pair(p1, p2));
+}