Merge pull request #831 from peastman/pvec

Optimizations to PNaCl vectors

Merge pull request #831 from peastman/pvec
Optimizations to PNaCl vectors
57045028 · peastman · 8f39837a · d51ad3c3 · 57045028
Commit 57045028 authored Feb 24, 2015 by peastman
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 33 additions and 25 deletions

openmmapi/include/openmm/internal/vectorize_pnacl.h openmmapi/include/openmm/internal/vectorize_pnacl.h +33 -25

No files found.
--- a/openmmapi/include/openmm/internal/vectorize_pnacl.h
+++ b/openmmapi/include/openmm/internal/vectorize_pnacl.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2014 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -217,28 +217,8 @@ inline ivec4::operator fvec4() const {
 // Functions that operate on fvec4s.
-static inline fvec4 floor(const fvec4& v) {
-    return fvec4(std::floor(v[0]), std::floor(v[1]), std::floor(v[2]), std::floor(v[3]));
-}
-static inline fvec4 ceil(const fvec4& v) {
-    return fvec4(std::ceil(v[0]), std::ceil(v[1]), std::ceil(v[2]), std::ceil(v[3]));
-}
-static inline fvec4 round(const fvec4& v) {
-    return fvec4(std::round(v[0]), std::round(v[1]), std::round(v[2]), std::round(v[3]));
-}
-static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
-    return fvec4(std::min(v1[0], v2[0]), std::min(v1[1], v2[1]), std::min(v1[2], v2[2]), std::min(v1[3], v2[3]));
-}
-static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
-    return fvec4(std::max(v1[0], v2[0]), std::max(v1[1], v2[1]), std::max(v1[2], v2[2]), std::max(v1[3], v2[3]));
-}
 static inline fvec4 abs(const fvec4& v) {
-    return fvec4(std::abs(v[0]), std::abs(v[1]), std::abs(v[2]), std::abs(v[3]));
+    return v&(__m128) ivec4(0x7FFFFFFF);
 }
 static inline fvec4 sqrt(const fvec4& v) {
@@ -252,7 +232,8 @@ static inline float dot3(const fvec4& v1, const fvec4& v2) {
 static inline float dot4(const fvec4& v1, const fvec4& v2) {
    fvec4 r = v1*v2;
-    return r[0]+r[1]+r[2]+r[3];
+    fvec4 temp = __builtin_shufflevector(r.val, r.val, 0, 1, -1, -1)+__builtin_shufflevector(r.val, r.val, 2, 3, -1, -1);
+    return temp[0]+temp[1];
 }
 static inline fvec4 cross(const fvec4& v1, const fvec4& v2) {
@@ -287,7 +268,8 @@ static inline ivec4 abs(const ivec4& v) {
 }
 static inline bool any(const __m128i& v) {
-    return (v[0] || v[1] || v[2] || v[3]);
+    ivec4 temp = __builtin_shufflevector(v, v, 0, 1, -1, -1) | __builtin_shufflevector(v, v, 2, 3, -1, -1);
+    return (temp[0] || temp[1]);
 }
 // Mathematical operators involving a scalar and a vector.
@@ -311,7 +293,33 @@ static inline fvec4 operator/(float v1, const fvec4& v2) {
 // Operations for blending fvec4s based on an ivec4.
 static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const __m128i& mask) {
-    return fvec4(mask[0] ? v2[0] : v1[0], mask[1] ? v2[1] : v1[1], mask[2] ? v2[2] : v1[2], mask[3] ? v2[3] : v1[3]);
+    return (__m128) ((mask&(__m128i)v2) + ((ivec4(0xFFFFFFFF)-ivec4(mask))&(__m128i)v1));
+}
+// These are defined at the end because they are built on functions defined above (blend(), abs(), and one another).
+static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
+    return blend(v1, v2, v1 > v2);
+}
+static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
+    return blend(v1, v2, v1 < v2);
+}
+static inline fvec4 round(const fvec4& v) {
+    fvec4 shift(0x1.0p23f);
+    fvec4 absResult = (abs(v)+shift)-shift;
+    return (__m128) ((ivec4(0x80000000)&(__m128i)v) + (ivec4(0x7FFFFFFF)&(__m128i)absResult));
+}
+static inline fvec4 floor(const fvec4& v) {
+    fvec4 rounded = round(v);
+    return rounded + blend(0.0f, -1.0f, rounded>v);
+}
+static inline fvec4 ceil(const fvec4& v) {
+    fvec4 rounded = round(v);
+    return rounded + blend(0.0f, 1.0f, rounded<v);
 }
 #endif /*OPENMM_VECTORIZE_PNACL_H_*/