Commit 57045028 authored by peastman's avatar peastman
Browse files

Merge pull request #831 from peastman/pvec

Optimizations to PNaCl vectors
parents 8f39837a d51ad3c3
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2014 Stanford University and the Authors. *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -217,28 +217,8 @@ inline ivec4::operator fvec4() const {
// Functions that operate on fvec4s.
static inline fvec4 floor(const fvec4& v) {
    // Scalar fallback: round every lane down with std::floor, then repack.
    float lanes[4];
    for (int i = 0; i < 4; ++i)
        lanes[i] = std::floor(v[i]);
    return fvec4(lanes[0], lanes[1], lanes[2], lanes[3]);
}
static inline fvec4 ceil(const fvec4& v) {
    // Scalar fallback: round every lane up with std::ceil, then repack.
    float lanes[4];
    for (int i = 0; i < 4; ++i)
        lanes[i] = std::ceil(v[i]);
    return fvec4(lanes[0], lanes[1], lanes[2], lanes[3]);
}
static inline fvec4 round(const fvec4& v) {
    // Scalar fallback: round every lane to the nearest integer with
    // std::round (halfway cases go away from zero), then repack.
    float lanes[4];
    for (int i = 0; i < 4; ++i)
        lanes[i] = std::round(v[i]);
    return fvec4(lanes[0], lanes[1], lanes[2], lanes[3]);
}
static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
    // Scalar fallback: take the smaller of each pair of corresponding lanes.
    float lanes[4];
    for (int i = 0; i < 4; ++i)
        lanes[i] = std::min(v1[i], v2[i]);
    return fvec4(lanes[0], lanes[1], lanes[2], lanes[3]);
}
static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
    // Scalar fallback: take the larger of each pair of corresponding lanes.
    float lanes[4];
    for (int i = 0; i < 4; ++i)
        lanes[i] = std::max(v1[i], v2[i]);
    return fvec4(lanes[0], lanes[1], lanes[2], lanes[3]);
}
// Element-wise absolute value of an fvec4.
// NOTE(review): the body contains two return statements. This looks like the
// removed and added lines of a diff hunk rendered without +/- markers; as
// written, only the first return executes and the second is unreachable.
static inline fvec4 abs(const fvec4& v) {
// Scalar form: std::abs applied to each of the four lanes.
return fvec4(std::abs(v[0]), std::abs(v[1]), std::abs(v[2]), std::abs(v[3]));
// Bitwise form: ANDs v with 0x7FFFFFFF, a constant with every bit set except
// bit 31, so the top (sign) bit of a 32-bit lane is cleared.
// Presumably ivec4(int) broadcasts the constant to all four lanes - confirm.
return v&(__m128) ivec4(0x7FFFFFFF);
}
static inline fvec4 sqrt(const fvec4& v) {
......@@ -252,7 +232,8 @@ static inline float dot3(const fvec4& v1, const fvec4& v2) {
// Returns the sum of the four lane-wise products of v1 and v2.
// NOTE(review): the body contains two return paths. This looks like the
// removed and added lines of a diff hunk rendered without +/- markers; as
// written, only the first return executes and the lines after it are
// unreachable.
static inline float dot4(const fvec4& v1, const fvec4& v2) {
// Lane-wise products.
fvec4 r = v1*v2;
// Scalar form: adds the four lanes left to right.
return r[0]+r[1]+r[2]+r[3];
// Shuffle form: the first shuffle selects lanes 0 and 1, the second selects
// lanes 2 and 3 (index -1 marks a don't-care lane), so after the vector add
// temp[0] = r[0]+r[2] and temp[1] = r[1]+r[3]. The additions are grouped
// differently from the scalar form above.
fvec4 temp = __builtin_shufflevector(r.val, r.val, 0, 1, -1, -1)+__builtin_shufflevector(r.val, r.val, 2, 3, -1, -1);
return temp[0]+temp[1];
}
static inline fvec4 cross(const fvec4& v1, const fvec4& v2) {
......@@ -287,7 +268,8 @@ static inline ivec4 abs(const ivec4& v) {
}
// Returns true if at least one of the four lanes of v is nonzero.
// NOTE(review): the body contains two return paths. This looks like the
// removed and added lines of a diff hunk rendered without +/- markers; as
// written, only the first return executes and the lines after it are
// unreachable.
static inline bool any(const __m128i& v) {
// Scalar form: tests each lane in turn.
return (v[0] || v[1] || v[2] || v[3]);
// Shuffle form: ORs lanes (0,1) with lanes (2,3) (index -1 marks a don't-care
// lane), so temp[0] = v[0]|v[2] and temp[1] = v[1]|v[3]; two scalar tests then
// cover all four lanes.
ivec4 temp = __builtin_shufflevector(v, v, 0, 1, -1, -1) | __builtin_shufflevector(v, v, 2, 3, -1, -1);
return (temp[0] || temp[1]);
}
// Mathematical operators involving a scalar and a vector.
......@@ -311,7 +293,33 @@ static inline fvec4 operator/(float v1, const fvec4& v2) {
// Operations for blending fvec4s based on an ivec4.
// Builds an fvec4 by choosing, per lane, between v1 and v2 according to mask.
// NOTE(review): the body contains two return statements. This looks like the
// removed and added lines of a diff hunk rendered without +/- markers; as
// written, only the first return executes and the second is unreachable.
static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const __m128i& mask) {
// Scalar form: a lane comes from v2 when the matching mask lane is nonzero,
// otherwise from v1.
return fvec4(mask[0] ? v2[0] : v1[0], mask[1] ? v2[1] : v1[1], mask[2] ? v2[2] : v1[2], mask[3] ? v2[3] : v1[3]);
// Bitwise form: (mask & v2) + ((0xFFFFFFFF - mask) & v1). For a 32-bit lane,
// 0xFFFFFFFF - mask is the bitwise complement of mask, so the two terms have
// no set bits in common and the + acts as an OR. This is a per-bit select; it
// matches the scalar form only when every mask lane is all zeros or all ones.
return (__m128) ((mask&(__m128i)v2) + ((ivec4(0xFFFFFFFF)-ivec4(mask))&(__m128i)v1));
}
// These are at the end since they involve other functions defined above.
static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
    // A lane is taken from v2 wherever v1 is strictly greater; every other
    // lane keeps v1.
    const __m128i takeSecond = v1 > v2;
    return blend(v1, v2, takeSecond);
}
static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
    // A lane is taken from v2 wherever v1 is strictly smaller; every other
    // lane keeps v1.
    const __m128i takeSecond = v1 < v2;
    return blend(v1, v2, takeSecond);
}
// Rounds each lane of v to an integral value without calling the scalar
// library routine.
//
// The magnitude is rounded by adding and then subtracting 2^23: single
// precision floats in [2^23, 2^24) have a spacing of exactly 1, so the
// addition discards the fractional bits using the current rounding mode
// (with the default round-to-nearest mode, halfway cases go to the even
// neighbour, which differs from std::round). The sign bit of the input is
// then copied onto the result, so negative inputs that round to zero
// yield -0.0f.
static inline fvec4 round(const fvec4& v) {
    fvec4 shift(0x1.0p23f);
    fvec4 absV = abs(v);
    fvec4 absResult = (absV+shift)-shift;
    // The shift trick is only valid for magnitudes up to 2^23. Larger values
    // are already integral, but absV+shift is no longer exactly representable
    // and would alter them (e.g. 8388609.0f would come back as 8388608.0f),
    // so pass those lanes through unchanged.
    absResult = blend(absResult, absV, absV > shift);
    // Combine the sign bit of v with the magnitude bits of the rounded value.
    return (__m128) ((ivec4(0x80000000)&(__m128i)v) + (ivec4(0x7FFFFFFF)&(__m128i)absResult));
}
static inline fvec4 floor(const fvec4& v) {
    // Start from the nearest integral value, then step down by one in every
    // lane where that value ended up above the input.
    fvec4 nearest = round(v);
    const __m128i overshot = nearest>v;
    fvec4 correction = blend(0.0f, -1.0f, overshot);
    return nearest + correction;
}
static inline fvec4 ceil(const fvec4& v) {
    // Start from the nearest integral value, then step up by one in every
    // lane where that value ended up below the input.
    fvec4 nearest = round(v);
    const __m128i undershot = nearest<v;
    fvec4 correction = blend(0.0f, 1.0f, undershot);
    return nearest + correction;
}
#endif /*OPENMM_VECTORIZE_PNACL_H_*/
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment