Commit 7c54418c authored by peastman
Browse files

Bug fixes to PNaCl vectors

parent 0bb293f8
......@@ -44,8 +44,8 @@ static bool isVec4Supported() {
return true;
}
typedef float __m128 __attribute__((vector_size(16)));
typedef int __m128i __attribute__((vector_size(16)));
typedef float __m128 __attribute__((vector_size(16), aligned(4)));
typedef int __m128i __attribute__((vector_size(16), aligned(4)));
class ivec4;
......@@ -280,7 +280,7 @@ static inline ivec4 abs(const ivec4& v) {
return ivec4(abs(v[0]), abs(v[1]), abs(v[2]), abs(v[3]));
}
static inline bool any(const ivec4& v) {
static inline bool any(const __m128i& v) {
return (v[0] || v[1] || v[2] || v[3]);
}
......@@ -304,7 +304,7 @@ static inline fvec4 operator/(float v1, const fvec4& v2) {
// Operations for blending fvec4s based on an ivec4.
static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const ivec4& mask) {
static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const __m128i& mask) {
return fvec4(mask[0] ? v2[0] : v1[0], mask[1] ? v2[1] : v1[1], mask[2] ? v2[2] : v1[2], mask[3] ? v2[3] : v1[3]);
}
......
......@@ -410,18 +410,10 @@ fvec4 CpuGBSAOBCForce::fastLog(const fvec4& x) {
return fvec4(logf(x[0]), logf(x[1]), logf(x[2]), logf(x[3]));
fvec4 coeff2 = x1-index;
fvec4 coeff1 = 1.0f-coeff2;
#ifdef __PNACL__
// PNaCl crashes on unaligned loads.
fvec4 t1(logTable[index[0]], logTable[index[0]+1], logTable[index[0]+2], logTable[index[0]+3]);
fvec4 t2(logTable[index[1]], logTable[index[1]+1], logTable[index[1]+2], logTable[index[1]+3]);
fvec4 t3(logTable[index[2]], logTable[index[2]+1], logTable[index[2]+2], logTable[index[2]+3]);
fvec4 t4(logTable[index[3]], logTable[index[3]+1], logTable[index[3]+2], logTable[index[3]+3]);
#else
fvec4 t1(&logTable[index[0]]);
fvec4 t2(&logTable[index[1]]);
fvec4 t3(&logTable[index[2]]);
fvec4 t4(&logTable[index[3]]);
#endif
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
......@@ -292,18 +292,10 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
ivec4 index = min(floor(x1), NUM_TABLE_POINTS);
fvec4 coeff2 = x1-index;
fvec4 coeff1 = 1.0f-coeff2;
#ifdef __PNACL__
// PNaCl crashes on unaligned loads.
fvec4 t1(ewaldScaleTable[index[0]], ewaldScaleTable[index[0]+1], ewaldScaleTable[index[0]+2], ewaldScaleTable[index[0]+3]);
fvec4 t2(ewaldScaleTable[index[1]], ewaldScaleTable[index[1]+1], ewaldScaleTable[index[1]+2], ewaldScaleTable[index[1]+3]);
fvec4 t3(ewaldScaleTable[index[2]], ewaldScaleTable[index[2]+1], ewaldScaleTable[index[2]+2], ewaldScaleTable[index[2]+3]);
fvec4 t4(ewaldScaleTable[index[3]], ewaldScaleTable[index[3]+1], ewaldScaleTable[index[3]+2], ewaldScaleTable[index[3]+3]);
#else
fvec4 t1(&ewaldScaleTable[index[0]]);
fvec4 t2(&ewaldScaleTable[index[1]]);
fvec4 t3(&ewaldScaleTable[index[2]]);
fvec4 t4(&ewaldScaleTable[index[3]]);
#endif
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment