Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f83289a4
"...cpu/ssh:/git@developer.sourcefind.cn:2222/tsoc/openmm.git" did not exist on "bb3073d4f3722abeb4f215f358f5ae709c996ad0"
Commit
f83289a4
authored
May 24, 2020
by
Marc Marí
Browse files
Pass by value in vectorization API
parent
828ae646
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
284 additions
and
284 deletions
+284
-284
openmmapi/include/openmm/internal/vectorize8.h
openmmapi/include/openmm/internal/vectorize8.h
+40
-40
openmmapi/include/openmm/internal/vectorize_neon.h
openmmapi/include/openmm/internal/vectorize_neon.h
+57
-57
openmmapi/include/openmm/internal/vectorize_pnacl.h
openmmapi/include/openmm/internal/vectorize_pnacl.h
+63
-63
openmmapi/include/openmm/internal/vectorize_ppc.h
openmmapi/include/openmm/internal/vectorize_ppc.h
+67
-67
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+57
-57
No files found.
openmmapi/include/openmm/internal/vectorize8.h
View file @
f83289a4
...
@@ -75,55 +75,55 @@ public:
...
@@ -75,55 +75,55 @@ public:
void
store
(
float
*
v
)
const
{
void
store
(
float
*
v
)
const
{
_mm256_storeu_ps
(
v
,
val
);
_mm256_storeu_ps
(
v
,
val
);
}
}
fvec8
operator
+
(
const
fvec8
&
other
)
const
{
fvec8
operator
+
(
fvec8
other
)
const
{
return
_mm256_add_ps
(
val
,
other
);
return
_mm256_add_ps
(
val
,
other
);
}
}
fvec8
operator
-
(
const
fvec8
&
other
)
const
{
fvec8
operator
-
(
fvec8
other
)
const
{
return
_mm256_sub_ps
(
val
,
other
);
return
_mm256_sub_ps
(
val
,
other
);
}
}
fvec8
operator
*
(
const
fvec8
&
other
)
const
{
fvec8
operator
*
(
fvec8
other
)
const
{
return
_mm256_mul_ps
(
val
,
other
);
return
_mm256_mul_ps
(
val
,
other
);
}
}
fvec8
operator
/
(
const
fvec8
&
other
)
const
{
fvec8
operator
/
(
fvec8
other
)
const
{
return
_mm256_div_ps
(
val
,
other
);
return
_mm256_div_ps
(
val
,
other
);
}
}
void
operator
+=
(
const
fvec8
&
other
)
{
void
operator
+=
(
fvec8
other
)
{
val
=
_mm256_add_ps
(
val
,
other
);
val
=
_mm256_add_ps
(
val
,
other
);
}
}
void
operator
-=
(
const
fvec8
&
other
)
{
void
operator
-=
(
fvec8
other
)
{
val
=
_mm256_sub_ps
(
val
,
other
);
val
=
_mm256_sub_ps
(
val
,
other
);
}
}
void
operator
*=
(
const
fvec8
&
other
)
{
void
operator
*=
(
fvec8
other
)
{
val
=
_mm256_mul_ps
(
val
,
other
);
val
=
_mm256_mul_ps
(
val
,
other
);
}
}
void
operator
/=
(
const
fvec8
&
other
)
{
void
operator
/=
(
fvec8
other
)
{
val
=
_mm256_div_ps
(
val
,
other
);
val
=
_mm256_div_ps
(
val
,
other
);
}
}
fvec8
operator
-
()
const
{
fvec8
operator
-
()
const
{
return
_mm256_sub_ps
(
_mm256_set1_ps
(
0.0
f
),
val
);
return
_mm256_sub_ps
(
_mm256_set1_ps
(
0.0
f
),
val
);
}
}
fvec8
operator
&
(
const
fvec8
&
other
)
const
{
fvec8
operator
&
(
fvec8
other
)
const
{
return
_mm256_and_ps
(
val
,
other
);
return
_mm256_and_ps
(
val
,
other
);
}
}
fvec8
operator
|
(
const
fvec8
&
other
)
const
{
fvec8
operator
|
(
fvec8
&
other
)
const
{
return
_mm256_or_ps
(
val
,
other
);
return
_mm256_or_ps
(
val
,
other
);
}
}
fvec8
operator
==
(
const
fvec8
&
other
)
const
{
fvec8
operator
==
(
fvec8
other
)
const
{
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_EQ_OQ
);
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_EQ_OQ
);
}
}
fvec8
operator
!=
(
const
fvec8
&
other
)
const
{
fvec8
operator
!=
(
fvec8
other
)
const
{
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_NEQ_OQ
);
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_NEQ_OQ
);
}
}
fvec8
operator
>
(
const
fvec8
&
other
)
const
{
fvec8
operator
>
(
fvec8
other
)
const
{
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_GT_OQ
);
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_GT_OQ
);
}
}
fvec8
operator
<
(
const
fvec8
&
other
)
const
{
fvec8
operator
<
(
fvec8
other
)
const
{
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_LT_OQ
);
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_LT_OQ
);
}
}
fvec8
operator
>=
(
const
fvec8
&
other
)
const
{
fvec8
operator
>=
(
fvec8
other
)
const
{
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_GE_OQ
);
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_GE_OQ
);
}
}
fvec8
operator
<=
(
const
fvec8
&
other
)
const
{
fvec8
operator
<=
(
fvec8
other
)
const
{
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_LE_OQ
);
return
_mm256_cmp_ps
(
val
,
other
,
_CMP_LE_OQ
);
}
}
operator
ivec8
()
const
;
operator
ivec8
()
const
;
...
@@ -159,10 +159,10 @@ public:
...
@@ -159,10 +159,10 @@ public:
void
store
(
int
*
v
)
const
{
void
store
(
int
*
v
)
const
{
_mm256_storeu_si256
((
__m256i
*
)
v
,
val
);
_mm256_storeu_si256
((
__m256i
*
)
v
,
val
);
}
}
ivec8
operator
&
(
const
ivec8
&
other
)
const
{
ivec8
operator
&
(
ivec8
other
)
const
{
return
_mm256_castps_si256
(
_mm256_and_ps
(
_mm256_castsi256_ps
(
val
),
_mm256_castsi256_ps
(
other
.
val
)));
return
_mm256_castps_si256
(
_mm256_and_ps
(
_mm256_castsi256_ps
(
val
),
_mm256_castsi256_ps
(
other
.
val
)));
}
}
ivec8
operator
|
(
const
ivec8
&
other
)
const
{
ivec8
operator
|
(
ivec8
other
)
const
{
return
_mm256_castps_si256
(
_mm256_or_ps
(
_mm256_castsi256_ps
(
val
),
_mm256_castsi256_ps
(
other
.
val
)));
return
_mm256_castps_si256
(
_mm256_or_ps
(
_mm256_castsi256_ps
(
val
),
_mm256_castsi256_ps
(
other
.
val
)));
}
}
operator
fvec8
()
const
;
operator
fvec8
()
const
;
...
@@ -193,36 +193,36 @@ inline fvec8 fvec8::expandBitsToMask(int bitmask) {
...
@@ -193,36 +193,36 @@ inline fvec8 fvec8::expandBitsToMask(int bitmask) {
// Functions that operate on fvec8s.
// Functions that operate on fvec8s.
static
inline
fvec8
floor
(
const
fvec8
&
v
)
{
static
inline
fvec8
floor
(
fvec8
v
)
{
return
fvec8
(
_mm256_round_ps
(
v
.
val
,
0x09
));
return
fvec8
(
_mm256_round_ps
(
v
.
val
,
0x09
));
}
}
static
inline
fvec8
ceil
(
const
fvec8
&
v
)
{
static
inline
fvec8
ceil
(
fvec8
v
)
{
return
fvec8
(
_mm256_round_ps
(
v
.
val
,
0x0A
));
return
fvec8
(
_mm256_round_ps
(
v
.
val
,
0x0A
));
}
}
static
inline
fvec8
round
(
const
fvec8
&
v
)
{
static
inline
fvec8
round
(
fvec8
v
)
{
return
fvec8
(
_mm256_round_ps
(
v
.
val
,
_MM_FROUND_TO_NEAREST_INT
));
return
fvec8
(
_mm256_round_ps
(
v
.
val
,
_MM_FROUND_TO_NEAREST_INT
));
}
}
static
inline
fvec8
min
(
const
fvec8
&
v1
,
const
fvec8
&
v2
)
{
static
inline
fvec8
min
(
fvec8
v1
,
fvec8
v2
)
{
return
fvec8
(
_mm256_min_ps
(
v1
.
val
,
v2
.
val
));
return
fvec8
(
_mm256_min_ps
(
v1
.
val
,
v2
.
val
));
}
}
static
inline
fvec8
max
(
const
fvec8
&
v1
,
const
fvec8
&
v2
)
{
static
inline
fvec8
max
(
fvec8
v1
,
fvec8
v2
)
{
return
fvec8
(
_mm256_max_ps
(
v1
.
val
,
v2
.
val
));
return
fvec8
(
_mm256_max_ps
(
v1
.
val
,
v2
.
val
));
}
}
static
inline
fvec8
abs
(
const
fvec8
&
v
)
{
static
inline
fvec8
abs
(
fvec8
v
)
{
static
const
__m256
mask
=
_mm256_castsi256_ps
(
_mm256_set1_epi32
(
0x7FFFFFFF
));
static
const
__m256
mask
=
_mm256_castsi256_ps
(
_mm256_set1_epi32
(
0x7FFFFFFF
));
return
fvec8
(
_mm256_and_ps
(
v
.
val
,
mask
));
return
fvec8
(
_mm256_and_ps
(
v
.
val
,
mask
));
}
}
static
inline
fvec8
sqrt
(
const
fvec8
&
v
)
{
static
inline
fvec8
sqrt
(
fvec8
v
)
{
return
fvec8
(
_mm256_sqrt_ps
(
v
.
val
));
return
fvec8
(
_mm256_sqrt_ps
(
v
.
val
));
}
}
static
inline
fvec8
rsqrt
(
const
fvec8
&
v
)
{
static
inline
fvec8
rsqrt
(
fvec8
v
)
{
// Initial estimate of rsqrt().
// Initial estimate of rsqrt().
fvec8
y
(
_mm256_rsqrt_ps
(
v
.
val
));
fvec8
y
(
_mm256_rsqrt_ps
(
v
.
val
));
...
@@ -234,17 +234,17 @@ static inline fvec8 rsqrt(const fvec8& v) {
...
@@ -234,17 +234,17 @@ static inline fvec8 rsqrt(const fvec8& v) {
return
y
;
return
y
;
}
}
static
inline
float
dot8
(
const
fvec8
&
v1
,
const
fvec8
&
v2
)
{
static
inline
float
dot8
(
fvec8
v1
,
fvec8
v2
)
{
fvec8
result
=
_mm256_dp_ps
(
v1
,
v2
,
0xF1
);
fvec8
result
=
_mm256_dp_ps
(
v1
,
v2
,
0xF1
);
return
_mm_cvtss_f32
(
result
.
lowerVec
())
+
_mm_cvtss_f32
(
result
.
upperVec
());
return
_mm_cvtss_f32
(
result
.
lowerVec
())
+
_mm_cvtss_f32
(
result
.
upperVec
());
}
}
static
inline
float
reduceAdd
(
const
fvec8
v
)
{
static
inline
float
reduceAdd
(
fvec8
v
)
{
// :TODO: There are more efficient ways to do this.
// :TODO: There are more efficient ways to do this.
return
dot8
(
v
,
fvec8
(
1.0
f
));
return
dot8
(
v
,
fvec8
(
1.0
f
));
}
}
static
inline
void
transpose
(
const
fvec4
&
in1
,
const
fvec4
&
in2
,
const
fvec4
&
in3
,
const
fvec4
&
in4
,
const
fvec4
&
in5
,
const
fvec4
&
in6
,
const
fvec4
&
in7
,
const
fvec4
&
in8
,
fvec8
&
out1
,
fvec8
&
out2
,
fvec8
&
out3
,
fvec8
&
out4
)
{
static
inline
void
transpose
(
fvec4
in1
,
fvec4
in2
,
fvec4
in3
,
fvec4
in4
,
fvec4
in5
,
fvec4
in6
,
fvec4
in7
,
fvec4
in8
,
fvec8
&
out1
,
fvec8
&
out2
,
fvec8
&
out3
,
fvec8
&
out4
)
{
fvec4
i1
=
in1
,
i2
=
in2
,
i3
=
in3
,
i4
=
in4
;
fvec4
i1
=
in1
,
i2
=
in2
,
i3
=
in3
,
i4
=
in4
;
fvec4
i5
=
in5
,
i6
=
in6
,
i7
=
in7
,
i8
=
in8
;
fvec4
i5
=
in5
,
i6
=
in6
,
i7
=
in7
,
i8
=
in8
;
_MM_TRANSPOSE4_PS
(
i1
,
i2
,
i3
,
i4
);
_MM_TRANSPOSE4_PS
(
i1
,
i2
,
i3
,
i4
);
...
@@ -275,7 +275,7 @@ static inline void transpose(const fvec4 in[8], fvec8& out1, fvec8& out2, fvec8&
...
@@ -275,7 +275,7 @@ static inline void transpose(const fvec4 in[8], fvec8& out1, fvec8& out2, fvec8&
transpose
(
in
[
0
],
in
[
1
],
in
[
2
],
in
[
3
],
in
[
4
],
in
[
5
],
in
[
6
],
in
[
7
],
out1
,
out2
,
out3
,
out4
);
transpose
(
in
[
0
],
in
[
1
],
in
[
2
],
in
[
3
],
in
[
4
],
in
[
5
],
in
[
6
],
in
[
7
],
out1
,
out2
,
out3
,
out4
);
}
}
static
inline
void
transpose
(
const
fvec8
&
in1
,
const
fvec8
&
in2
,
const
fvec8
&
in3
,
const
fvec8
&
in4
,
fvec4
&
out1
,
fvec4
&
out2
,
fvec4
&
out3
,
fvec4
&
out4
,
fvec4
&
out5
,
fvec4
&
out6
,
fvec4
&
out7
,
fvec4
&
out8
)
{
static
inline
void
transpose
(
fvec8
in1
,
fvec8
in2
,
fvec8
in3
,
fvec8
in4
,
fvec4
&
out1
,
fvec4
&
out2
,
fvec4
&
out3
,
fvec4
&
out4
,
fvec4
&
out5
,
fvec4
&
out6
,
fvec4
&
out7
,
fvec4
&
out8
)
{
out1
=
in1
.
lowerVec
();
out1
=
in1
.
lowerVec
();
out2
=
in2
.
lowerVec
();
out2
=
in2
.
lowerVec
();
out3
=
in3
.
lowerVec
();
out3
=
in3
.
lowerVec
();
...
@@ -291,40 +291,40 @@ static inline void transpose(const fvec8& in1, const fvec8& in2, const fvec8& in
...
@@ -291,40 +291,40 @@ static inline void transpose(const fvec8& in1, const fvec8& in2, const fvec8& in
/**
/**
* Given 4 input vectors of 8 elements, transpose them to form 8 output vectors of 4 elements.
* Given 4 input vectors of 8 elements, transpose them to form 8 output vectors of 4 elements.
*/
*/
static
inline
void
transpose
(
const
fvec8
&
in1
,
const
fvec8
&
in2
,
const
fvec8
&
in3
,
const
fvec8
&
in4
,
fvec4
out
[
8
])
{
static
inline
void
transpose
(
fvec8
in1
,
fvec8
in2
,
fvec8
in3
,
fvec8
in4
,
fvec4
out
[
8
])
{
transpose
(
in1
,
in2
,
in3
,
in4
,
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
],
out
[
4
],
out
[
5
],
out
[
6
],
out
[
7
]);
transpose
(
in1
,
in2
,
in3
,
in4
,
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
],
out
[
4
],
out
[
5
],
out
[
6
],
out
[
7
]);
}
}
// Functions that operate on ivec8s.
// Functions that operate on ivec8s.
static
inline
bool
any
(
const
ivec8
&
v
)
{
static
inline
bool
any
(
ivec8
v
)
{
return
!
_mm256_testz_si256
(
v
,
_mm256_set1_epi32
(
0xFFFFFFFF
));
return
!
_mm256_testz_si256
(
v
,
_mm256_set1_epi32
(
0xFFFFFFFF
));
}
}
// Mathematical operators involving a scalar and a vector.
// Mathematical operators involving a scalar and a vector.
static
inline
fvec8
operator
+
(
float
v1
,
const
fvec8
&
v2
)
{
static
inline
fvec8
operator
+
(
float
v1
,
fvec8
v2
)
{
return
fvec8
(
v1
)
+
v2
;
return
fvec8
(
v1
)
+
v2
;
}
}
static
inline
fvec8
operator
-
(
float
v1
,
const
fvec8
&
v2
)
{
static
inline
fvec8
operator
-
(
float
v1
,
fvec8
v2
)
{
return
fvec8
(
v1
)
-
v2
;
return
fvec8
(
v1
)
-
v2
;
}
}
static
inline
fvec8
operator
*
(
float
v1
,
const
fvec8
&
v2
)
{
static
inline
fvec8
operator
*
(
float
v1
,
fvec8
v2
)
{
return
fvec8
(
v1
)
*
v2
;
return
fvec8
(
v1
)
*
v2
;
}
}
static
inline
fvec8
operator
/
(
float
v1
,
const
fvec8
&
v2
)
{
static
inline
fvec8
operator
/
(
float
v1
,
fvec8
v2
)
{
return
fvec8
(
v1
)
/
v2
;
return
fvec8
(
v1
)
/
v2
;
}
}
// Operation for blending fvec8 from a full bitmask.
// Operation for blending fvec8 from a full bitmask.
static
inline
fvec8
blend
(
const
fvec8
&
v1
,
const
fvec8
&
v2
,
const
fvec8
&
mask
)
{
static
inline
fvec8
blend
(
fvec8
v1
,
fvec8
v2
,
fvec8
mask
)
{
return
fvec8
(
_mm256_blendv_ps
(
v1
.
val
,
v2
.
val
,
mask
.
val
));
return
fvec8
(
_mm256_blendv_ps
(
v1
.
val
,
v2
.
val
,
mask
.
val
));
}
}
static
inline
fvec8
blendZero
(
const
fvec8
v
,
const
fvec8
mask
)
{
static
inline
fvec8
blendZero
(
fvec8
v
,
fvec8
mask
)
{
return
blend
(
0.0
f
,
v
,
mask
);
return
blend
(
0.0
f
,
v
,
mask
);
}
}
...
@@ -333,7 +333,7 @@ static inline fvec8 blendZero(const fvec8 v, const fvec8 mask) {
...
@@ -333,7 +333,7 @@ static inline fvec8 blendZero(const fvec8 v, const fvec8 mask) {
* of vectors. The first result vector contains the values at the given indexes, and the second
* of vectors. The first result vector contains the values at the given indexes, and the second
* result vector contains the values from each respective index+1.
* result vector contains the values from each respective index+1.
*/
*/
static
inline
void
gatherVecPair
(
const
float
*
table
,
const
ivec8
index
,
fvec8
&
out0
,
fvec8
&
out1
)
{
static
inline
void
gatherVecPair
(
const
float
*
table
,
ivec8
index
,
fvec8
&
out0
,
fvec8
&
out1
)
{
const
auto
lower
=
index
.
lowerVec
();
const
auto
lower
=
index
.
lowerVec
();
const
auto
upper
=
index
.
upperVec
();
const
auto
upper
=
index
.
upperVec
();
...
@@ -368,7 +368,7 @@ static inline void gatherVecPair(const float* table, const ivec8 index, fvec8& o
...
@@ -368,7 +368,7 @@ static inline void gatherVecPair(const float* table, const ivec8 index, fvec8& o
* output[2] = (Z0 + Z1 + Z2 + ...)
* output[2] = (Z0 + Z1 + Z2 + ...)
* output[3] = undefined
* output[3] = undefined
*/
*/
static
inline
fvec4
reduceToVec3
(
const
fvec8
x
,
const
fvec8
y
,
const
fvec8
z
)
{
static
inline
fvec4
reduceToVec3
(
fvec8
x
,
fvec8
y
,
fvec8
z
)
{
// The general strategy for a vector reduce-add operation is to take values from
// The general strategy for a vector reduce-add operation is to take values from
// different parts of the vector and overlap them a different part of the vector and then
// different parts of the vector and overlap them a different part of the vector and then
// add together. Repeat this several times until all values have been summed. Initially 8
// add together. Repeat this several times until all values have been summed. Initially 8
...
...
openmmapi/include/openmm/internal/vectorize_neon.h
View file @
f83289a4
...
@@ -118,16 +118,16 @@ public:
...
@@ -118,16 +118,16 @@ public:
v
[
2
]
=
vgetq_lane_f32
(
val
,
2
);
v
[
2
]
=
vgetq_lane_f32
(
val
,
2
);
}
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
fvec4
operator
+
(
fvec4
other
)
const
{
return
vaddq_f32
(
val
,
other
);
return
vaddq_f32
(
val
,
other
);
}
}
fvec4
operator
-
(
const
fvec4
&
other
)
const
{
fvec4
operator
-
(
fvec4
other
)
const
{
return
vsubq_f32
(
val
,
other
);
return
vsubq_f32
(
val
,
other
);
}
}
fvec4
operator
*
(
const
fvec4
&
other
)
const
{
fvec4
operator
*
(
fvec4
other
)
const
{
return
vmulq_f32
(
val
,
other
);
return
vmulq_f32
(
val
,
other
);
}
}
fvec4
operator
/
(
const
fvec4
&
other
)
const
{
fvec4
operator
/
(
fvec4
other
)
const
{
// NEON does not have a divide float-point operator, so we get the reciprocal and multiply.
// NEON does not have a divide float-point operator, so we get the reciprocal and multiply.
float32x4_t
reciprocal
=
vrecpeq_f32
(
other
);
float32x4_t
reciprocal
=
vrecpeq_f32
(
other
);
...
@@ -136,43 +136,43 @@ public:
...
@@ -136,43 +136,43 @@ public:
fvec4
result
=
vmulq_f32
(
val
,
reciprocal
);
fvec4
result
=
vmulq_f32
(
val
,
reciprocal
);
return
result
;
return
result
;
}
}
void
operator
+=
(
const
fvec4
&
other
)
{
void
operator
+=
(
fvec4
other
)
{
val
=
vaddq_f32
(
val
,
other
);
val
=
vaddq_f32
(
val
,
other
);
}
}
void
operator
-=
(
const
fvec4
&
other
)
{
void
operator
-=
(
fvec4
other
)
{
val
=
vsubq_f32
(
val
,
other
);
val
=
vsubq_f32
(
val
,
other
);
}
}
void
operator
*=
(
const
fvec4
&
other
)
{
void
operator
*=
(
fvec4
other
)
{
val
=
vmulq_f32
(
val
,
other
);
val
=
vmulq_f32
(
val
,
other
);
}
}
void
operator
/=
(
const
fvec4
&
other
)
{
void
operator
/=
(
fvec4
other
)
{
val
=
*
this
/
other
;
val
=
*
this
/
other
;
}
}
fvec4
operator
-
()
const
{
fvec4
operator
-
()
const
{
return
vnegq_f32
(
val
);
return
vnegq_f32
(
val
);
}
}
fvec4
operator
&
(
const
fvec4
&
other
)
const
{
fvec4
operator
&
(
fvec4
other
)
const
{
return
vreinterpretq_f32_u32
(
vandq_u32
(
vreinterpretq_u32_f32
(
val
),
vreinterpretq_u32_f32
(
other
)));
return
vreinterpretq_f32_u32
(
vandq_u32
(
vreinterpretq_u32_f32
(
val
),
vreinterpretq_u32_f32
(
other
)));
}
}
fvec4
operator
|
(
const
fvec4
&
other
)
const
{
fvec4
operator
|
(
fvec4
other
)
const
{
return
vreinterpretq_f32_u32
(
vorrq_u32
(
vreinterpretq_u32_f32
(
val
),
vreinterpretq_u32_f32
(
other
)));
return
vreinterpretq_f32_u32
(
vorrq_u32
(
vreinterpretq_u32_f32
(
val
),
vreinterpretq_u32_f32
(
other
)));
}
}
fvec4
operator
==
(
const
fvec4
&
other
)
const
{
fvec4
operator
==
(
fvec4
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vceqq_f32
(
val
,
other
)));
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vceqq_f32
(
val
,
other
)));
}
}
fvec4
operator
!=
(
const
fvec4
&
other
)
const
{
fvec4
operator
!=
(
fvec4
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vmvnq_u32
(
vceqq_f32
(
val
,
other
))));
// not(equals(val, other))
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vmvnq_u32
(
vceqq_f32
(
val
,
other
))));
// not(equals(val, other))
}
}
fvec4
operator
>
(
const
fvec4
&
other
)
const
{
fvec4
operator
>
(
fvec4
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcgtq_f32
(
val
,
other
)));
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcgtq_f32
(
val
,
other
)));
}
}
fvec4
operator
<
(
const
fvec4
&
other
)
const
{
fvec4
operator
<
(
fvec4
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcltq_f32
(
val
,
other
)));
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcltq_f32
(
val
,
other
)));
}
}
fvec4
operator
>=
(
const
fvec4
&
other
)
const
{
fvec4
operator
>=
(
fvec4
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcgeq_f32
(
val
,
other
)));
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcgeq_f32
(
val
,
other
)));
}
}
fvec4
operator
<=
(
const
fvec4
&
other
)
const
{
fvec4
operator
<=
(
fvec4
other
)
const
{
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcleq_f32
(
val
,
other
)));
return
vcvtq_f32_s32
(
vreinterpretq_s32_u32
(
vcleq_f32
(
val
,
other
)));
}
}
operator
ivec4
()
const
;
operator
ivec4
()
const
;
...
@@ -217,49 +217,49 @@ public:
...
@@ -217,49 +217,49 @@ public:
void
store
(
int
*
v
)
const
{
void
store
(
int
*
v
)
const
{
vst1q_s32
(
v
,
val
);
vst1q_s32
(
v
,
val
);
}
}
ivec4
operator
+
(
const
ivec4
&
other
)
const
{
ivec4
operator
+
(
ivec4
other
)
const
{
return
vaddq_s32
(
val
,
other
);
return
vaddq_s32
(
val
,
other
);
}
}
ivec4
operator
-
(
const
ivec4
&
other
)
const
{
ivec4
operator
-
(
ivec4
other
)
const
{
return
vsubq_s32
(
val
,
other
);
return
vsubq_s32
(
val
,
other
);
}
}
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
ivec4
operator
*
(
ivec4
other
)
const
{
return
vmulq_s32
(
val
,
other
);
return
vmulq_s32
(
val
,
other
);
}
}
void
operator
+=
(
const
ivec4
&
other
)
{
void
operator
+=
(
ivec4
other
)
{
val
=
vaddq_s32
(
val
,
other
);
val
=
vaddq_s32
(
val
,
other
);
}
}
void
operator
-=
(
const
ivec4
&
other
)
{
void
operator
-=
(
ivec4
other
)
{
val
=
vsubq_s32
(
val
,
other
);
val
=
vsubq_s32
(
val
,
other
);
}
}
void
operator
*=
(
const
ivec4
&
other
)
{
void
operator
*=
(
ivec4
other
)
{
val
=
vmulq_s32
(
val
,
other
);
val
=
vmulq_s32
(
val
,
other
);
}
}
ivec4
operator
-
()
const
{
ivec4
operator
-
()
const
{
return
vnegq_s32
(
val
);
return
vnegq_s32
(
val
);
}
}
ivec4
operator
&
(
const
ivec4
&
other
)
const
{
ivec4
operator
&
(
ivec4
other
)
const
{
return
vandq_s32
(
val
,
other
);
return
vandq_s32
(
val
,
other
);
}
}
ivec4
operator
|
(
const
ivec4
&
other
)
const
{
ivec4
operator
|
(
ivec4
other
)
const
{
return
vorrq_s32
(
val
,
other
);
return
vorrq_s32
(
val
,
other
);
}
}
ivec4
operator
==
(
const
ivec4
&
other
)
const
{
ivec4
operator
==
(
ivec4
other
)
const
{
return
vreinterpretq_s32_u32
(
vceqq_s32
(
val
,
other
));
return
vreinterpretq_s32_u32
(
vceqq_s32
(
val
,
other
));
}
}
ivec4
operator
!=
(
const
ivec4
&
other
)
const
{
ivec4
operator
!=
(
ivec4
other
)
const
{
return
vreinterpretq_s32_u32
(
vmvnq_u32
(
vceqq_s32
(
val
,
other
)));
// not(equal(val, other))
return
vreinterpretq_s32_u32
(
vmvnq_u32
(
vceqq_s32
(
val
,
other
)));
// not(equal(val, other))
}
}
ivec4
operator
>
(
const
ivec4
&
other
)
const
{
ivec4
operator
>
(
ivec4
other
)
const
{
return
vreinterpretq_s32_u32
(
vcgtq_s32
(
val
,
other
));
return
vreinterpretq_s32_u32
(
vcgtq_s32
(
val
,
other
));
}
}
ivec4
operator
<
(
const
ivec4
&
other
)
const
{
ivec4
operator
<
(
ivec4
other
)
const
{
return
vreinterpretq_s32_u32
(
vcltq_s32
(
val
,
other
));
return
vreinterpretq_s32_u32
(
vcltq_s32
(
val
,
other
));
}
}
ivec4
operator
>=
(
const
ivec4
&
other
)
const
{
ivec4
operator
>=
(
ivec4
other
)
const
{
return
vreinterpretq_s32_u32
(
vcgeq_s32
(
val
,
other
));
return
vreinterpretq_s32_u32
(
vcgeq_s32
(
val
,
other
));
}
}
ivec4
operator
<=
(
const
ivec4
&
other
)
const
{
ivec4
operator
<=
(
ivec4
other
)
const
{
return
vreinterpretq_s32_u32
(
vcleq_s32
(
val
,
other
));
return
vreinterpretq_s32_u32
(
vcleq_s32
(
val
,
other
));
}
}
operator
fvec4
()
const
;
operator
fvec4
()
const
;
...
@@ -283,52 +283,52 @@ inline ivec4 fvec4::expandBitsToMask(int bitmask) {
...
@@ -283,52 +283,52 @@ inline ivec4 fvec4::expandBitsToMask(int bitmask) {
}
}
// Functions that operate on fvec4s.
// Functions that operate on fvec4s.
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
min
(
fvec4
v1
,
fvec4
v2
)
{
return
vminq_f32
(
v1
,
v2
);
return
vminq_f32
(
v1
,
v2
);
}
}
static
inline
fvec4
max
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
max
(
fvec4
v1
,
fvec4
v2
)
{
return
vmaxq_f32
(
v1
,
v2
);
return
vmaxq_f32
(
v1
,
v2
);
}
}
static
inline
fvec4
abs
(
const
fvec4
&
v
)
{
static
inline
fvec4
abs
(
fvec4
v
)
{
return
vabsq_f32
(
v
);
return
vabsq_f32
(
v
);
}
}
static
inline
fvec4
rsqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
rsqrt
(
fvec4
v
)
{
float32x4_t
recipSqrt
=
vrsqrteq_f32
(
v
);
float32x4_t
recipSqrt
=
vrsqrteq_f32
(
v
);
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
return
recipSqrt
;
return
recipSqrt
;
}
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
sqrt
(
fvec4
v
)
{
return
rsqrt
(
v
)
*
v
;
return
rsqrt
(
v
)
*
v
;
}
}
static
inline
fvec4
exp
(
const
fvec4
&
v
)
{
static
inline
fvec4
exp
(
fvec4
v
)
{
return
fvec4
(
exp_ps
(
v
.
val
));
return
fvec4
(
exp_ps
(
v
.
val
));
}
}
static
inline
fvec4
log
(
const
fvec4
&
v
)
{
static
inline
fvec4
log
(
fvec4
v
)
{
return
fvec4
(
log_ps
(
v
.
val
));
return
fvec4
(
log_ps
(
v
.
val
));
}
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
float
dot3
(
fvec4
v1
,
fvec4
v2
)
{
fvec4
result
=
v1
*
v2
;
fvec4
result
=
v1
*
v2
;
return
vgetq_lane_f32
(
result
,
0
)
+
vgetq_lane_f32
(
result
,
1
)
+
vgetq_lane_f32
(
result
,
2
);
return
vgetq_lane_f32
(
result
,
0
)
+
vgetq_lane_f32
(
result
,
1
)
+
vgetq_lane_f32
(
result
,
2
);
}
}
static
inline
float
dot4
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
float
dot4
(
fvec4
v1
,
fvec4
v2
)
{
fvec4
result
=
v1
*
v2
;
fvec4
result
=
v1
*
v2
;
return
vgetq_lane_f32
(
result
,
0
)
+
vgetq_lane_f32
(
result
,
1
)
+
vgetq_lane_f32
(
result
,
2
)
+
vgetq_lane_f32
(
result
,
3
);
return
vgetq_lane_f32
(
result
,
0
)
+
vgetq_lane_f32
(
result
,
1
)
+
vgetq_lane_f32
(
result
,
2
)
+
vgetq_lane_f32
(
result
,
3
);
}
}
static
inline
float
reduceAdd
(
const
fvec4
v
)
{
static
inline
float
reduceAdd
(
fvec4
v
)
{
return
dot4
(
v
,
fvec4
(
1.0
f
));
return
dot4
(
v
,
fvec4
(
1.0
f
));
}
}
static
inline
fvec4
cross
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
cross
(
fvec4
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
[
1
]
*
v2
[
2
]
-
v1
[
2
]
*
v2
[
1
],
return
fvec4
(
v1
[
1
]
*
v2
[
2
]
-
v1
[
2
]
*
v2
[
1
],
v1
[
2
]
*
v2
[
0
]
-
v1
[
0
]
*
v2
[
2
],
v1
[
2
]
*
v2
[
0
]
-
v1
[
0
]
*
v2
[
2
],
v1
[
0
]
*
v2
[
1
]
-
v1
[
1
]
*
v2
[
0
],
0
);
v1
[
0
]
*
v2
[
1
]
-
v1
[
1
]
*
v2
[
0
],
0
);
...
@@ -356,26 +356,26 @@ static inline void transpose(const fvec4 in[4], fvec4& v0, fvec4& v1, fvec4& v2,
...
@@ -356,26 +356,26 @@ static inline void transpose(const fvec4 in[4], fvec4& v0, fvec4& v1, fvec4& v2,
/**
/**
* Out-of-place transpose from named variables into an array.
* Out-of-place transpose from named variables into an array.
*/
*/
static
inline
void
transpose
(
const
fvec4
v0
,
const
fvec4
v1
,
const
fvec4
v2
,
const
fvec4
v3
,
fvec4
out
[
4
])
{
static
inline
void
transpose
(
fvec4
v0
,
fvec4
v1
,
fvec4
v2
,
fvec4
v3
,
fvec4
out
[
4
])
{
out
[
0
]
=
v0
;
out
[
1
]
=
v1
;
out
[
2
]
=
v2
;
out
[
3
]
=
v3
;
out
[
0
]
=
v0
;
out
[
1
]
=
v1
;
out
[
2
]
=
v2
;
out
[
3
]
=
v3
;
transpose
(
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
]);
transpose
(
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
]);
}
}
// Functions that operate on ivec4s.
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
static
inline
ivec4
min
(
ivec4
v1
,
ivec4
v2
)
{
return
vminq_s32
(
v1
,
v2
);
return
vminq_s32
(
v1
,
v2
);
}
}
static
inline
ivec4
max
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
static
inline
ivec4
max
(
ivec4
v1
,
ivec4
v2
)
{
return
vmaxq_s32
(
v1
,
v2
);
return
vmaxq_s32
(
v1
,
v2
);
}
}
static
inline
ivec4
abs
(
const
ivec4
&
v
)
{
static
inline
ivec4
abs
(
ivec4
v
)
{
return
vabdq_s32
(
v
,
ivec4
(
0
));
return
vabdq_s32
(
v
,
ivec4
(
0
));
}
}
static
inline
bool
any
(
const
ivec4
&
v
)
{
static
inline
bool
any
(
ivec4
v
)
{
#ifdef __ARM64__
#ifdef __ARM64__
return
(
vmaxvq_u32
(
vreinterpretq_u32_s32
(
v
))
!=
0
);
return
(
vmaxvq_u32
(
vreinterpretq_u32_s32
(
v
))
!=
0
);
#else
#else
...
@@ -385,46 +385,46 @@ static inline bool any(const ivec4& v) {
...
@@ -385,46 +385,46 @@ static inline bool any(const ivec4& v) {
// Mathematical operators involving a scalar and a vector.
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
+
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
+
v2
;
return
fvec4
(
v1
)
+
v2
;
}
}
static
inline
fvec4
operator
-
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
-
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
-
v2
;
return
fvec4
(
v1
)
-
v2
;
}
}
static
inline
fvec4
operator
*
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
*
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
*
v2
;
return
fvec4
(
v1
)
*
v2
;
}
}
static
inline
fvec4
operator
/
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
/
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
/
v2
;
return
fvec4
(
v1
)
/
v2
;
}
}
// Operations for blending fvec4s based on an ivec4.
// Operations for blending fvec4s based on an ivec4.
static
inline
fvec4
blend
(
const
fvec4
&
v1
,
const
fvec4
&
v2
,
const
ivec4
&
mask
)
{
static
inline
fvec4
blend
(
fvec4
v1
,
fvec4
v2
,
ivec4
mask
)
{
return
vbslq_f32
(
vreinterpretq_u32_s32
(
mask
),
v2
,
v1
);
return
vbslq_f32
(
vreinterpretq_u32_s32
(
mask
),
v2
,
v1
);
}
}
static
inline
fvec4
blendZero
(
const
fvec4
v
,
const
ivec4
mask
)
{
static
inline
fvec4
blendZero
(
fvec4
v
,
ivec4
mask
)
{
return
blend
(
0.0
f
,
v
,
mask
);
return
blend
(
0.0
f
,
v
,
mask
);
}
}
// These are at the end since they involve other functions defined above.
// These are at the end since they involve other functions defined above.
static
inline
fvec4
round
(
const
fvec4
&
v
)
{
static
inline
fvec4
round
(
fvec4
v
)
{
fvec4
shift
(
0x1
.0
p23f
);
fvec4
shift
(
0x1
.0
p23f
);
fvec4
absResult
=
(
abs
(
v
)
+
shift
)
-
shift
;
fvec4
absResult
=
(
abs
(
v
)
+
shift
)
-
shift
;
return
blend
(
v
,
absResult
,
ivec4
(
0x7FFFFFFF
));
return
blend
(
v
,
absResult
,
ivec4
(
0x7FFFFFFF
));
}
}
static
inline
fvec4
floor
(
const
fvec4
&
v
)
{
static
inline
fvec4
floor
(
fvec4
v
)
{
fvec4
rounded
=
round
(
v
);
fvec4
rounded
=
round
(
v
);
return
rounded
+
blend
(
0.0
f
,
-
1.0
f
,
rounded
>
v
);
return
rounded
+
blend
(
0.0
f
,
-
1.0
f
,
rounded
>
v
);
}
}
static
inline
fvec4
ceil
(
const
fvec4
&
v
)
{
static
inline
fvec4
ceil
(
fvec4
v
)
{
fvec4
rounded
=
round
(
v
);
fvec4
rounded
=
round
(
v
);
return
rounded
+
blend
(
0.0
f
,
1.0
f
,
rounded
<
v
);
return
rounded
+
blend
(
0.0
f
,
1.0
f
,
rounded
<
v
);
}
}
...
@@ -433,7 +433,7 @@ static inline fvec4 ceil(const fvec4& v) {
...
@@ -433,7 +433,7 @@ static inline fvec4 ceil(const fvec4& v) {
* of vectors. The first result vector contains the values at the given indexes, and the second
* of vectors. The first result vector contains the values at the given indexes, and the second
* result vector contains the values from each respective index+1.
* result vector contains the values from each respective index+1.
*/
*/
static
inline
void
gatherVecPair
(
const
float
*
table
,
const
ivec4
index
,
fvec4
&
out0
,
fvec4
&
out1
)
{
static
inline
void
gatherVecPair
(
const
float
*
table
,
ivec4
index
,
fvec4
&
out0
,
fvec4
&
out1
)
{
fvec4
t0
(
table
+
index
[
0
]);
fvec4
t0
(
table
+
index
[
0
]);
fvec4
t1
(
table
+
index
[
1
]);
fvec4
t1
(
table
+
index
[
1
]);
fvec4
t2
(
table
+
index
[
2
]);
fvec4
t2
(
table
+
index
[
2
]);
...
@@ -456,7 +456,7 @@ static inline void gatherVecPair(const float* table, const ivec4 index, fvec4& o
...
@@ -456,7 +456,7 @@ static inline void gatherVecPair(const float* table, const ivec4 index, fvec4& o
* output[2] = (Z0 + Z1 + Z2 + Z3)
* output[2] = (Z0 + Z1 + Z2 + Z3)
* output[3] = undefined
* output[3] = undefined
*/
*/
static
inline
fvec4
reduceToVec3
(
const
fvec4
x
,
const
fvec4
y
,
const
fvec4
z
)
{
static
inline
fvec4
reduceToVec3
(
fvec4
x
,
fvec4
y
,
fvec4
z
)
{
const
auto
nx
=
reduceAdd
(
x
);
const
auto
nx
=
reduceAdd
(
x
);
const
auto
ny
=
reduceAdd
(
y
);
const
auto
ny
=
reduceAdd
(
y
);
const
auto
nz
=
reduceAdd
(
z
);
const
auto
nz
=
reduceAdd
(
z
);
...
...
openmmapi/include/openmm/internal/vectorize_pnacl.h
View file @
f83289a4
...
@@ -95,45 +95,45 @@ public:
...
@@ -95,45 +95,45 @@ public:
v
[
1
]
=
val
[
1
];
v
[
1
]
=
val
[
1
];
v
[
2
]
=
val
[
2
];
v
[
2
]
=
val
[
2
];
}
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
fvec4
operator
+
(
fvec4
other
)
const
{
return
val
+
other
;
return
val
+
other
;
}
}
fvec4
operator
-
(
const
fvec4
&
other
)
const
{
fvec4
operator
-
(
fvec4
other
)
const
{
return
val
-
other
;
return
val
-
other
;
}
}
fvec4
operator
*
(
const
fvec4
&
other
)
const
{
fvec4
operator
*
(
fvec4
other
)
const
{
return
val
*
other
;
return
val
*
other
;
}
}
fvec4
operator
/
(
const
fvec4
&
other
)
const
{
fvec4
operator
/
(
fvec4
other
)
const
{
return
val
/
other
;
return
val
/
other
;
}
}
void
operator
+=
(
const
fvec4
&
other
)
{
void
operator
+=
(
fvec4
other
)
{
val
=
val
+
other
;
val
=
val
+
other
;
}
}
void
operator
-=
(
const
fvec4
&
other
)
{
void
operator
-=
(
fvec4
other
)
{
val
=
val
-
other
;
val
=
val
-
other
;
}
}
void
operator
*=
(
const
fvec4
&
other
)
{
void
operator
*=
(
fvec4
other
)
{
val
=
val
*
other
;
val
=
val
*
other
;
}
}
void
operator
/=
(
const
fvec4
&
other
)
{
void
operator
/=
(
fvec4
other
)
{
val
=
val
/
other
;
val
=
val
/
other
;
}
}
fvec4
operator
-
()
const
{
fvec4
operator
-
()
const
{
return
-
val
;
return
-
val
;
}
}
fvec4
operator
&
(
const
fvec4
&
other
)
const
{
fvec4
operator
&
(
fvec4
other
)
const
{
return
(
fvec4
)
(((
__m128i
)
val
)
&
((
__m128i
)
other
.
val
));
return
(
fvec4
)
(((
__m128i
)
val
)
&
((
__m128i
)
other
.
val
));
}
}
fvec4
operator
|
(
const
fvec4
&
other
)
const
{
fvec4
operator
|
(
fvec4
other
)
const
{
return
(
fvec4
)
(((
__m128i
)
val
)
|
((
__m128i
)
other
.
val
));
return
(
fvec4
)
(((
__m128i
)
val
)
|
((
__m128i
)
other
.
val
));
}
}
ivec4
operator
==
(
const
fvec4
&
other
)
const
;
ivec4
operator
==
(
fvec4
other
)
const
;
ivec4
operator
!=
(
const
fvec4
&
other
)
const
;
ivec4
operator
!=
(
fvec4
other
)
const
;
ivec4
operator
>
(
const
fvec4
&
other
)
const
;
ivec4
operator
>
(
fvec4
other
)
const
;
ivec4
operator
<
(
const
fvec4
&
other
)
const
;
ivec4
operator
<
(
fvec4
other
)
const
;
ivec4
operator
>=
(
const
fvec4
&
other
)
const
;
ivec4
operator
>=
(
fvec4
other
)
const
;
ivec4
operator
<=
(
const
fvec4
&
other
)
const
;
ivec4
operator
<=
(
fvec4
other
)
const
;
operator
ivec4
()
const
;
operator
ivec4
()
const
;
/**
/**
...
@@ -171,49 +171,49 @@ public:
...
@@ -171,49 +171,49 @@ public:
void
store
(
int
*
v
)
const
{
void
store
(
int
*
v
)
const
{
*
((
__m128
*
)
v
)
=
val
;
*
((
__m128
*
)
v
)
=
val
;
}
}
ivec4
operator
+
(
const
ivec4
&
other
)
const
{
ivec4
operator
+
(
ivec4
other
)
const
{
return
val
+
other
;
return
val
+
other
;
}
}
ivec4
operator
-
(
const
ivec4
&
other
)
const
{
ivec4
operator
-
(
ivec4
other
)
const
{
return
val
-
other
;
return
val
-
other
;
}
}
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
ivec4
operator
*
(
ivec4
other
)
const
{
return
val
*
other
;
return
val
*
other
;
}
}
void
operator
+=
(
const
ivec4
&
other
)
{
void
operator
+=
(
ivec4
other
)
{
val
=
val
+
other
;
val
=
val
+
other
;
}
}
void
operator
-=
(
const
ivec4
&
other
)
{
void
operator
-=
(
ivec4
other
)
{
val
=
val
-
other
;
val
=
val
-
other
;
}
}
void
operator
*=
(
const
ivec4
&
other
)
{
void
operator
*=
(
ivec4
other
)
{
val
=
val
*
other
;
val
=
val
*
other
;
}
}
ivec4
operator
-
()
const
{
ivec4
operator
-
()
const
{
return
-
val
;
return
-
val
;
}
}
ivec4
operator
&
(
const
ivec4
&
other
)
const
{
ivec4
operator
&
(
ivec4
other
)
const
{
return
val
&
other
.
val
;
return
val
&
other
.
val
;
}
}
ivec4
operator
|
(
const
ivec4
&
other
)
const
{
ivec4
operator
|
(
ivec4
other
)
const
{
return
val
|
other
.
val
;
return
val
|
other
.
val
;
}
}
ivec4
operator
==
(
const
ivec4
&
other
)
const
{
ivec4
operator
==
(
ivec4
other
)
const
{
return
(
val
==
other
.
val
);
return
(
val
==
other
.
val
);
}
}
ivec4
operator
!=
(
const
ivec4
&
other
)
const
{
ivec4
operator
!=
(
ivec4
other
)
const
{
return
(
val
!=
other
.
val
);
return
(
val
!=
other
.
val
);
}
}
ivec4
operator
>
(
const
ivec4
&
other
)
const
{
ivec4
operator
>
(
ivec4
other
)
const
{
return
(
val
>
other
.
val
);
return
(
val
>
other
.
val
);
}
}
ivec4
operator
<
(
const
ivec4
&
other
)
const
{
ivec4
operator
<
(
ivec4
other
)
const
{
return
(
val
<
other
.
val
);
return
(
val
<
other
.
val
);
}
}
ivec4
operator
>=
(
const
ivec4
&
other
)
const
{
ivec4
operator
>=
(
ivec4
other
)
const
{
return
(
val
>=
other
.
val
);
return
(
val
>=
other
.
val
);
}
}
ivec4
operator
<=
(
const
ivec4
&
other
)
const
{
ivec4
operator
<=
(
ivec4
other
)
const
{
return
(
val
<=
other
.
val
);
return
(
val
<=
other
.
val
);
}
}
operator
fvec4
()
const
;
operator
fvec4
()
const
;
...
@@ -221,27 +221,27 @@ public:
...
@@ -221,27 +221,27 @@ public:
// Conversion operators.
// Conversion operators.
inline
ivec4
fvec4
::
operator
==
(
const
fvec4
&
other
)
const
{
inline
ivec4
fvec4
::
operator
==
(
fvec4
other
)
const
{
return
(
__m128i
)
(
val
==
other
.
val
);
return
(
__m128i
)
(
val
==
other
.
val
);
}
}
inline
ivec4
fvec4
::
operator
!=
(
const
fvec4
&
other
)
const
{
inline
ivec4
fvec4
::
operator
!=
(
fvec4
other
)
const
{
return
(
__m128i
)
(
val
!=
other
.
val
);
return
(
__m128i
)
(
val
!=
other
.
val
);
}
}
inline
ivec4
fvec4
::
operator
>
(
const
fvec4
&
other
)
const
{
inline
ivec4
fvec4
::
operator
>
(
fvec4
other
)
const
{
return
(
__m128i
)
(
val
>
other
.
val
);
return
(
__m128i
)
(
val
>
other
.
val
);
}
}
inline
ivec4
fvec4
::
operator
<
(
const
fvec4
&
other
)
const
{
inline
ivec4
fvec4
::
operator
<
(
fvec4
other
)
const
{
return
(
__m128i
)
(
val
<
other
.
val
);
return
(
__m128i
)
(
val
<
other
.
val
);
}
}
inline
ivec4
fvec4
::
operator
>=
(
const
fvec4
&
other
)
const
{
inline
ivec4
fvec4
::
operator
>=
(
fvec4
other
)
const
{
return
(
__m128i
)
(
val
>=
other
.
val
);
return
(
__m128i
)
(
val
>=
other
.
val
);
}
}
inline
ivec4
fvec4
::
operator
<=
(
const
fvec4
&
other
)
const
{
inline
ivec4
fvec4
::
operator
<=
(
fvec4
other
)
const
{
return
(
__m128i
)
(
val
<=
other
.
val
);
return
(
__m128i
)
(
val
<=
other
.
val
);
}
}
...
@@ -262,34 +262,34 @@ inline ivec4 fvec4::expandBitsToMask(int bitmask) {
...
@@ -262,34 +262,34 @@ inline ivec4 fvec4::expandBitsToMask(int bitmask) {
// Functions that operate on fvec4s.
// Functions that operate on fvec4s.
static
inline
fvec4
abs
(
const
fvec4
&
v
)
{
static
inline
fvec4
abs
(
fvec4
v
)
{
return
v
&
(
__m128
)
ivec4
(
0x7FFFFFFF
);
return
v
&
(
__m128
)
ivec4
(
0x7FFFFFFF
);
}
}
static
inline
fvec4
exp
(
const
fvec4
&
v
)
{
static
inline
fvec4
exp
(
fvec4
v
)
{
return
fvec4
(
expf
(
v
[
0
]),
expf
(
v
[
1
]),
expf
(
v
[
2
]),
expf
(
v
[
3
]));
return
fvec4
(
expf
(
v
[
0
]),
expf
(
v
[
1
]),
expf
(
v
[
2
]),
expf
(
v
[
3
]));
}
}
static
inline
fvec4
log
(
const
fvec4
&
v
)
{
static
inline
fvec4
log
(
fvec4
v
)
{
return
fvec4
(
logf
(
v
[
0
]),
logf
(
v
[
1
]),
logf
(
v
[
2
]),
logf
(
v
[
3
]));
return
fvec4
(
logf
(
v
[
0
]),
logf
(
v
[
1
]),
logf
(
v
[
2
]),
logf
(
v
[
3
]));
}
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
float
dot3
(
fvec4
v1
,
fvec4
v2
)
{
fvec4
r
=
v1
*
v2
;
fvec4
r
=
v1
*
v2
;
return
r
[
0
]
+
r
[
1
]
+
r
[
2
];
return
r
[
0
]
+
r
[
1
]
+
r
[
2
];
}
}
static
inline
float
dot4
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
float
dot4
(
fvec4
v1
,
fvec4
v2
)
{
fvec4
r
=
v1
*
v2
;
fvec4
r
=
v1
*
v2
;
fvec4
temp
=
__builtin_shufflevector
(
r
.
val
,
r
.
val
,
0
,
1
,
-
1
,
-
1
)
+
__builtin_shufflevector
(
r
.
val
,
r
.
val
,
2
,
3
,
-
1
,
-
1
);
fvec4
temp
=
__builtin_shufflevector
(
r
.
val
,
r
.
val
,
0
,
1
,
-
1
,
-
1
)
+
__builtin_shufflevector
(
r
.
val
,
r
.
val
,
2
,
3
,
-
1
,
-
1
);
return
temp
[
0
]
+
temp
[
1
];
return
temp
[
0
]
+
temp
[
1
];
}
}
static
inline
float
reduceAdd
(
const
fvec4
v
)
{
static
inline
float
reduceAdd
(
fvec4
v
)
{
return
dot4
(
v
,
fvec4
(
1.0
f
));
return
dot4
(
v
,
fvec4
(
1.0
f
));
}
}
static
inline
fvec4
cross
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
cross
(
fvec4
v1
,
fvec4
v2
)
{
__m128
temp
=
v2
.
val
*
__builtin_shufflevector
(
v1
.
val
,
v1
.
val
,
2
,
0
,
1
,
3
)
-
__m128
temp
=
v2
.
val
*
__builtin_shufflevector
(
v1
.
val
,
v1
.
val
,
2
,
0
,
1
,
3
)
-
v1
.
val
*
__builtin_shufflevector
(
v2
.
val
,
v2
.
val
,
2
,
0
,
1
,
3
);
v1
.
val
*
__builtin_shufflevector
(
v2
.
val
,
v2
.
val
,
2
,
0
,
1
,
3
);
return
__builtin_shufflevector
(
temp
,
temp
,
2
,
0
,
1
,
3
);
return
__builtin_shufflevector
(
temp
,
temp
,
2
,
0
,
1
,
3
);
...
@@ -317,85 +317,85 @@ static inline void transpose(const fvec4 in[4], fvec4& v0, fvec4& v1, fvec4& v2,
...
@@ -317,85 +317,85 @@ static inline void transpose(const fvec4 in[4], fvec4& v0, fvec4& v1, fvec4& v2,
/**
/**
* Out-of-place transpose from named variables into an array.
* Out-of-place transpose from named variables into an array.
*/
*/
static
inline
void
transpose
(
const
fvec4
v0
,
const
fvec4
v1
,
const
fvec4
v2
,
const
fvec4
v3
,
fvec4
out
[
4
])
{
static
inline
void
transpose
(
fvec4
v0
,
fvec4
v1
,
fvec4
v2
,
fvec4
v3
,
fvec4
out
[
4
])
{
out
[
0
]
=
v0
;
out
[
1
]
=
v1
;
out
[
2
]
=
v2
;
out
[
3
]
=
v3
;
out
[
0
]
=
v0
;
out
[
1
]
=
v1
;
out
[
2
]
=
v2
;
out
[
3
]
=
v3
;
transpose
(
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
]);
transpose
(
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
]);
}
}
// Functions that operate on ivec4s.
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
static
inline
ivec4
min
(
ivec4
v1
,
ivec4
v2
)
{
return
ivec4
(
std
::
min
(
v1
[
0
],
v2
[
0
]),
std
::
min
(
v1
[
1
],
v2
[
1
]),
std
::
min
(
v1
[
2
],
v2
[
2
]),
std
::
min
(
v1
[
3
],
v2
[
3
]));
return
ivec4
(
std
::
min
(
v1
[
0
],
v2
[
0
]),
std
::
min
(
v1
[
1
],
v2
[
1
]),
std
::
min
(
v1
[
2
],
v2
[
2
]),
std
::
min
(
v1
[
3
],
v2
[
3
]));
}
}
static
inline
ivec4
max
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
static
inline
ivec4
max
(
ivec4
v1
,
ivec4
v2
)
{
return
ivec4
(
std
::
max
(
v1
[
0
],
v2
[
0
]),
std
::
max
(
v1
[
1
],
v2
[
1
]),
std
::
max
(
v1
[
2
],
v2
[
2
]),
std
::
max
(
v1
[
3
],
v2
[
3
]));
return
ivec4
(
std
::
max
(
v1
[
0
],
v2
[
0
]),
std
::
max
(
v1
[
1
],
v2
[
1
]),
std
::
max
(
v1
[
2
],
v2
[
2
]),
std
::
max
(
v1
[
3
],
v2
[
3
]));
}
}
static
inline
ivec4
abs
(
const
ivec4
&
v
)
{
static
inline
ivec4
abs
(
ivec4
v
)
{
return
ivec4
(
abs
(
v
[
0
]),
abs
(
v
[
1
]),
abs
(
v
[
2
]),
abs
(
v
[
3
]));
return
ivec4
(
abs
(
v
[
0
]),
abs
(
v
[
1
]),
abs
(
v
[
2
]),
abs
(
v
[
3
]));
}
}
static
inline
bool
any
(
const
__m128i
&
v
)
{
static
inline
bool
any
(
__m128i
v
)
{
ivec4
temp
=
__builtin_shufflevector
(
v
,
v
,
0
,
1
,
-
1
,
-
1
)
|
__builtin_shufflevector
(
v
,
v
,
2
,
3
,
-
1
,
-
1
);
ivec4
temp
=
__builtin_shufflevector
(
v
,
v
,
0
,
1
,
-
1
,
-
1
)
|
__builtin_shufflevector
(
v
,
v
,
2
,
3
,
-
1
,
-
1
);
return
(
temp
[
0
]
||
temp
[
1
]);
return
(
temp
[
0
]
||
temp
[
1
]);
}
}
// Mathematical operators involving a scalar and a vector.
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
+
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
+
v2
;
return
fvec4
(
v1
)
+
v2
;
}
}
static
inline
fvec4
operator
-
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
-
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
-
v2
;
return
fvec4
(
v1
)
-
v2
;
}
}
static
inline
fvec4
operator
*
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
*
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
*
v2
;
return
fvec4
(
v1
)
*
v2
;
}
}
static
inline
fvec4
operator
/
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
/
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
/
v2
;
return
fvec4
(
v1
)
/
v2
;
}
}
// Operations for blending fvec4s based on an ivec4.
// Operations for blending fvec4s based on an ivec4.
static
inline
fvec4
blend
(
const
fvec4
&
v1
,
const
fvec4
&
v2
,
const
__m128i
&
mask
)
{
static
inline
fvec4
blend
(
fvec4
v1
,
fvec4
v2
,
__m128i
mask
)
{
return
(
__m128
)
((
mask
&
(
__m128i
)
v2
)
+
((
ivec4
(
0xFFFFFFFF
)
-
ivec4
(
mask
))
&
(
__m128i
)
v1
));
return
(
__m128
)
((
mask
&
(
__m128i
)
v2
)
+
((
ivec4
(
0xFFFFFFFF
)
-
ivec4
(
mask
))
&
(
__m128i
)
v1
));
}
}
static
inline
fvec4
blendZero
(
const
fvec4
v
,
const
ivec4
mask
)
{
static
inline
fvec4
blendZero
(
fvec4
v
,
ivec4
mask
)
{
return
blend
(
0.0
f
,
v
,
mask
);
return
blend
(
0.0
f
,
v
,
mask
);
}
}
// These are at the end since they involve other functions defined above.
// These are at the end since they involve other functions defined above.
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
min
(
fvec4
v1
,
fvec4
v2
)
{
return
blend
(
v1
,
v2
,
v1
>
v2
);
return
blend
(
v1
,
v2
,
v1
>
v2
);
}
}
static
inline
fvec4
max
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
max
(
fvec4
v1
,
fvec4
v2
)
{
return
blend
(
v1
,
v2
,
v1
<
v2
);
return
blend
(
v1
,
v2
,
v1
<
v2
);
}
}
static
inline
fvec4
round
(
const
fvec4
&
v
)
{
static
inline
fvec4
round
(
fvec4
v
)
{
fvec4
shift
(
0x1
.0
p23f
);
fvec4
shift
(
0x1
.0
p23f
);
fvec4
absResult
=
(
abs
(
v
)
+
shift
)
-
shift
;
fvec4
absResult
=
(
abs
(
v
)
+
shift
)
-
shift
;
return
(
__m128
)
((
ivec4
(
0x80000000
)
&
(
__m128i
)
v
)
+
(
ivec4
(
0x7FFFFFFF
)
&
(
__m128i
)
absResult
));
return
(
__m128
)
((
ivec4
(
0x80000000
)
&
(
__m128i
)
v
)
+
(
ivec4
(
0x7FFFFFFF
)
&
(
__m128i
)
absResult
));
}
}
static
inline
fvec4
floor
(
const
fvec4
&
v
)
{
static
inline
fvec4
floor
(
fvec4
v
)
{
fvec4
truncated
=
__builtin_convertvector
(
__builtin_convertvector
(
v
.
val
,
__m128i
),
__m128
);
fvec4
truncated
=
__builtin_convertvector
(
__builtin_convertvector
(
v
.
val
,
__m128i
),
__m128
);
return
truncated
+
blend
(
0.0
f
,
-
1.0
f
,
truncated
>
v
);
return
truncated
+
blend
(
0.0
f
,
-
1.0
f
,
truncated
>
v
);
}
}
static
inline
fvec4
ceil
(
const
fvec4
&
v
)
{
static
inline
fvec4
ceil
(
fvec4
v
)
{
fvec4
truncated
=
__builtin_convertvector
(
__builtin_convertvector
(
v
.
val
,
__m128i
),
__m128
);
fvec4
truncated
=
__builtin_convertvector
(
__builtin_convertvector
(
v
.
val
,
__m128i
),
__m128
);
return
truncated
+
blend
(
0.0
f
,
1.0
f
,
truncated
<
v
);
return
truncated
+
blend
(
0.0
f
,
1.0
f
,
truncated
<
v
);
}
}
static
inline
fvec4
rsqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
rsqrt
(
fvec4
v
)
{
// Initial estimate of rsqrt().
// Initial estimate of rsqrt().
ivec4
i
=
(
__m128i
)
v
;
ivec4
i
=
(
__m128i
)
v
;
...
@@ -411,7 +411,7 @@ static inline fvec4 rsqrt(const fvec4& v) {
...
@@ -411,7 +411,7 @@ static inline fvec4 rsqrt(const fvec4& v) {
return
y
;
return
y
;
}
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
sqrt
(
fvec4
v
)
{
return
rsqrt
(
v
)
*
v
;
return
rsqrt
(
v
)
*
v
;
}
}
...
@@ -420,7 +420,7 @@ static inline fvec4 sqrt(const fvec4& v) {
...
@@ -420,7 +420,7 @@ static inline fvec4 sqrt(const fvec4& v) {
* of vectors. The first result vector contains the values at the given indexes, and the second
* of vectors. The first result vector contains the values at the given indexes, and the second
* result vector contains the values from each respective index+1.
* result vector contains the values from each respective index+1.
*/
*/
static
inline
void
gatherVecPair
(
const
float
*
table
,
const
ivec4
index
,
fvec4
&
out0
,
fvec4
&
out1
)
{
static
inline
void
gatherVecPair
(
const
float
*
table
,
ivec4
index
,
fvec4
&
out0
,
fvec4
&
out1
)
{
fvec4
t0
(
table
+
index
[
0
]);
fvec4
t0
(
table
+
index
[
0
]);
fvec4
t1
(
table
+
index
[
1
]);
fvec4
t1
(
table
+
index
[
1
]);
fvec4
t2
(
table
+
index
[
2
]);
fvec4
t2
(
table
+
index
[
2
]);
...
@@ -443,7 +443,7 @@ static inline void gatherVecPair(const float* table, const ivec4 index, fvec4& o
...
@@ -443,7 +443,7 @@ static inline void gatherVecPair(const float* table, const ivec4 index, fvec4& o
* output[2] = (Z0 + Z1 + Z2 + Z3)
* output[2] = (Z0 + Z1 + Z2 + Z3)
* output[3] = undefined
* output[3] = undefined
*/
*/
static
inline
fvec4
reduceToVec3
(
const
fvec4
x
,
const
fvec4
y
,
const
fvec4
z
)
{
static
inline
fvec4
reduceToVec3
(
fvec4
x
,
fvec4
y
,
fvec4
z
)
{
const
auto
nx
=
reduceAdd
(
x
);
const
auto
nx
=
reduceAdd
(
x
);
const
auto
ny
=
reduceAdd
(
y
);
const
auto
ny
=
reduceAdd
(
y
);
const
auto
nz
=
reduceAdd
(
z
);
const
auto
nz
=
reduceAdd
(
z
);
...
...
openmmapi/include/openmm/internal/vectorize_ppc.h
View file @
f83289a4
...
@@ -97,45 +97,45 @@ public:
...
@@ -97,45 +97,45 @@ public:
v
[
2
]
=
val
[
2
];
v
[
2
]
=
val
[
2
];
}
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
fvec4
operator
+
(
fvec4
other
)
const
{
return
vec_add
(
val
,
other
.
val
);
return
vec_add
(
val
,
other
.
val
);
}
}
fvec4
operator
-
(
const
fvec4
&
other
)
const
{
fvec4
operator
-
(
fvec4
other
)
const
{
return
vec_sub
(
val
,
other
.
val
);
return
vec_sub
(
val
,
other
.
val
);
}
}
fvec4
operator
*
(
const
fvec4
&
other
)
const
{
fvec4
operator
*
(
fvec4
other
)
const
{
return
vec_mul
(
val
,
other
.
val
);
return
vec_mul
(
val
,
other
.
val
);
}
}
fvec4
operator
/
(
const
fvec4
&
other
)
const
{
fvec4
operator
/
(
fvec4
other
)
const
{
return
vec_div
(
val
,
other
.
val
);
return
vec_div
(
val
,
other
.
val
);
}
}
void
operator
+=
(
const
fvec4
&
other
)
{
void
operator
+=
(
fvec4
other
)
{
val
=
vec_add
(
val
,
other
.
val
);
val
=
vec_add
(
val
,
other
.
val
);
}
}
void
operator
-=
(
const
fvec4
&
other
)
{
void
operator
-=
(
fvec4
other
)
{
val
=
vec_sub
(
val
,
other
.
val
);
val
=
vec_sub
(
val
,
other
.
val
);
}
}
void
operator
*=
(
const
fvec4
&
other
)
{
void
operator
*=
(
fvec4
other
)
{
val
=
vec_mul
(
val
,
other
.
val
);
val
=
vec_mul
(
val
,
other
.
val
);
}
}
void
operator
/=
(
const
fvec4
&
other
)
{
void
operator
/=
(
fvec4
other
)
{
val
=
vec_div
(
val
,
other
.
val
);
val
=
vec_div
(
val
,
other
.
val
);
}
}
fvec4
operator
-
()
const
{
fvec4
operator
-
()
const
{
return
-
val
;
return
-
val
;
}
}
fvec4
operator
&
(
const
fvec4
&
other
)
const
{
fvec4
operator
&
(
fvec4
other
)
const
{
return
vec_and
(
val
,
other
.
val
);
return
vec_and
(
val
,
other
.
val
);
}
}
fvec4
operator
|
(
const
fvec4
&
other
)
const
{
fvec4
operator
|
(
fvec4
other
)
const
{
return
vec_or
(
val
,
other
.
val
);
return
vec_or
(
val
,
other
.
val
);
}
}
ivec4
operator
==
(
const
fvec4
&
other
)
const
;
ivec4
operator
==
(
fvec4
other
)
const
;
ivec4
operator
!=
(
const
fvec4
&
other
)
const
;
ivec4
operator
!=
(
fvec4
other
)
const
;
ivec4
operator
>
(
const
fvec4
&
other
)
const
;
ivec4
operator
>
(
fvec4
other
)
const
;
ivec4
operator
<
(
const
fvec4
&
other
)
const
;
ivec4
operator
<
(
fvec4
other
)
const
;
ivec4
operator
>=
(
const
fvec4
&
other
)
const
;
ivec4
operator
>=
(
fvec4
other
)
const
;
ivec4
operator
<=
(
const
fvec4
&
other
)
const
;
ivec4
operator
<=
(
fvec4
other
)
const
;
operator
ivec4
()
const
;
operator
ivec4
()
const
;
/***
/***
...
@@ -173,49 +173,49 @@ public:
...
@@ -173,49 +173,49 @@ public:
void
store
(
int
*
v
)
const
{
void
store
(
int
*
v
)
const
{
*
((
__m128i
*
)
v
)
=
val
;
*
((
__m128i
*
)
v
)
=
val
;
}
}
ivec4
operator
+
(
const
ivec4
&
other
)
const
{
ivec4
operator
+
(
ivec4
other
)
const
{
return
vec_add
(
val
,
other
.
val
);
return
vec_add
(
val
,
other
.
val
);
}
}
ivec4
operator
-
(
const
ivec4
&
other
)
const
{
ivec4
operator
-
(
ivec4
other
)
const
{
return
vec_sub
(
val
,
other
.
val
);
return
vec_sub
(
val
,
other
.
val
);
}
}
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
ivec4
operator
*
(
ivec4
other
)
const
{
return
val
*
other
.
val
;
return
val
*
other
.
val
;
}
}
void
operator
+=
(
const
ivec4
&
other
)
{
void
operator
+=
(
ivec4
other
)
{
val
=
vec_add
(
val
,
other
.
val
);
val
=
vec_add
(
val
,
other
.
val
);
}
}
void
operator
-=
(
const
ivec4
&
other
)
{
void
operator
-=
(
ivec4
other
)
{
val
=
vec_sub
(
val
,
other
.
val
);
val
=
vec_sub
(
val
,
other
.
val
);
}
}
void
operator
*=
(
const
ivec4
&
other
)
{
void
operator
*=
(
ivec4
other
)
{
val
=
val
*
other
.
val
;
val
=
val
*
other
.
val
;
}
}
ivec4
operator
-
()
const
{
ivec4
operator
-
()
const
{
return
-
val
;
return
-
val
;
}
}
ivec4
operator
&
(
const
ivec4
&
other
)
const
{
ivec4
operator
&
(
ivec4
other
)
const
{
return
val
&
other
.
val
;
return
val
&
other
.
val
;
}
}
ivec4
operator
|
(
const
ivec4
&
other
)
const
{
ivec4
operator
|
(
ivec4
other
)
const
{
return
val
|
other
.
val
;
return
val
|
other
.
val
;
}
}
ivec4
operator
==
(
const
ivec4
&
other
)
const
{
ivec4
operator
==
(
ivec4
other
)
const
{
return
(
val
==
other
.
val
);
return
(
val
==
other
.
val
);
}
}
ivec4
operator
!=
(
const
ivec4
&
other
)
const
{
ivec4
operator
!=
(
ivec4
other
)
const
{
return
(
val
!=
other
.
val
);
return
(
val
!=
other
.
val
);
}
}
ivec4
operator
>
(
const
ivec4
&
other
)
const
{
ivec4
operator
>
(
ivec4
other
)
const
{
return
(
val
>
other
.
val
);
return
(
val
>
other
.
val
);
}
}
ivec4
operator
<
(
const
ivec4
&
other
)
const
{
ivec4
operator
<
(
ivec4
other
)
const
{
return
(
val
<
other
.
val
);
return
(
val
<
other
.
val
);
}
}
ivec4
operator
>=
(
const
ivec4
&
other
)
const
{
ivec4
operator
>=
(
ivec4
other
)
const
{
return
(
val
>=
other
.
val
);
return
(
val
>=
other
.
val
);
}
}
ivec4
operator
<=
(
const
ivec4
&
other
)
const
{
ivec4
operator
<=
(
ivec4
other
)
const
{
return
(
val
<=
other
.
val
);
return
(
val
<=
other
.
val
);
}
}
operator
fvec4
()
const
;
operator
fvec4
()
const
;
...
@@ -223,27 +223,27 @@ public:
...
@@ -223,27 +223,27 @@ public:
// Conversion operators.
// Conversion operators.
inline
ivec4
fvec4
::
operator
==
(
const
fvec4
&
other
)
const
{
inline
ivec4
fvec4
::
operator
==
(
fvec4
other
)
const
{
return
(
val
==
other
.
val
);
return
(
val
==
other
.
val
);
}
}
inline
ivec4
fvec4
::
operator
!=
(
const
fvec4
&
other
)
const
{
inline
ivec4
fvec4
::
operator
!=
(
fvec4
other
)
const
{
return
(
val
!=
other
.
val
);
return
(
val
!=
other
.
val
);
}
}
inline
ivec4
fvec4
::
operator
>
(
const
fvec4
&
other
)
const
{
inline
ivec4
fvec4
::
operator
>
(
fvec4
other
)
const
{
return
(
val
>
other
.
val
);
return
(
val
>
other
.
val
);
}
}
inline
ivec4
fvec4
::
operator
<
(
const
fvec4
&
other
)
const
{
inline
ivec4
fvec4
::
operator
<
(
fvec4
other
)
const
{
return
(
val
<
other
.
val
);
return
(
val
<
other
.
val
);
}
}
inline
ivec4
fvec4
::
operator
>=
(
const
fvec4
&
other
)
const
{
inline
ivec4
fvec4
::
operator
>=
(
fvec4
other
)
const
{
return
(
val
>=
other
.
val
);
return
(
val
>=
other
.
val
);
}
}
inline
ivec4
fvec4
::
operator
<=
(
const
fvec4
&
other
)
const
{
inline
ivec4
fvec4
::
operator
<=
(
fvec4
other
)
const
{
return
(
val
<=
other
.
val
);
return
(
val
<=
other
.
val
);
}
}
...
@@ -264,34 +264,34 @@ inline ivec4 fvec4::expandBitsToMask(int bitmask) {
...
@@ -264,34 +264,34 @@ inline ivec4 fvec4::expandBitsToMask(int bitmask) {
// Functions that operate on fvec4s.
// Functions that operate on fvec4s.
static
inline
fvec4
abs
(
const
fvec4
&
v
)
{
static
inline
fvec4
abs
(
fvec4
v
)
{
return
vec_abs
(
v
.
val
);
return
vec_abs
(
v
.
val
);
}
}
static
inline
fvec4
exp
(
const
fvec4
&
v
)
{
static
inline
fvec4
exp
(
fvec4
v
)
{
return
fvec4
(
expf
(
v
[
0
]),
expf
(
v
[
1
]),
expf
(
v
[
2
]),
expf
(
v
[
3
]));
return
fvec4
(
expf
(
v
[
0
]),
expf
(
v
[
1
]),
expf
(
v
[
2
]),
expf
(
v
[
3
]));
}
}
static
inline
fvec4
log
(
const
fvec4
&
v
)
{
static
inline
fvec4
log
(
fvec4
v
)
{
return
fvec4
(
logf
(
v
[
0
]),
logf
(
v
[
1
]),
logf
(
v
[
2
]),
logf
(
v
[
3
]));
return
fvec4
(
logf
(
v
[
0
]),
logf
(
v
[
1
]),
logf
(
v
[
2
]),
logf
(
v
[
3
]));
}
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
float
dot3
(
fvec4
v1
,
fvec4
v2
)
{
fvec4
r
=
v1
*
v2
;
fvec4
r
=
v1
*
v2
;
return
r
[
0
]
+
r
[
1
]
+
r
[
2
];
return
r
[
0
]
+
r
[
1
]
+
r
[
2
];
}
}
static
inline
float
dot4
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
float
dot4
(
fvec4
v1
,
fvec4
v2
)
{
fvec4
r
=
v1
*
v2
;
fvec4
r
=
v1
*
v2
;
fvec4
temp
=
r
+
vec_sld
(
r
.
val
,
r
.
val
,
8
);
fvec4
temp
=
r
+
vec_sld
(
r
.
val
,
r
.
val
,
8
);
return
temp
[
0
]
+
temp
[
1
];
return
temp
[
0
]
+
temp
[
1
];
}
}
static
inline
float
reduceAdd
(
const
fvec4
v
)
{
static
inline
float
reduceAdd
(
fvec4
v
)
{
return
dot4
(
v
,
fvec4
(
1.0
f
));
return
dot4
(
v
,
fvec4
(
1.0
f
));
}
}
static
inline
fvec4
cross
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
cross
(
fvec4
v1
,
fvec4
v2
)
{
vector
unsigned
char
perm
=
(
vector
unsigned
char
)
{
8
,
9
,
10
,
11
,
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
12
,
13
,
14
,
15
};
vector
unsigned
char
perm
=
(
vector
unsigned
char
)
{
8
,
9
,
10
,
11
,
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
12
,
13
,
14
,
15
};
__m128
temp
=
v2
.
val
*
vec_perm
(
v1
.
val
,
v1
.
val
,
perm
)
-
__m128
temp
=
v2
.
val
*
vec_perm
(
v1
.
val
,
v1
.
val
,
perm
)
-
v1
.
val
*
vec_perm
(
v2
.
val
,
v2
.
val
,
perm
);
v1
.
val
*
vec_perm
(
v2
.
val
,
v2
.
val
,
perm
);
...
@@ -324,80 +324,80 @@ static inline void transpose(const fvec4 in[4], fvec4& v0, fvec4& v1, fvec4& v2,
...
@@ -324,80 +324,80 @@ static inline void transpose(const fvec4 in[4], fvec4& v0, fvec4& v1, fvec4& v2,
/**
/**
* Out-of-place transpose from named variables into an array.
* Out-of-place transpose from named variables into an array.
*/
*/
static
inline
void
transpose
(
const
fvec4
v0
,
const
fvec4
v1
,
const
fvec4
v2
,
const
fvec4
v3
,
fvec4
out
[
4
])
{
static
inline
void
transpose
(
fvec4
v0
,
fvec4
v1
,
fvec4
v2
,
fvec4
v3
,
fvec4
out
[
4
])
{
out
[
0
]
=
v0
;
out
[
1
]
=
v1
;
out
[
2
]
=
v2
;
out
[
3
]
=
v3
;
out
[
0
]
=
v0
;
out
[
1
]
=
v1
;
out
[
2
]
=
v2
;
out
[
3
]
=
v3
;
transpose
(
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
]);
transpose
(
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
]);
}
}
// Functions that operate on ivec4s.
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
static
inline
ivec4
min
(
ivec4
v1
,
ivec4
v2
)
{
return
vec_min
(
v1
.
val
,
v2
.
val
);
return
vec_min
(
v1
.
val
,
v2
.
val
);
}
}
static
inline
ivec4
max
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
static
inline
ivec4
max
(
ivec4
v1
,
ivec4
v2
)
{
return
vec_max
(
v1
.
val
,
v2
.
val
);
return
vec_max
(
v1
.
val
,
v2
.
val
);
}
}
static
inline
ivec4
abs
(
const
ivec4
&
v
)
{
static
inline
ivec4
abs
(
ivec4
v
)
{
return
vec_abs
(
v
.
val
);
return
vec_abs
(
v
.
val
);
}
}
static
inline
bool
any
(
const
ivec4
v
)
{
static
inline
bool
any
(
ivec4
v
)
{
return
!
vec_all_eq
(
v
.
val
,
ivec4
(
0
).
val
);
return
!
vec_all_eq
(
v
.
val
,
ivec4
(
0
).
val
);
}
}
// Mathematical operators involving a scalar and a vector.
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
+
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
+
v2
;
return
fvec4
(
v1
)
+
v2
;
}
}
static
inline
fvec4
operator
-
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
-
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
-
v2
;
return
fvec4
(
v1
)
-
v2
;
}
}
static
inline
fvec4
operator
*
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
*
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
*
v2
;
return
fvec4
(
v1
)
*
v2
;
}
}
static
inline
fvec4
operator
/
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
/
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
/
v2
;
return
fvec4
(
v1
)
/
v2
;
}
}
// Operations for blending fvec4s based on an ivec4.
// Operations for blending fvec4s based on an ivec4.
static
inline
fvec4
blend
(
const
fvec4
&
v1
,
const
fvec4
&
v2
,
const
__m128i
&
mask
)
{
static
inline
fvec4
blend
(
fvec4
v1
,
fvec4
v2
,
__m128i
mask
)
{
return
(
__m128
)
((
mask
&
(
__m128i
)
v2
.
val
)
+
((
ivec4
(
0xFFFFFFFF
)
-
ivec4
(
mask
))
&
(
__m128i
)
v1
.
val
).
val
);
return
(
__m128
)
((
mask
&
(
__m128i
)
v2
.
val
)
+
((
ivec4
(
0xFFFFFFFF
)
-
ivec4
(
mask
))
&
(
__m128i
)
v1
.
val
).
val
);
}
}
static
inline
fvec4
blendZero
(
const
fvec4
v
,
const
ivec4
mask
)
{
static
inline
fvec4
blendZero
(
fvec4
v
,
ivec4
mask
)
{
return
blend
(
0.0
f
,
v
,
mask
);
return
blend
(
0.0
f
,
v
,
mask
);
}
}
// These are at the end since they involve other functions defined above.
// These are at the end since they involve other functions defined above.
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
min
(
fvec4
v1
,
fvec4
v2
)
{
return
vec_min
(
v1
.
val
,
v2
.
val
);
return
vec_min
(
v1
.
val
,
v2
.
val
);
}
}
static
inline
fvec4
max
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
max
(
fvec4
v1
,
fvec4
v2
)
{
return
vec_max
(
v1
.
val
,
v2
.
val
);
return
vec_max
(
v1
.
val
,
v2
.
val
);
}
}
static
inline
fvec4
round
(
const
fvec4
&
v
)
{
static
inline
fvec4
round
(
fvec4
v
)
{
return
vec_round
(
v
.
val
);
return
vec_round
(
v
.
val
);
}
}
static
inline
fvec4
floor
(
const
fvec4
&
v
)
{
static
inline
fvec4
floor
(
fvec4
v
)
{
return
vec_floor
(
v
.
val
);
return
vec_floor
(
v
.
val
);
}
}
static
inline
fvec4
ceil
(
const
fvec4
&
v
)
{
static
inline
fvec4
ceil
(
fvec4
v
)
{
return
vec_ceil
(
v
.
val
);
return
vec_ceil
(
v
.
val
);
}
}
static
inline
fvec4
rsqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
rsqrt
(
fvec4
v
)
{
// Initial estimate of rsqrt().
// Initial estimate of rsqrt().
fvec4
y
(
vec_rsqrte
(
v
.
val
));
fvec4
y
(
vec_rsqrte
(
v
.
val
));
...
@@ -409,7 +409,7 @@ static inline fvec4 rsqrt(const fvec4& v) {
...
@@ -409,7 +409,7 @@ static inline fvec4 rsqrt(const fvec4& v) {
return
y
;
return
y
;
}
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
sqrt
(
fvec4
v
)
{
return
vec_sqrt
(
v
.
val
);
return
vec_sqrt
(
v
.
val
);
}
}
...
@@ -417,7 +417,7 @@ static inline fvec4 sqrt(const fvec4& v) {
...
@@ -417,7 +417,7 @@ static inline fvec4 sqrt(const fvec4& v) {
* of vectors. The first result vector contains the values at the given indexes, and the second
* of vectors. The first result vector contains the values at the given indexes, and the second
* result vector contains the values from each respective index+1.
* result vector contains the values from each respective index+1.
*/
*/
static
inline
void
gatherVecPair
(
const
float
*
table
,
const
ivec4
index
,
fvec4
&
out0
,
fvec4
&
out1
)
{
static
inline
void
gatherVecPair
(
const
float
*
table
,
ivec4
index
,
fvec4
&
out0
,
fvec4
&
out1
)
{
fvec4
t0
(
table
+
index
[
0
]);
fvec4
t0
(
table
+
index
[
0
]);
fvec4
t1
(
table
+
index
[
1
]);
fvec4
t1
(
table
+
index
[
1
]);
fvec4
t2
(
table
+
index
[
2
]);
fvec4
t2
(
table
+
index
[
2
]);
...
@@ -440,7 +440,7 @@ static inline void gatherVecPair(const float* table, const ivec4 index, fvec4& o
...
@@ -440,7 +440,7 @@ static inline void gatherVecPair(const float* table, const ivec4 index, fvec4& o
* output[2] = (Z0 + Z1 + Z2 + Z3)
* output[2] = (Z0 + Z1 + Z2 + Z3)
* output[3] = undefined
* output[3] = undefined
*/
*/
static
inline
fvec4
reduceToVec3
(
const
fvec4
x
,
const
fvec4
y
,
const
fvec4
z
)
{
static
inline
fvec4
reduceToVec3
(
fvec4
x
,
fvec4
y
,
fvec4
z
)
{
const
auto
nx
=
reduceAdd
(
x
);
const
auto
nx
=
reduceAdd
(
x
);
const
auto
ny
=
reduceAdd
(
y
);
const
auto
ny
=
reduceAdd
(
y
);
const
auto
nz
=
reduceAdd
(
z
);
const
auto
nz
=
reduceAdd
(
z
);
...
...
openmmapi/include/openmm/internal/vectorize_sse.h
View file @
f83289a4
...
@@ -108,55 +108,55 @@ public:
...
@@ -108,55 +108,55 @@ public:
#endif
#endif
}
}
fvec4
operator
+
(
const
fvec4
&
other
)
const
{
fvec4
operator
+
(
fvec4
other
)
const
{
return
_mm_add_ps
(
val
,
other
);
return
_mm_add_ps
(
val
,
other
);
}
}
fvec4
operator
-
(
const
fvec4
&
other
)
const
{
fvec4
operator
-
(
fvec4
other
)
const
{
return
_mm_sub_ps
(
val
,
other
);
return
_mm_sub_ps
(
val
,
other
);
}
}
fvec4
operator
*
(
const
fvec4
&
other
)
const
{
fvec4
operator
*
(
fvec4
other
)
const
{
return
_mm_mul_ps
(
val
,
other
);
return
_mm_mul_ps
(
val
,
other
);
}
}
fvec4
operator
/
(
const
fvec4
&
other
)
const
{
fvec4
operator
/
(
fvec4
other
)
const
{
return
_mm_div_ps
(
val
,
other
);
return
_mm_div_ps
(
val
,
other
);
}
}
void
operator
+=
(
const
fvec4
&
other
)
{
void
operator
+=
(
fvec4
other
)
{
val
=
_mm_add_ps
(
val
,
other
);
val
=
_mm_add_ps
(
val
,
other
);
}
}
void
operator
-=
(
const
fvec4
&
other
)
{
void
operator
-=
(
fvec4
other
)
{
val
=
_mm_sub_ps
(
val
,
other
);
val
=
_mm_sub_ps
(
val
,
other
);
}
}
void
operator
*=
(
const
fvec4
&
other
)
{
void
operator
*=
(
fvec4
other
)
{
val
=
_mm_mul_ps
(
val
,
other
);
val
=
_mm_mul_ps
(
val
,
other
);
}
}
void
operator
/=
(
const
fvec4
&
other
)
{
void
operator
/=
(
fvec4
other
)
{
val
=
_mm_div_ps
(
val
,
other
);
val
=
_mm_div_ps
(
val
,
other
);
}
}
fvec4
operator
-
()
const
{
fvec4
operator
-
()
const
{
return
_mm_sub_ps
(
_mm_set1_ps
(
0.0
f
),
val
);
return
_mm_sub_ps
(
_mm_set1_ps
(
0.0
f
),
val
);
}
}
fvec4
operator
&
(
const
fvec4
&
other
)
const
{
fvec4
operator
&
(
fvec4
other
)
const
{
return
_mm_and_ps
(
val
,
other
);
return
_mm_and_ps
(
val
,
other
);
}
}
fvec4
operator
|
(
const
fvec4
&
other
)
const
{
fvec4
operator
|
(
fvec4
other
)
const
{
return
_mm_or_ps
(
val
,
other
);
return
_mm_or_ps
(
val
,
other
);
}
}
fvec4
operator
==
(
const
fvec4
&
other
)
const
{
fvec4
operator
==
(
fvec4
other
)
const
{
return
_mm_cmpeq_ps
(
val
,
other
);
return
_mm_cmpeq_ps
(
val
,
other
);
}
}
fvec4
operator
!=
(
const
fvec4
&
other
)
const
{
fvec4
operator
!=
(
fvec4
other
)
const
{
return
_mm_cmpneq_ps
(
val
,
other
);
return
_mm_cmpneq_ps
(
val
,
other
);
}
}
fvec4
operator
>
(
const
fvec4
&
other
)
const
{
fvec4
operator
>
(
fvec4
other
)
const
{
return
_mm_cmpgt_ps
(
val
,
other
);
return
_mm_cmpgt_ps
(
val
,
other
);
}
}
fvec4
operator
<
(
const
fvec4
&
other
)
const
{
fvec4
operator
<
(
fvec4
other
)
const
{
return
_mm_cmplt_ps
(
val
,
other
);
return
_mm_cmplt_ps
(
val
,
other
);
}
}
fvec4
operator
>=
(
const
fvec4
&
other
)
const
{
fvec4
operator
>=
(
fvec4
other
)
const
{
return
_mm_cmpge_ps
(
val
,
other
);
return
_mm_cmpge_ps
(
val
,
other
);
}
}
fvec4
operator
<=
(
const
fvec4
&
other
)
const
{
fvec4
operator
<=
(
fvec4
other
)
const
{
return
_mm_cmple_ps
(
val
,
other
);
return
_mm_cmple_ps
(
val
,
other
);
}
}
operator
ivec4
()
const
;
operator
ivec4
()
const
;
...
@@ -191,49 +191,49 @@ public:
...
@@ -191,49 +191,49 @@ public:
void
store
(
int
*
v
)
const
{
void
store
(
int
*
v
)
const
{
_mm_storeu_si128
((
__m128i
*
)
v
,
val
);
_mm_storeu_si128
((
__m128i
*
)
v
,
val
);
}
}
ivec4
operator
+
(
const
ivec4
&
other
)
const
{
ivec4
operator
+
(
ivec4
other
)
const
{
return
_mm_add_epi32
(
val
,
other
);
return
_mm_add_epi32
(
val
,
other
);
}
}
ivec4
operator
-
(
const
ivec4
&
other
)
const
{
ivec4
operator
-
(
ivec4
other
)
const
{
return
_mm_sub_epi32
(
val
,
other
);
return
_mm_sub_epi32
(
val
,
other
);
}
}
ivec4
operator
*
(
const
ivec4
&
other
)
const
{
ivec4
operator
*
(
ivec4
other
)
const
{
return
_mm_mullo_epi32
(
val
,
other
);
return
_mm_mullo_epi32
(
val
,
other
);
}
}
void
operator
+=
(
const
ivec4
&
other
)
{
void
operator
+=
(
ivec4
other
)
{
val
=
_mm_add_epi32
(
val
,
other
);
val
=
_mm_add_epi32
(
val
,
other
);
}
}
void
operator
-=
(
const
ivec4
&
other
)
{
void
operator
-=
(
ivec4
other
)
{
val
=
_mm_sub_epi32
(
val
,
other
);
val
=
_mm_sub_epi32
(
val
,
other
);
}
}
void
operator
*=
(
const
ivec4
&
other
)
{
void
operator
*=
(
ivec4
other
)
{
val
=
_mm_mullo_epi32
(
val
,
other
);
val
=
_mm_mullo_epi32
(
val
,
other
);
}
}
ivec4
operator
-
()
const
{
ivec4
operator
-
()
const
{
return
_mm_sub_epi32
(
_mm_set1_epi32
(
0
),
val
);
return
_mm_sub_epi32
(
_mm_set1_epi32
(
0
),
val
);
}
}
ivec4
operator
&
(
const
ivec4
&
other
)
const
{
ivec4
operator
&
(
ivec4
other
)
const
{
return
_mm_and_si128
(
val
,
other
);
return
_mm_and_si128
(
val
,
other
);
}
}
ivec4
operator
|
(
const
ivec4
&
other
)
const
{
ivec4
operator
|
(
ivec4
other
)
const
{
return
_mm_or_si128
(
val
,
other
);
return
_mm_or_si128
(
val
,
other
);
}
}
ivec4
operator
==
(
const
ivec4
&
other
)
const
{
ivec4
operator
==
(
ivec4
other
)
const
{
return
_mm_cmpeq_epi32
(
val
,
other
);
return
_mm_cmpeq_epi32
(
val
,
other
);
}
}
ivec4
operator
!=
(
const
ivec4
&
other
)
const
{
ivec4
operator
!=
(
ivec4
other
)
const
{
return
_mm_xor_si128
(
*
this
==
other
,
_mm_set1_epi32
(
0xFFFFFFFF
));
return
_mm_xor_si128
(
*
this
==
other
,
_mm_set1_epi32
(
0xFFFFFFFF
));
}
}
ivec4
operator
>
(
const
ivec4
&
other
)
const
{
ivec4
operator
>
(
ivec4
other
)
const
{
return
_mm_cmpgt_epi32
(
val
,
other
);
return
_mm_cmpgt_epi32
(
val
,
other
);
}
}
ivec4
operator
<
(
const
ivec4
&
other
)
const
{
ivec4
operator
<
(
ivec4
other
)
const
{
return
_mm_cmplt_epi32
(
val
,
other
);
return
_mm_cmplt_epi32
(
val
,
other
);
}
}
ivec4
operator
>=
(
const
ivec4
&
other
)
const
{
ivec4
operator
>=
(
ivec4
other
)
const
{
return
_mm_xor_si128
(
_mm_cmplt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
return
_mm_xor_si128
(
_mm_cmplt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
}
}
ivec4
operator
<=
(
const
ivec4
&
other
)
const
{
ivec4
operator
<=
(
ivec4
other
)
const
{
return
_mm_xor_si128
(
_mm_cmpgt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
return
_mm_xor_si128
(
_mm_cmpgt_epi32
(
val
,
other
),
_mm_set1_epi32
(
0xFFFFFFFF
));
}
}
operator
fvec4
()
const
;
operator
fvec4
()
const
;
...
@@ -258,36 +258,36 @@ inline fvec4 fvec4::expandBitsToMask(int bitmask) {
...
@@ -258,36 +258,36 @@ inline fvec4 fvec4::expandBitsToMask(int bitmask) {
// Functions that operate on fvec4s.
// Functions that operate on fvec4s.
static
inline
fvec4
floor
(
const
fvec4
&
v
)
{
static
inline
fvec4
floor
(
fvec4
v
)
{
return
fvec4
(
_mm_floor_ps
(
v
.
val
));
return
fvec4
(
_mm_floor_ps
(
v
.
val
));
}
}
static
inline
fvec4
ceil
(
const
fvec4
&
v
)
{
static
inline
fvec4
ceil
(
fvec4
v
)
{
return
fvec4
(
_mm_ceil_ps
(
v
.
val
));
return
fvec4
(
_mm_ceil_ps
(
v
.
val
));
}
}
static
inline
fvec4
round
(
const
fvec4
&
v
)
{
static
inline
fvec4
round
(
fvec4
v
)
{
return
fvec4
(
_mm_round_ps
(
v
.
val
,
_MM_FROUND_TO_NEAREST_INT
));
return
fvec4
(
_mm_round_ps
(
v
.
val
,
_MM_FROUND_TO_NEAREST_INT
));
}
}
static
inline
fvec4
min
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
min
(
fvec4
v1
,
fvec4
v2
)
{
return
fvec4
(
_mm_min_ps
(
v1
.
val
,
v2
.
val
));
return
fvec4
(
_mm_min_ps
(
v1
.
val
,
v2
.
val
));
}
}
static
inline
fvec4
max
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
max
(
fvec4
v1
,
fvec4
v2
)
{
return
fvec4
(
_mm_max_ps
(
v1
.
val
,
v2
.
val
));
return
fvec4
(
_mm_max_ps
(
v1
.
val
,
v2
.
val
));
}
}
static
inline
fvec4
abs
(
const
fvec4
&
v
)
{
static
inline
fvec4
abs
(
fvec4
v
)
{
static
const
__m128
mask
=
_mm_castsi128_ps
(
_mm_set1_epi32
(
0x7FFFFFFF
));
static
const
__m128
mask
=
_mm_castsi128_ps
(
_mm_set1_epi32
(
0x7FFFFFFF
));
return
fvec4
(
_mm_and_ps
(
v
.
val
,
mask
));
return
fvec4
(
_mm_and_ps
(
v
.
val
,
mask
));
}
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
sqrt
(
fvec4
v
)
{
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
}
}
static
inline
fvec4
rsqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
rsqrt
(
fvec4
v
)
{
// Initial estimate of rsqrt().
// Initial estimate of rsqrt().
fvec4
y
(
_mm_rsqrt_ps
(
v
.
val
));
fvec4
y
(
_mm_rsqrt_ps
(
v
.
val
));
...
@@ -299,27 +299,27 @@ static inline fvec4 rsqrt(const fvec4& v) {
...
@@ -299,27 +299,27 @@ static inline fvec4 rsqrt(const fvec4& v) {
return
y
;
return
y
;
}
}
static
inline
fvec4
exp
(
const
fvec4
&
v
)
{
static
inline
fvec4
exp
(
fvec4
v
)
{
return
fvec4
(
exp_ps
(
v
.
val
));
return
fvec4
(
exp_ps
(
v
.
val
));
}
}
static
inline
fvec4
log
(
const
fvec4
&
v
)
{
static
inline
fvec4
log
(
fvec4
v
)
{
return
fvec4
(
log_ps
(
v
.
val
));
return
fvec4
(
log_ps
(
v
.
val
));
}
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
float
dot3
(
fvec4
v1
,
fvec4
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
}
}
static
inline
float
dot4
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
float
dot4
(
fvec4
v1
,
fvec4
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0xF1
));
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0xF1
));
}
}
static
inline
float
reduceAdd
(
const
fvec4
v
)
{
static
inline
float
reduceAdd
(
fvec4
v
)
{
return
dot4
(
v
,
fvec4
(
1.0
f
));
return
dot4
(
v
,
fvec4
(
1.0
f
));
}
}
static
inline
fvec4
cross
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
cross
(
fvec4
v1
,
fvec4
v2
)
{
fvec4
temp
=
fvec4
(
_mm_mul_ps
(
v1
,
_mm_shuffle_ps
(
v2
,
v2
,
_MM_SHUFFLE
(
3
,
0
,
2
,
1
))))
-
fvec4
temp
=
fvec4
(
_mm_mul_ps
(
v1
,
_mm_shuffle_ps
(
v2
,
v2
,
_MM_SHUFFLE
(
3
,
0
,
2
,
1
))))
-
fvec4
(
_mm_mul_ps
(
v2
,
_mm_shuffle_ps
(
v1
,
v1
,
_MM_SHUFFLE
(
3
,
0
,
2
,
1
))));
fvec4
(
_mm_mul_ps
(
v2
,
_mm_shuffle_ps
(
v1
,
v1
,
_MM_SHUFFLE
(
3
,
0
,
2
,
1
))));
return
_mm_shuffle_ps
(
temp
,
temp
,
_MM_SHUFFLE
(
3
,
0
,
2
,
1
));
return
_mm_shuffle_ps
(
temp
,
temp
,
_MM_SHUFFLE
(
3
,
0
,
2
,
1
));
...
@@ -340,53 +340,53 @@ static inline void transpose(const fvec4 in[4], fvec4& v0, fvec4& v1, fvec4& v2,
...
@@ -340,53 +340,53 @@ static inline void transpose(const fvec4 in[4], fvec4& v0, fvec4& v1, fvec4& v2,
/**
/**
* Out-of-place transpose from named variables into an array.
* Out-of-place transpose from named variables into an array.
*/
*/
static
inline
void
transpose
(
const
fvec4
v0
,
const
fvec4
v1
,
const
fvec4
v2
,
const
fvec4
v3
,
fvec4
out
[
4
])
{
static
inline
void
transpose
(
fvec4
v0
,
fvec4
v1
,
fvec4
v2
,
fvec4
v3
,
fvec4
out
[
4
])
{
out
[
0
]
=
v0
;
out
[
1
]
=
v1
;
out
[
2
]
=
v2
;
out
[
3
]
=
v3
;
out
[
0
]
=
v0
;
out
[
1
]
=
v1
;
out
[
2
]
=
v2
;
out
[
3
]
=
v3
;
transpose
(
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
]);
transpose
(
out
[
0
],
out
[
1
],
out
[
2
],
out
[
3
]);
}
}
// Functions that operate on ivec4s.
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
static
inline
ivec4
min
(
ivec4
v1
,
ivec4
v2
)
{
return
ivec4
(
_mm_min_epi32
(
v1
.
val
,
v2
.
val
));
return
ivec4
(
_mm_min_epi32
(
v1
.
val
,
v2
.
val
));
}
}
static
inline
ivec4
max
(
const
ivec4
&
v1
,
const
ivec4
&
v2
)
{
static
inline
ivec4
max
(
ivec4
v1
,
ivec4
v2
)
{
return
ivec4
(
_mm_max_epi32
(
v1
.
val
,
v2
.
val
));
return
ivec4
(
_mm_max_epi32
(
v1
.
val
,
v2
.
val
));
}
}
static
inline
ivec4
abs
(
const
ivec4
&
v
)
{
static
inline
ivec4
abs
(
ivec4
v
)
{
return
ivec4
(
_mm_abs_epi32
(
v
.
val
));
return
ivec4
(
_mm_abs_epi32
(
v
.
val
));
}
}
static
inline
bool
any
(
const
ivec4
&
v
)
{
static
inline
bool
any
(
ivec4
v
)
{
return
!
_mm_test_all_zeros
(
v
,
_mm_set1_epi32
(
0xFFFFFFFF
));
return
!
_mm_test_all_zeros
(
v
,
_mm_set1_epi32
(
0xFFFFFFFF
));
}
}
// Mathematical operators involving a scalar and a vector.
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
+
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
+
v2
;
return
fvec4
(
v1
)
+
v2
;
}
}
static
inline
fvec4
operator
-
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
-
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
-
v2
;
return
fvec4
(
v1
)
-
v2
;
}
}
static
inline
fvec4
operator
*
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
*
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
*
v2
;
return
fvec4
(
v1
)
*
v2
;
}
}
static
inline
fvec4
operator
/
(
float
v1
,
const
fvec4
&
v2
)
{
static
inline
fvec4
operator
/
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
/
v2
;
return
fvec4
(
v1
)
/
v2
;
}
}
// Operations for blending fvec4
// Operations for blending fvec4
static
inline
fvec4
blend
(
const
fvec4
&
v1
,
const
fvec4
&
v2
,
const
fvec4
&
mask
)
{
static
inline
fvec4
blend
(
fvec4
v1
,
fvec4
v2
,
fvec4
mask
)
{
return
fvec4
(
_mm_blendv_ps
(
v1
.
val
,
v2
.
val
,
mask
.
val
));
return
fvec4
(
_mm_blendv_ps
(
v1
.
val
,
v2
.
val
,
mask
.
val
));
}
}
static
inline
fvec4
blendZero
(
const
fvec4
v
,
const
fvec4
mask
)
{
static
inline
fvec4
blendZero
(
fvec4
v
,
fvec4
mask
)
{
return
blend
(
0.0
f
,
v
,
mask
);
return
blend
(
0.0
f
,
v
,
mask
);
}
}
...
@@ -394,7 +394,7 @@ static inline fvec4 blendZero(const fvec4 v, const fvec4 mask) {
...
@@ -394,7 +394,7 @@ static inline fvec4 blendZero(const fvec4 v, const fvec4 mask) {
* of vectors. The first result vector contains the values at the given indexes, and the second
* of vectors. The first result vector contains the values at the given indexes, and the second
* result vector contains the values from each respective index+1.
* result vector contains the values from each respective index+1.
*/
*/
static
inline
void
gatherVecPair
(
const
float
*
table
,
const
ivec4
index
,
fvec4
&
out0
,
fvec4
&
out1
)
{
static
inline
void
gatherVecPair
(
const
float
*
table
,
ivec4
index
,
fvec4
&
out0
,
fvec4
&
out1
)
{
fvec4
t0
(
table
+
index
[
0
]);
fvec4
t0
(
table
+
index
[
0
]);
fvec4
t1
(
table
+
index
[
1
]);
fvec4
t1
(
table
+
index
[
1
]);
fvec4
t2
(
table
+
index
[
2
]);
fvec4
t2
(
table
+
index
[
2
]);
...
@@ -417,7 +417,7 @@ static inline void gatherVecPair(const float* table, const ivec4 index, fvec4& o
...
@@ -417,7 +417,7 @@ static inline void gatherVecPair(const float* table, const ivec4 index, fvec4& o
* output[2] = (Z0 + Z1 + Z2 + Z3)
* output[2] = (Z0 + Z1 + Z2 + Z3)
* output[3] = undefined
* output[3] = undefined
*/
*/
static
inline
fvec4
reduceToVec3
(
const
fvec4
x
,
const
fvec4
y
,
const
fvec4
z
)
{
static
inline
fvec4
reduceToVec3
(
fvec4
x
,
fvec4
y
,
fvec4
z
)
{
// :TODO: Could be made more efficient.
// :TODO: Could be made more efficient.
const
auto
nx
=
reduceAdd
(
x
);
const
auto
nx
=
reduceAdd
(
x
);
const
auto
ny
=
reduceAdd
(
y
);
const
auto
ny
=
reduceAdd
(
y
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment