Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b0d13582
Unverified
Commit
b0d13582
authored
May 26, 2020
by
peastman
Committed by
GitHub
May 26, 2020
Browse files
Merge pull request #2692 from dwtowner/cpu_generic_vector_test
Cpu generic vector test
parents
f902295b
154e2854
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
479 additions
and
222 deletions
+479
-222
openmmapi/include/openmm/internal/vectorize8.h
openmmapi/include/openmm/internal/vectorize8.h
+1
-1
openmmapi/include/openmm/internal/vectorize_neon.h
openmmapi/include/openmm/internal/vectorize_neon.h
+1
-1
openmmapi/include/openmm/internal/vectorize_pnacl.h
openmmapi/include/openmm/internal/vectorize_pnacl.h
+1
-1
openmmapi/include/openmm/internal/vectorize_ppc.h
openmmapi/include/openmm/internal/vectorize_ppc.h
+1
-1
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+1
-1
tests/TestVectorize.cpp
tests/TestVectorize.cpp
+7
-45
tests/TestVectorize8.cpp
tests/TestVectorize8.cpp
+5
-172
tests/TestVectorizeGeneric.h
tests/TestVectorizeGeneric.h
+462
-0
No files found.
openmmapi/include/openmm/internal/vectorize8.h
View file @
b0d13582
...
...
@@ -46,7 +46,7 @@ class fvec8 {
public:
__m256
val
;
fvec8
()
{}
fvec8
()
=
default
;
fvec8
(
float
v
)
:
val
(
_mm256_set1_ps
(
v
))
{}
fvec8
(
float
v1
,
float
v2
,
float
v3
,
float
v4
,
float
v5
,
float
v6
,
float
v7
,
float
v8
)
:
val
(
_mm256_set_ps
(
v8
,
v7
,
v6
,
v5
,
v4
,
v3
,
v2
,
v1
))
{}
fvec8
(
__m256
v
)
:
val
(
v
)
{}
...
...
openmmapi/include/openmm/internal/vectorize_neon.h
View file @
b0d13582
...
...
@@ -74,7 +74,7 @@ class fvec4 {
public:
float32x4_t
val
;
fvec4
()
{}
fvec4
()
=
default
;
fvec4
(
float
v
)
:
val
(
vdupq_n_f32
(
v
))
{}
fvec4
(
float
v1
,
float
v2
,
float
v3
,
float
v4
)
{
float
v
[]
=
{
v1
,
v2
,
v3
,
v4
};
...
...
openmmapi/include/openmm/internal/vectorize_pnacl.h
View file @
b0d13582
...
...
@@ -56,7 +56,7 @@ class fvec4 {
public:
__m128
val
;
fvec4
()
{}
fvec4
()
=
default
;
fvec4
(
float
v
)
{
val
=
{
v
,
v
,
v
,
v
};
}
...
...
openmmapi/include/openmm/internal/vectorize_ppc.h
View file @
b0d13582
...
...
@@ -57,7 +57,7 @@ class fvec4 {
public:
__m128
val
;
fvec4
()
{}
fvec4
()
=
default
;
fvec4
(
float
v
)
{
val
=
(
__m128
)
{
v
,
v
,
v
,
v
};
}
...
...
openmmapi/include/openmm/internal/vectorize_sse.h
View file @
b0d13582
...
...
@@ -68,7 +68,7 @@ class fvec4 {
public:
__m128
val
;
fvec4
()
{}
fvec4
()
=
default
;
fvec4
(
float
v
)
:
val
(
_mm_set1_ps
(
v
))
{}
fvec4
(
float
v1
,
float
v2
,
float
v3
,
float
v4
)
:
val
(
_mm_set_ps
(
v4
,
v3
,
v2
,
v1
))
{}
fvec4
(
__m128
v
)
:
val
(
v
)
{}
...
...
tests/TestVectorize.cpp
View file @
b0d13582
...
...
@@ -8,7 +8,7 @@
* *
* Portions copyright (c) 2014-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors:
*
* Contributors:
Daniel Towner
*
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
...
...
@@ -35,6 +35,9 @@
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/internal/vectorize.h"
#include "TestVectorizeGeneric.h"
#include <iostream>
using
namespace
OpenMM
;
...
...
@@ -203,45 +206,6 @@ void testTranspose() {
ASSERT_VEC4_EQUAL
(
h
[
3
],
0.4
,
0.8
,
1.2
,
1.6
);
}
void
testUtility
()
{
fvec4
f1
(
7
,
2
,
-
5
,
13
);
fvec4
f2
(
1
,
2
,
4
,
7
);
fvec4
f3
(
0.5
,
1.0
,
1.5
,
2.0
);
// Reduce-add across three vectors into a single vec3.
const
auto
computedVec3
=
reduceToVec3
(
f1
,
f2
,
f3
);
ASSERT_EQUAL
(
17
,
computedVec3
[
0
]);
ASSERT_EQUAL
(
14
,
computedVec3
[
1
]);
ASSERT_EQUAL
(
5
,
computedVec3
[
2
]);
// Gather values from a table. Variants for both one vector and two vector gathers are provided.
float
table
[
2048
];
for
(
int
i
=
0
;
i
<
2048
;
++
i
)
table
[
i
]
=
-
i
;
// Same index to make it easy to debug, but negative to avoid copying idx.
// Single vector gather.
const
int
vidx
[
4
]
=
{
156
,
1987
,
33
,
1003
};
fvec4
g
(
table
,
vidx
);
ASSERT_VEC4_EQUAL
(
g
,
-
156
,
-
1987
,
-
33
,
-
1003
);
// Pair-wise vector gather.
fvec4
p0
,
p1
;
gatherVecPair
(
table
,
ivec4
(
57
,
105
,
1976
,
91
),
p0
,
p1
);
ASSERT_VEC4_EQUAL
(
p0
,
-
57
,
-
105
,
-
1976
,
-
91
);
ASSERT_VEC4_EQUAL
(
p1
,
-
58
,
-
106
,
-
1977
,
-
92
);
// Verify building blend mask from integer. The mask isn't checked directly, as different platforms
// use different types of mask. Instead, check the side effect of using the mask in a blend.
const
auto
elements
=
fvec4
(
1
,
2
,
3
,
4
);
const
auto
maskZero
=
fvec4
::
expandBitsToMask
(
0
);
ASSERT_VEC4_EQUAL_INT
(
blendZero
(
elements
,
maskZero
),
0
,
0
,
0
,
0
);
const
auto
maskOne
=
fvec4
::
expandBitsToMask
(
0b1111
);
ASSERT_VEC4_EQUAL_INT
(
blendZero
(
elements
,
maskOne
),
1
,
2
,
3
,
4
);
const
auto
maskMix
=
fvec4
::
expandBitsToMask
(
0b1001
);
ASSERT_VEC4_EQUAL_INT
(
blendZero
(
elements
,
maskMix
),
1
,
0
,
0
,
4
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
!
isVec4Supported
())
{
...
...
@@ -249,12 +213,10 @@ int main(int argc, char* argv[]) {
return
0
;
}
testLoadStore
();
testArithmetic
();
testLogic
();
testComparisons
();
testMathFunctions
();
testTranspose
();
testUtility
();
TestFvec
<
fvec4
>::
testAll
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
tests/TestVectorize8.cpp
View file @
b0d13582
...
...
@@ -8,7 +8,7 @@
* *
* Portions copyright (c) 2014-2015 Stanford University and the Authors. *
* Authors: Robert T. McGibbon *
* Contributors:
*
* Contributors:
Daniel Towner
*
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
...
...
@@ -37,6 +37,7 @@
#include "openmm/internal/vectorize8.h"
#include <iostream>
#include "TestVectorizeGeneric.h"
#ifndef __AVX__
bool
isVec8Supported
()
{
...
...
@@ -66,32 +67,15 @@ using namespace std;
#define ASSERT_VEC8_EQUAL(found, expected0, expected1, expected2, expected3, expected4, expected5, expected6, expected7) {if (std::abs((found).lowerVec()[0]-(expected0))>1e-6 || std::abs((found).lowerVec()[1]-(expected1))>1e-6 || std::abs((found).lowerVec()[2]-(expected2))>1e-6 || std::abs((found).lowerVec()[3]-(expected3))>1e-6 || std::abs((found).upperVec()[0]-(expected4))>1e-6 || std::abs((found).upperVec()[1]-(expected5))>1e-6 || std::abs((found).upperVec()[2]-(expected6))>1e-6 || std::abs((found).upperVec()[3]-(expected7))>1e-6) {std::stringstream details; details << " Expected ("<<(expected0)<<","<<(expected1)<<","<<(expected2)<<","<<(expected3)<<","<<(expected4)<<","<<(expected5)<<","<<(expected6)<<","<<(expected7)<<"), found ("<<(found).lowerVec()[0]<<","<<(found).lowerVec()[1]<<","<<(found).lowerVec()[2]<<","<<(found).lowerVec()[3]<<","<<(found).upperVec()[0]<<","<<(found).upperVec()[1]<<","<<(found).upperVec()[2]<<","<<(found).upperVec()[3]<<")"; throwException(__FILE__, __LINE__, details.str());}};
#define ASSERT_VEC8_EQUAL_INT(found, expected0, expected1, expected2, expected3, expected4, expected5, expected6, expected7) {if ((found).lowerVec()[0] != (expected0) || (found).lowerVec()[1] != (expected1) || (found).lowerVec()[2] != (expected2) || (found).lowerVec()[3] != (expected3) || (found).upperVec()[0] != (expected4) || (found).upperVec()[1] != (expected5) ||(found).upperVec()[2] != (expected6) || (found).upperVec()[3] != (expected7)) {std::stringstream details; details << " Expected ("<<(expected0)<<","<<(expected1)<<","<<(expected2)<<","<<(expected3)<<","<<(expected4)<<","<<(expected5)<<","<<(expected6)<<","<<(expected7)<<"), found ("<<(found).lowerVec()[0]<<","<<(found).lowerVec()[1]<<","<<(found).lowerVec()[2]<<","<<(found).lowerVec()[3]<<","<<(found).upperVec()[0]<<","<<(found).upperVec()[1]<<","<<(found).upperVec()[2]<<","<<(found).upperVec()[3]<<")"; throwException(__FILE__, __LINE__, details.str());}};
void
testLoadStore
()
{
fvec8
f1
(
2.0
);
ivec8
i1
(
3
);
ASSERT_VEC8_EQUAL
(
f1
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
);
ASSERT_VEC8_EQUAL_INT
(
i1
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
);
fvec8
f2
(
2.5
,
3.0
,
3.5
,
4.0
,
4.5
,
5.0
,
5.5
,
6.0
);
ivec8
i2
(
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
);
ASSERT_VEC8_EQUAL
(
f2
,
2.5
,
3.0
,
3.5
,
4.0
,
4.5
,
5.0
,
5.5
,
6.0
);
ASSERT_VEC8_EQUAL_INT
(
i2
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
);
float
farray
[
8
];
int
iarray
[
8
];
f2
.
store
(
farray
);
i2
.
store
(
iarray
);
fvec8
f3
(
farray
);
ivec8
i3
(
iarray
);
ASSERT_VEC8_EQUAL
(
f3
,
2.5
,
3.0
,
3.5
,
4.0
,
4.5
,
5.0
,
5.5
,
6.0
);
ASSERT_VEC8_EQUAL_INT
(
i3
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
);
ASSERT_EQUAL
(
f3
.
lowerVec
()[
0
],
2.5
);
ASSERT_EQUAL
(
f3
.
lowerVec
()[
1
],
3.0
);
ASSERT_EQUAL
(
f3
.
lowerVec
()[
2
],
3.5
);
ASSERT_EQUAL
(
f3
.
lowerVec
()[
3
],
4.0
);
ASSERT_EQUAL
(
f3
.
upperVec
()[
0
],
4.5
);
ASSERT_EQUAL
(
f3
.
upperVec
()[
1
],
5.0
);
ASSERT_EQUAL
(
f3
.
upperVec
()[
2
],
5.5
);
ASSERT_EQUAL
(
f3
.
upperVec
()[
3
],
6.0
);
ASSERT_EQUAL
(
i3
.
lowerVec
()[
0
],
2
);
ASSERT_EQUAL
(
i3
.
lowerVec
()[
1
],
3
);
ASSERT_EQUAL
(
i3
.
lowerVec
()[
2
],
4
);
...
...
@@ -112,27 +96,6 @@ void testLoadStore() {
ASSERT_EQUAL
(
overwriteTest
[
3
],
9
);
}
void
testArithmetic
()
{
fvec8
f1
(
0.5
,
1.0
,
1.5
,
2.0
,
2.5
,
3.0
,
3.5
,
4.0
);
ASSERT_VEC8_EQUAL
(
f1
+
fvec8
(
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
),
1.5
,
3.
,
4.5
,
6.
,
7.5
,
9.
,
10.5
,
12.
);
ASSERT_VEC8_EQUAL
(
f1
-
fvec8
(
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
),
-
0.5
,
-
1.
,
-
1.5
,
-
2.
,
-
2.5
,
-
3.
,
-
3.5
,
-
4.
);
ASSERT_VEC8_EQUAL
(
f1
*
fvec8
(
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
),
0.5
,
2.
,
4.5
,
8.
,
12.5
,
18.
,
24.5
,
32.
);
ASSERT_VEC8_EQUAL
(
f1
/
fvec8
(
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
),
0.5
,
0.5
,
0.5
,
0.5
,
0.5
,
0.5
,
0.5
,
0.5
);
f1
=
fvec8
(
0.5
,
1.0
,
1.5
,
2.0
,
2.5
,
3.0
,
3.5
,
4.0
);
f1
+=
fvec8
(
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
);
ASSERT_VEC8_EQUAL
(
f1
,
1.5
,
3.
,
4.5
,
6.
,
7.5
,
9.
,
10.5
,
12.
);
f1
=
fvec8
(
0.5
,
1.0
,
1.5
,
2.0
,
2.5
,
3.0
,
3.5
,
4.0
);
f1
-=
fvec8
(
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
);
ASSERT_VEC8_EQUAL
(
f1
,
-
0.5
,
-
1.
,
-
1.5
,
-
2.
,
-
2.5
,
-
3.
,
-
3.5
,
-
4.
);
f1
=
fvec8
(
0.5
,
1.0
,
1.5
,
2.0
,
2.5
,
3.0
,
3.5
,
4.0
);
f1
*=
fvec8
(
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
);
ASSERT_VEC8_EQUAL
(
f1
,
0.5
,
2.
,
4.5
,
8.
,
12.5
,
18.
,
24.5
,
32.
);
f1
=
fvec8
(
0.5
,
1.0
,
1.5
,
2.0
,
2.5
,
3.0
,
3.5
,
4.0
);
f1
/=
fvec8
(
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
);
ASSERT_VEC8_EQUAL
(
f1
,
0.5
,
0.5
,
0.5
,
0.5
,
0.5
,
0.5
,
0.5
,
0.5
);
}
void
testLogic
()
{
int
allBits
=
-
1
;
float
allBitsf
=
*
((
float
*
)
&
allBits
);
...
...
@@ -154,134 +117,6 @@ void testLogic() {
ASSERT_VEC8_EQUAL_INT
(
i1
|
mask
,
1
,
allBits
,
allBits
,
4
,
5
,
allBits
,
allBits
,
8
);
}
void
testComparisons
()
{
fvec8
v1
(
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
,
0.0
);
fvec8
v2
(
1.5
,
1.5
,
1.5
,
1.5
,
1.5
,
1.5
,
1.5
,
1.5
);
ASSERT_VEC8_EQUAL
(
blend
(
v1
,
v2
,
fvec8
(
1.0
,
1.5
,
3.0
,
2.2
,
10.0
,
10.5
,
13.0
,
12.2
)
==
fvec8
(
1.1
,
1.5
,
3.0
,
2.1
,
10.1
,
10.5
,
13.0
,
12.1
)),
0.0
,
1.5
,
1.5
,
0.0
,
0.0
,
1.5
,
1.5
,
0.0
);
ASSERT_VEC8_EQUAL
(
blend
(
v1
,
v2
,
fvec8
(
1.0
,
1.5
,
3.0
,
2.2
,
10.0
,
10.5
,
13.0
,
12.2
)
!=
fvec8
(
1.1
,
1.5
,
3.0
,
2.1
,
10.1
,
10.5
,
13.0
,
12.1
)),
1.5
,
0.0
,
0.0
,
1.5
,
1.5
,
0.0
,
0.0
,
1.5
);
ASSERT_VEC8_EQUAL
(
blend
(
v1
,
v2
,
fvec8
(
1.0
,
1.5
,
3.0
,
2.2
,
10.0
,
10.5
,
13.0
,
12.2
)
<
fvec8
(
1.1
,
1.5
,
3.0
,
2.1
,
10.1
,
10.5
,
13.0
,
12.1
)),
1.5
,
0.0
,
0.0
,
0.0
,
1.5
,
0.0
,
0.0
,
0.0
);
ASSERT_VEC8_EQUAL
(
blend
(
v1
,
v2
,
fvec8
(
1.0
,
1.5
,
3.0
,
2.2
,
10.0
,
10.5
,
13.0
,
12.2
)
>
fvec8
(
1.1
,
1.5
,
3.0
,
2.1
,
10.1
,
10.5
,
13.0
,
12.1
)),
0.0
,
0.0
,
0.0
,
1.5
,
0.0
,
0.0
,
0.0
,
1.5
);
ASSERT_VEC8_EQUAL
(
blend
(
v1
,
v2
,
fvec8
(
1.0
,
1.5
,
3.0
,
2.2
,
10.0
,
10.5
,
13.0
,
12.2
)
<=
fvec8
(
1.1
,
1.5
,
3.0
,
2.1
,
10.1
,
10.5
,
13.0
,
12.1
)),
1.5
,
1.5
,
1.5
,
0.0
,
1.5
,
1.5
,
1.5
,
0.0
);
ASSERT_VEC8_EQUAL
(
blend
(
v1
,
v2
,
fvec8
(
1.0
,
1.5
,
3.0
,
2.2
,
10.0
,
10.5
,
13.0
,
12.2
)
>=
fvec8
(
1.1
,
1.5
,
3.0
,
2.1
,
10.1
,
10.5
,
13.0
,
12.1
)),
0.0
,
1.5
,
1.5
,
1.5
,
0.0
,
1.5
,
1.5
,
1.5
);
}
void
testMathFunctions
()
{
fvec8
f1
(
0.4
,
1.9
,
-
1.2
,
-
3.8
,
0.4
,
1.9
,
-
1.2
,
-
3.8
);
fvec8
f2
(
1.1
,
1.2
,
1.3
,
-
5.0
,
1.1
,
1.2
,
1.3
,
-
5.0
);
ASSERT_VEC8_EQUAL
(
floor
(
f1
),
0.0
,
1.0
,
-
2.0
,
-
4.0
,
0.0
,
1.0
,
-
2.0
,
-
4.0
);
ASSERT_VEC8_EQUAL
(
ceil
(
f1
),
1.0
,
2.0
,
-
1.0
,
-
3.0
,
1.0
,
2.0
,
-
1.0
,
-
3.0
);
ASSERT_VEC8_EQUAL
(
round
(
f1
),
0.0
,
2.0
,
-
1.0
,
-
4.0
,
0.0
,
2.0
,
-
1.0
,
-
4.0
);
ASSERT_VEC8_EQUAL
(
abs
(
f1
),
0.4
,
1.9
,
1.2
,
3.8
,
0.4
,
1.9
,
1.2
,
3.8
);
ASSERT_VEC8_EQUAL
(
min
(
f1
,
f2
),
0.4
,
1.2
,
-
1.2
,
-
5.0
,
0.4
,
1.2
,
-
1.2
,
-
5.0
);
ASSERT_VEC8_EQUAL
(
max
(
f1
,
f2
),
1.1
,
1.9
,
1.3
,
-
3.8
,
1.1
,
1.9
,
1.3
,
-
3.8
);
ASSERT_VEC8_EQUAL
(
sqrt
(
fvec8
(
1.5
,
3.1
,
4.0
,
15.0
,
1.5
,
3.1
,
4.0
,
15.0
)),
sqrt
(
1.5
),
sqrt
(
3.1
),
sqrt
(
4.0
),
sqrt
(
15.0
),
sqrt
(
1.5
),
sqrt
(
3.1
),
sqrt
(
4.0
),
sqrt
(
15.0
));
ASSERT_VEC8_EQUAL
(
rsqrt
(
fvec8
(
1.5
,
3.1
,
4.0
,
15.0
,
1.5
,
3.1
,
4.0
,
15.0
)),
1.0
/
sqrt
(
1.5
),
1.0
/
sqrt
(
3.1
),
1.0
/
sqrt
(
4.0
),
1.0
/
sqrt
(
15.0
),
1.0
/
sqrt
(
1.5
),
1.0
/
sqrt
(
3.1
),
1.0
/
sqrt
(
4.0
),
1.0
/
sqrt
(
15.0
));
ASSERT_EQUAL_TOL
(
f1
.
lowerVec
()[
0
]
*
f2
.
lowerVec
()[
0
]
+
f1
.
lowerVec
()[
1
]
*
f2
.
lowerVec
()[
1
]
+
f1
.
lowerVec
()[
2
]
*
f2
.
lowerVec
()[
2
]
+
f1
.
lowerVec
()[
3
]
*
f2
.
lowerVec
()[
3
]
+
f1
.
upperVec
()[
0
]
*
f2
.
upperVec
()[
0
]
+
f1
.
upperVec
()[
1
]
*
f2
.
upperVec
()[
1
]
+
f1
.
upperVec
()[
2
]
*
f2
.
upperVec
()[
2
]
+
f1
.
upperVec
()[
3
]
*
f2
.
upperVec
()[
3
],
dot8
(
f1
,
f2
),
1e-6
);
ASSERT
(
any
(
f1
>
0.5
));
ASSERT
(
!
any
(
f1
>
2.0
));
ASSERT_VEC8_EQUAL
(
blend
(
f1
,
f2
,
ivec8
(
-
1
,
0
,
-
1
,
0
,
-
1
,
0
,
-
1
,
0
)),
1.1
,
1.9
,
1.3
,
-
3.8
,
1.1
,
1.9
,
1.3
,
-
3.8
);
}
void
testTranspose
()
{
fvec4
f
[
8
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
10.0
,
11.0
,
12.0
,
13.0
},
{
20.0
,
21.0
,
22.0
,
23.0
},
{
30.0
,
31.0
,
32.0
,
33.0
},
{
40.0
,
41.0
,
42.0
,
43.0
},
{
50.0
,
51.0
,
52.0
,
53.0
},
{
60.0
,
61.0
,
62.0
,
63.0
},
{
70.0
,
71.0
,
72.0
,
73.0
}
};
fvec8
o1
,
o2
,
o3
,
o4
;
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
],
f
[
4
],
f
[
5
],
f
[
6
],
f
[
7
],
o1
,
o2
,
o3
,
o4
);
ASSERT_VEC8_EQUAL
(
o1
,
0.0
,
10.0
,
20.0
,
30.0
,
40.0
,
50.0
,
60.0
,
70.0
);
ASSERT_VEC8_EQUAL
(
o2
,
1.0
,
11.0
,
21.0
,
31.0
,
41.0
,
51.0
,
61.0
,
71.0
);
ASSERT_VEC8_EQUAL
(
o3
,
2.0
,
12.0
,
22.0
,
32.0
,
42.0
,
52.0
,
62.0
,
72.0
);
ASSERT_VEC8_EQUAL
(
o4
,
3.0
,
13.0
,
23.0
,
33.0
,
43.0
,
53.0
,
63.0
,
73.0
);
fvec8
q1
,
q2
,
q3
,
q4
;
transpose
(
f
,
q1
,
q2
,
q3
,
q4
);
ASSERT_VEC8_EQUAL
(
q1
,
0.0
,
10.0
,
20.0
,
30.0
,
40.0
,
50.0
,
60.0
,
70.0
);
ASSERT_VEC8_EQUAL
(
q2
,
1.0
,
11.0
,
21.0
,
31.0
,
41.0
,
51.0
,
61.0
,
71.0
);
ASSERT_VEC8_EQUAL
(
q3
,
2.0
,
12.0
,
22.0
,
32.0
,
42.0
,
52.0
,
62.0
,
72.0
);
ASSERT_VEC8_EQUAL
(
q4
,
3.0
,
13.0
,
23.0
,
33.0
,
43.0
,
53.0
,
63.0
,
73.0
);
fvec4
g
[
8
];
transpose
(
o1
,
o2
,
o3
,
o4
,
g
[
0
],
g
[
1
],
g
[
2
],
g
[
3
],
g
[
4
],
g
[
5
],
g
[
6
],
g
[
7
]);
ASSERT_VEC4_EQUAL
(
g
[
0
],
0.0
,
1.0
,
2.0
,
3.0
);
ASSERT_VEC4_EQUAL
(
g
[
1
],
10.0
,
11.0
,
12.0
,
13.0
);
ASSERT_VEC4_EQUAL
(
g
[
2
],
20.0
,
21.0
,
22.0
,
23.0
);
ASSERT_VEC4_EQUAL
(
g
[
3
],
30.0
,
31.0
,
32.0
,
33.0
);
ASSERT_VEC4_EQUAL
(
g
[
4
],
40.0
,
41.0
,
42.0
,
43.0
);
ASSERT_VEC4_EQUAL
(
g
[
5
],
50.0
,
51.0
,
52.0
,
53.0
);
ASSERT_VEC4_EQUAL
(
g
[
6
],
60.0
,
61.0
,
62.0
,
63.0
);
ASSERT_VEC4_EQUAL
(
g
[
7
],
70.0
,
71.0
,
72.0
,
73.0
);
fvec4
h
[
8
];
transpose
(
o1
,
o2
,
o3
,
o4
,
h
);
ASSERT_VEC4_EQUAL
(
h
[
0
],
0.0
,
1.0
,
2.0
,
3.0
);
ASSERT_VEC4_EQUAL
(
h
[
1
],
10.0
,
11.0
,
12.0
,
13.0
);
ASSERT_VEC4_EQUAL
(
h
[
2
],
20.0
,
21.0
,
22.0
,
23.0
);
ASSERT_VEC4_EQUAL
(
h
[
3
],
30.0
,
31.0
,
32.0
,
33.0
);
ASSERT_VEC4_EQUAL
(
h
[
4
],
40.0
,
41.0
,
42.0
,
43.0
);
ASSERT_VEC4_EQUAL
(
h
[
5
],
50.0
,
51.0
,
52.0
,
53.0
);
ASSERT_VEC4_EQUAL
(
h
[
6
],
60.0
,
61.0
,
62.0
,
63.0
);
ASSERT_VEC4_EQUAL
(
h
[
7
],
70.0
,
71.0
,
72.0
,
73.0
);
}
void
testUtility
()
{
fvec8
f1
(
0.4
,
1.9
,
-
1.2
,
-
3.8
,
0.4
,
1.9
,
-
6.8
,
-
3.8
);
fvec8
f2
(
1
,
2
,
4
,
7
,
19
,
31
,
64
,
5
);
fvec8
f3
(
0.5
,
1.0
,
1.5
,
2.0
,
2.5
,
3.0
,
3.5
,
4.0
);
// Reduce-add across three vectors into a single vec3.
const
auto
computedVec3
=
reduceToVec3
(
f1
,
f2
,
f3
);
ASSERT_EQUAL
(
-
11
,
computedVec3
[
0
]);
ASSERT_EQUAL
(
133
,
computedVec3
[
1
]);
ASSERT_EQUAL
(
18
,
computedVec3
[
2
]);
// Gather values from a table. Variants for both one vector and two vector gathers are provided.
float
table
[
2048
];
for
(
int
i
=
0
;
i
<
2048
;
++
i
)
table
[
i
]
=
-
i
;
// Same index to make it easy to debug, but negative to avoid copying idx.
// Single vector gather.
const
int
vidx
[
8
]
=
{
4
,
8
,
156
,
1987
,
23
,
65
,
33
,
1003
};
fvec8
g
(
table
,
vidx
);
ASSERT_VEC8_EQUAL
(
g
,
-
4
,
-
8
,
-
156
,
-
1987
,
-
23
,
-
65
,
-
33
,
-
1003
);
// Pair-wise vector gather.
fvec8
p0
,
p1
;
gatherVecPair
(
table
,
ivec8
(
57
,
105
,
1976
,
91
,
636
,
1952
,
345
,
12
),
p0
,
p1
);
ASSERT_VEC8_EQUAL
(
p0
,
-
57
,
-
105
,
-
1976
,
-
91
,
-
636
,
-
1952
,
-
345
,
-
12
);
ASSERT_VEC8_EQUAL
(
p1
,
-
58
,
-
106
,
-
1977
,
-
92
,
-
637
,
-
1953
,
-
346
,
-
13
);
// Verify building blend mask from integer. The mask isn't checked directly, as different platforms
// use different types of mask. Instead, check the side effect of using the mask in a blend.
const
auto
elements
=
fvec8
(
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
);
const
auto
maskZero
=
fvec8
::
expandBitsToMask
(
0
);
ASSERT_VEC8_EQUAL_INT
(
blendZero
(
elements
,
maskZero
),
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
);
const
auto
maskOne
=
fvec8
::
expandBitsToMask
(
0b11111111
);
ASSERT_VEC8_EQUAL_INT
(
blendZero
(
elements
,
maskOne
),
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
);
const
auto
maskMix
=
fvec8
::
expandBitsToMask
(
0b01101001
);
ASSERT_VEC8_EQUAL_INT
(
blendZero
(
elements
,
maskMix
),
1
,
0
,
0
,
4
,
0
,
6
,
7
,
0
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
!
isVec8Supported
())
{
...
...
@@ -289,12 +124,10 @@ int main(int argc, char* argv[]) {
return
0
;
}
testLoadStore
();
testArithmetic
();
testLogic
();
testComparisons
();
testMathFunctions
();
testTranspose
();
testUtility
();
TestFvec
<
fvec8
>::
testAll
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
tests/TestVectorizeGeneric.h
0 → 100644
View file @
b0d13582
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2014-2020 Stanford University and the Authors. *
* Authors: Daniel Towner *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#pragma once
/**
* This tests all sizes of vectorized operations using templated test code.
*/
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <iterator>
#include <memory.h>
#include <numeric>
#include <sstream>
#include <typeinfo>
/**
 * Return the 32-bit integer bit pattern of the given floating-point value.
 *
 * The bytes are copied with memcpy, so no pointer of one type is ever used to read
 * an object of another type.
 *
 * @param f  the value whose representation is wanted
 * @return   the bytes of f reinterpreted as a signed 32-bit integer
 */
static int32_t floatAsIntBits(float f) {
    // The copy below is only meaningful when both types occupy the same number of bytes.
    static_assert(sizeof(int32_t) == sizeof(float), "float must be exactly 32 bits wide");
    int32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    return bits;
}
/**
 * Compare two floating-point values using units-in-last-place (ULP) as the measure of
 * equality: values that are only a few representable floats apart are treated as equal.
 * A single IEEE operation (add, mul, etc.) is correctly rounded, but a sequence of
 * operations can legitimately end up a few ULP away from the reference result. Unlike a
 * fixed epsilon, a ULP distance works at any magnitude.
 *
 * Special cases:
 *  - values that compare equal with == are equal (this covers +0/-0 and two infinities
 *    of the same sign);
 *  - otherwise a NaN or an infinity on either side is never equal to anything;
 *  - values of opposite sign are never equal, even two tiny denormals either side of 0.
 *
 * @param a  first value
 * @param b  second value
 * @return   true when a and b are identical or fewer than 4 ULP apart
 */
static bool almostEqual(float a, float b) {
    // Number of representable floats the two values may be apart before they count as different.
    constexpr int32_t ulpLimit = 4;

    // Maybe they really are equal.
    if (a == b)
        return true;

    // Past this point a NaN or an infinity cannot be "close" to the other operand.
    if (!std::isfinite(a) || !std::isfinite(b))
        return false;

    // Different signs: not equal. Denormals straddling zero are a corner case that is
    // deliberately not treated as equal.
    if (std::signbit(a) != std::signbit(b))
        return false;

    // Both values are finite with the same sign, so their bit patterns order the same way
    // as the values themselves and the difference of the patterns is the distance in ULP.
    const int32_t ulpDistance = std::abs(floatAsIntBits(a) - floatAsIntBits(b));
    return ulpDistance < ulpLimit;
}
/**
 * Strict equality predicate for floats: true only when operator== reports a match.
 * Suitable wherever a bool(float, float) comparison callback is expected.
 */
static bool exactlyEqual(float a, float b) {
    const bool identical = (a == b);
    return identical;
}
/**
 * Write the contents of the given array-like object to a stream. No formatting is
 * applied; every element, including the last, is followed by ", ".
 *
 * The object is treated as sizeof(FVEC)/sizeof(float) consecutive floats. Its bytes are
 * copied into a local float buffer first, so the object is never read through a pointer
 * of a different type.
 *
 * @param stream  the stream to write to
 * @param vec     a SIMD vector, C array or std::array whose storage is a run of floats
 */
template<typename FVEC>
void VecToStream(std::ostream& stream, const FVEC& vec) {
    constexpr int numElements = sizeof(FVEC) / sizeof(float);
    static_assert(numElements > 0, "Array-like object must hold at least one float");

    float elements[numElements];
    std::memcpy(elements, &vec, sizeof(elements));

    for (const float element : elements)
        stream << element << ", ";
}
/**
 * Compare two vector-like objects element by element. The objects can be anything which in
 * memory is a list of 32-bit floating-point values, so SIMD vectors, C arrays and C++ arrays
 * are all valid, and the two arguments need not have the same type as long as they have the
 * same size.
 *
 * Both objects are copied into local float buffers before being compared, so neither is
 * read through a pointer of a different type. The failure report is only assembled when a
 * mismatch has actually been found.
 *
 * @param computed  the value produced by the code under test
 * @param expected  the reference value
 * @param equal_fn  predicate deciding whether one pair of elements matches; it is called
 *                  for every element, including those after the first mismatch
 * @param file      source file of the assertion, passed on to OpenMM::throwException
 * @param line      source line of the assertion, passed on to OpenMM::throwException
 */
template<typename S, typename T>
static void checkElementsEqual(const S& computed, const T& expected, std::function<bool (float, float)> equal_fn, const char* file, int line) {
    // Both S and T should be arrays of floats of the same length.
    static_assert(sizeof(T) == sizeof(S), "Array-like elements must have the same size");
    constexpr int numElements = sizeof(S) / sizeof(float);
    static_assert(numElements > 0, "Array-like elements must hold at least one float");

    float computedValues[numElements];
    float expectedValues[numElements];
    std::memcpy(computedValues, &computed, sizeof(computedValues));
    std::memcpy(expectedValues, &expected, sizeof(expectedValues));

    bool passed = true;
    for (int i = 0; i < numElements; ++i) {
        if (!equal_fn(computedValues[i], expectedValues[i]))
            passed = false;
    }
    if (passed)
        return;

    // Something differed: report the type under test and both complete vectors.
    std::ostringstream details;
    details << "Error during test for type " << typeid(S).name() << '\n';
    details << "Values differ. ";
    VecToStream(details, computed);
    details << " and ";
    VecToStream(details, expected);
    OpenMM::throwException(file, line, details.str());
}
// Check that every float element of `computed` matches the corresponding element of `expected`
// under exactlyEqual (operator==). The file and line of the call site are forwarded to
// checkElementsEqual. The expansion is a braced block, so the `;` written after the macro at
// the call site is a separate empty statement.
#define ASSERT_VEC_EQUAL(computed, expected) {checkElementsEqual(computed, expected, exactlyEqual, __FILE__, __LINE__);}
// Same as ASSERT_VEC_EQUAL, except that elements are compared with almostEqual.
#define ASSERT_VEC_ALMOST_EQUAL(computed, expected) {checkElementsEqual(computed, expected, almostEqual, __FILE__, __LINE__);}
/**
 * Return a pseudo-random float between -50 and 50.
 *
 * The value comes from rand(), so the sequence is controlled by srand() and is
 * reproducible for a given seed.
 */
static float getRandomFloat() {
    // rand() lies in [0, RAND_MAX]; dividing by RAND_MAX/100 stretches that to [0, 100],
    // and subtracting 50 centres the range on zero.
    const float rangeScale = static_cast<float>(RAND_MAX / 100.0f);
    return static_cast<float>(std::rand()) / rangeScale - 50.0f;
}
/**
 * Given an array-like memory object containing floats, apply the given function to every
 * element and return the results in a new object of the same type.
 *
 * The elements are moved in and out of the objects with memcpy rather than through a
 * float pointer aimed at the object, so no object is accessed through a pointer of a
 * different type.
 *
 * @param v   the input object, treated as sizeof(FVEC)/sizeof(float) consecutive floats
 * @param fn  scalar function applied to each element in turn
 * @return    an FVEC whose i-th element is fn(i-th element of v)
 */
template<typename FVEC>
FVEC applyUnaryFn(const FVEC& v, std::function<float (float)> fn) {
    constexpr int numElements = sizeof(FVEC) / sizeof(float);
    static_assert(numElements > 0, "Array-like object must hold at least one float");

    float lanes[numElements];
    std::memcpy(lanes, &v, sizeof(lanes));
    for (float& lane : lanes)
        lane = fn(lane);

    FVEC result;
    std::memcpy(static_cast<void*>(&result), lanes, sizeof(lanes));
    return result;
}
/**
 * Given two array-like memory objects containing floats, apply the given function to each
 * pair of corresponding elements and return the results in a new object of the same type.
 *
 * The elements are moved in and out of the objects with memcpy rather than through a
 * float pointer aimed at the object, so no object is accessed through a pointer of a
 * different type.
 *
 * @param a   supplies the first argument of fn
 * @param b   supplies the second argument of fn
 * @param fn  scalar function applied to each pair of elements in turn
 * @return    an FVEC whose i-th element is fn(i-th element of a, i-th element of b)
 */
template<typename FVEC>
FVEC applyBinaryFn(const FVEC& a, const FVEC& b, std::function<float (float, float)> fn) {
    constexpr int numElements = sizeof(FVEC) / sizeof(float);
    static_assert(numElements > 0, "Array-like object must hold at least one float");

    float left[numElements];
    float right[numElements];
    std::memcpy(left, &a, sizeof(left));
    std::memcpy(right, &b, sizeof(right));

    float combined[numElements];
    for (int i = 0; i < numElements; ++i)
        combined[i] = fn(left[i], right[i]);

    FVEC result;
    std::memcpy(static_cast<void*>(&result), combined, sizeof(combined));
    return result;
}
/**
 * Test fixture which underpins all verification for a given type of vector SIMD
 * implementation, and which also provides common utility functions.
 *
 * FVEC is the SIMD vector type under test; it is treated as holding
 * sizeof(FVEC)/sizeof(float) float elements.
 */
template<typename FVEC>
class TestFvec {
public:

    /** Number of float elements in one FVEC. */
    static constexpr int numElements = sizeof(FVEC) / sizeof(float);

    // The individual test groups; each is defined outside the class body.
    void testInitializers() const;
    void testUnaryOps() const;
    void testBinaryOps() const;
    void testUtilities() const;
    void testBlendAndCompare() const;
    void testTranspose() const;

    /**
     * Run every test group, in a fixed order, on a freshly created fixture.
     */
    static void testAll() {
        TestFvec<FVEC> testUnit;

        testUnit.testInitializers();
        testUnit.testUnaryOps();
        testUnit.testBinaryOps();
        testUnit.testUtilities();
        testUnit.testBlendAndCompare();
        testUnit.testTranspose();
    }

    /**
     * Build a vector in which every element is an independent value from getRandomFloat().
     *
     * The values are generated in a plain float buffer and then copied into the vector
     * with memcpy; this avoids reading a union member other than the one last written.
     */
    FVEC getRandomFvec() const {
        float lanes[numElements];
        for (auto& lane : lanes)
            lane = getRandomFloat();

        FVEC v;
        std::memcpy(static_cast<void*>(&v), lanes, sizeof(lanes));
        return v;
    }
};
/**
 * Check every way of constructing an FVEC: value-initialisation, broadcast of one scalar,
 * load from a float array, gather through an index array, and the pair-wise gather.
 */
template<typename FVEC>
void TestFvec<FVEC>::testInitializers() const {
    // Value-initialised vector against an all-zero array.
    float expectedZero[numElements] = {};
    FVEC computedZero = {};
    ASSERT_VEC_EQUAL(computedZero, expectedZero);

    // One scalar replicated into every element.
    float expectedBroadcast[numElements];
    for (float& element : expectedBroadcast)
        element = 14.5f;
    FVEC computedBroadcast(14.5f);
    ASSERT_VEC_EQUAL(computedBroadcast, expectedBroadcast);

    // Load from memory; the source array counts upwards from 23.
    float expectedArray[numElements];
    for (int i = 0; i < numElements; ++i)
        expectedArray[i] = static_cast<float>(23 + i);
    FVEC computedFromLoad(expectedArray);
    ASSERT_VEC_EQUAL(computedFromLoad, expectedArray);

    // Gather values from a table. Entry i stores -i: a gathered value identifies the index
    // it came from, yet can never be confused with a copy of the index itself.
    constexpr int tableSize = 2048;
    float gatherTable[tableSize];
    for (int entry = 0; entry < tableSize; ++entry)
        gatherTable[entry] = static_cast<float>(-entry);

    // Element i gathers from index 7*i. The same indexes are kept both as ints and as
    // floats, together with the values expected at each index and at the index after it.
    constexpr int indexStride = 7;
    int gatherIndexes[numElements];
    float gatherIndexesAsFloat[numElements];
    float expectedGather0[numElements];
    float expectedGather1[numElements];
    for (int i = 0; i < numElements; ++i) {
        const int index = i * indexStride;
        gatherIndexes[i] = index;
        gatherIndexesAsFloat[i] = static_cast<float>(index);
        expectedGather0[i] = static_cast<float>(-index);
        expectedGather1[i] = static_cast<float>(-index - 1);  // The next table entry is one less.
    }

    // Single value gather.
    FVEC computedFromGather(gatherTable, gatherIndexes);
    ASSERT_VEC_EQUAL(computedFromGather, expectedGather0);

    // Pair-wise vector gather. The first output should match the ordinary gather and the
    // second should hold the entries immediately after those. The indexes are supplied here
    // as a float vector holding integer values, so there must be a suitable conversion from
    // that to whatever gatherVecPair requires for its second operand; gatherVecPair can then
    // accept either a float vector or an integer vector such as ivec4 or ivec8.
    FVEC indexVector(gatherIndexesAsFloat);
    FVEC atIndex, afterIndex;
    gatherVecPair(gatherTable, indexVector, atIndex, afterIndex);
    ASSERT_VEC_EQUAL(atIndex, expectedGather0);
    ASSERT_VEC_EQUAL(afterIndex, expectedGather1);
}
/**
 * Check the single-operand functions of FVEC (abs, negation, floor, ceil, round, sqrt,
 * rsqrt) on one random vector, using the scalar library functions as the reference.
 */
template<typename FVEC>
void TestFvec<FVEC>::testUnaryOps() const {
    const auto v = getRandomFvec();

    // Scalar references, applied element by element to build the expected vectors.
    const auto scalarAbs = [](float x) { return std::abs(x); };
    const auto scalarNegate = [](float x) { return 0 - x; };
    const auto scalarFloor = [](float x) { return std::floor(x); };
    const auto scalarCeil = [](float x) { return std::ceil(x); };
    const auto scalarRound = [](float x) { return std::round(x); };

    // These are exact comparisons: each SIMD operation simply applies the scalar operation
    // to every element, so no precision should be lost.
    ASSERT_VEC_EQUAL(abs(v), applyUnaryFn(v, scalarAbs));
    ASSERT_VEC_EQUAL(-v, applyUnaryFn(v, scalarNegate));
    ASSERT_VEC_EQUAL(floor(v), applyUnaryFn(v, scalarFloor));
    ASSERT_VEC_EQUAL(ceil(v), applyUnaryFn(v, scalarCeil));
    ASSERT_VEC_EQUAL(round(v), applyUnaryFn(v, scalarRound));

    // The square roots are tested by squaring a strictly positive vector and checking that
    // sqrt recovers it and rsqrt yields its reciprocal; both are allowed a few ULP of error.
    const auto positiveValue = abs(v) + 1;
    const auto squared = positiveValue * positiveValue;
    ASSERT_VEC_ALMOST_EQUAL(sqrt(squared), positiveValue);
    ASSERT_VEC_ALMOST_EQUAL(rsqrt(squared), 1.0f / abs(positiveValue));
}
/**
 * Check the two-operand operations of FVEC on random inputs: the arithmetic operators
 * between two vectors, their compound-assignment forms, the mixed vector/scalar forms,
 * and min/max. The reference results are computed element by element with scalar code.
 */
template<typename FVEC>
void TestFvec<FVEC>::testBinaryOps() const {
    const auto v0 = getRandomFvec();
    const auto v1 = getRandomFvec();

    // Reference results for v0 (op) v1, shared by the operator and assignment checks.
    const auto expectedSum = applyBinaryFn(v0, v1, std::plus<float>());
    const auto expectedDifference = applyBinaryFn(v0, v1, std::minus<float>());
    const auto expectedProduct = applyBinaryFn(v0, v1, std::multiplies<float>());
    const auto expectedQuotient = applyBinaryFn(v0, v1, std::divides<float>());

    // Most comparisons are exact because the SIMD operators just apply the scalar operator
    // to each element. Division is the exception: it is expensive, so implementations often
    // compute it slightly differently (e.g., multiply by reciprocal), and it is therefore
    // only required to be almost equal.

    // Binary operators.
    ASSERT_VEC_EQUAL(v0 + v1, expectedSum);
    ASSERT_VEC_EQUAL(v0 - v1, expectedDifference);
    ASSERT_VEC_EQUAL(v0 * v1, expectedProduct);
    ASSERT_VEC_ALMOST_EQUAL(v0 / v1, expectedQuotient);

    // Assignment operators.
    auto addAssign = v0;
    addAssign += v1;
    ASSERT_VEC_EQUAL(addAssign, expectedSum);

    auto subAssign = v0;
    subAssign -= v1;
    ASSERT_VEC_EQUAL(subAssign, expectedDifference);

    auto mulAssign = v0;
    mulAssign *= v1;
    ASSERT_VEC_EQUAL(mulAssign, expectedProduct);

    auto divAssign = v0;
    divAssign /= v1;
    ASSERT_VEC_ALMOST_EQUAL(divAssign, expectedQuotient);

    // Binary ops between SIMD and scalar, with the scalar on either side. The reference
    // uses a vector that holds the scalar in every element.
    const float scalar = getRandomFloat();
    const FVEC scalarInEveryLane(scalar);
    ASSERT_VEC_EQUAL(v0 + scalar, applyBinaryFn(v0, scalarInEveryLane, std::plus<float>()));
    ASSERT_VEC_EQUAL(scalar + v0, applyBinaryFn(scalarInEveryLane, v0, std::plus<float>()));
    ASSERT_VEC_EQUAL(v0 - scalar, applyBinaryFn(v0, scalarInEveryLane, std::minus<float>()));
    ASSERT_VEC_EQUAL(scalar - v0, applyBinaryFn(scalarInEveryLane, v0, std::minus<float>()));
    ASSERT_VEC_EQUAL(v0 * scalar, applyBinaryFn(v0, scalarInEveryLane, std::multiplies<float>()));
    ASSERT_VEC_EQUAL(scalar * v0, applyBinaryFn(scalarInEveryLane, v0, std::multiplies<float>()));
    ASSERT_VEC_ALMOST_EQUAL(v0 / scalar, applyBinaryFn(v0, scalarInEveryLane, std::divides<float>()));
    ASSERT_VEC_ALMOST_EQUAL(scalar / v0, applyBinaryFn(scalarInEveryLane, v0, std::divides<float>()));

    // Binary functions. The using-declarations keep the unqualified min/max calls on the
    // vectors resolving exactly as before.
    using std::min;
    using std::max;
    const auto scalarMin = [](float x, float y) { return std::min(x, y); };
    const auto scalarMax = [](float x, float y) { return std::max(x, y); };
    ASSERT_VEC_EQUAL(min(v0, v1), applyBinaryFn(v0, v1, scalarMin));
    ASSERT_VEC_EQUAL(max(v0, v1), applyBinaryFn(v0, v1, scalarMax));
}
/**
 * Check transpose() in both directions: four FVEC rows into numElements fvec4 columns,
 * and those columns back into the four original rows.
 */
template<typename FVEC>
void TestFvec<FVEC>::testTranspose() const {
    constexpr int numRows = 4;

    // A table of random data to transpose, stored row after row. The values are rounded
    // to whole numbers.
    float table[numElements * numRows];
    for (auto& entry : table)
        entry = std::round(getRandomFloat());

    // Load the table row data into vectors.
    const FVEC row0(table + 0 * numElements);
    const FVEC row1(table + 1 * numElements);
    const FVEC row2(table + 2 * numElements);
    const FVEC row3(table + 3 * numElements);

    // Manually transpose the data: element (row, column) of the table becomes
    // element (column, row) of the expected result.
    std::array<float, numElements * numRows> expectedTranspose;
    for (int column = 0; column < numElements; ++column) {
        for (int row = 0; row < numRows; ++row)
            expectedTranspose[column * numRows + row] = table[row * numElements + column];
    }

    // Rows to columns.
    fvec4 computedTranspose[numElements];
    transpose(row0, row1, row2, row3, computedTranspose);
    ASSERT_VEC_EQUAL(computedTranspose, expectedTranspose);

    // Columns back to rows: this must reproduce the vectors loaded above.
    FVEC restored0, restored1, restored2, restored3;
    transpose(computedTranspose, restored0, restored1, restored2, restored3);
    ASSERT_VEC_EQUAL(row0, restored0);
    ASSERT_VEC_EQUAL(row1, restored1);
    ASSERT_VEC_EQUAL(row2, restored2);
    ASSERT_VEC_EQUAL(row3, restored3);
}
/**
 * Check expandBitsToMask(), blend(), blendZero() and the comparison operators.
 *
 * Different targets use different mask types, so a mask is never inspected directly.
 * Instead it is fed to blend()/blendZero() and the blended vector is checked, which gives
 * an indirect test of the mask.
 */
template<typename FVEC>
void TestFvec<FVEC>::testBlendAndCompare() const {
    const FVEC zero = {};
    const FVEC allOne(1.0f);
    const FVEC allTwo(2.0f);

    // A mask built from no bits: blend keeps its first argument, blendZero gives zero.
    const auto maskNone = FVEC::expandBitsToMask(0);
    ASSERT_VEC_EQUAL(blend(allOne, allTwo, maskNone), allOne);
    ASSERT_VEC_EQUAL(blendZero(allOne, maskNone), zero);

    // A mask built from all bits: blend takes its second argument, blendZero keeps the value.
    const auto maskAll = FVEC::expandBitsToMask(-1);
    ASSERT_VEC_EQUAL(blend(allOne, allTwo, maskAll), allTwo);
    ASSERT_VEC_EQUAL(blendZero(allOne, maskAll), allOne);

    // A mixed 16-bit pattern, long enough to cover most SIMD lengths.
    const int bitmask = 0b1100001101101001;
    const auto maskSome = FVEC::expandBitsToMask(bitmask);

    // Work out the expected result lane by lane: lane i is selected when bit i is set.
    float expectedMaskSome[numElements];
    float expectedZeroMaskSome[numElements];
    for (int lane = 0; lane < numElements; ++lane) {
        const bool laneSelected = (bitmask & (1 << lane)) != 0;
        if (laneSelected) {
            expectedMaskSome[lane] = 2.0f;
            expectedZeroMaskSome[lane] = 2.0f;
        }
        else {
            expectedMaskSome[lane] = 1.0f;
            expectedZeroMaskSome[lane] = 0.0f;
        }
    }
    ASSERT_VEC_EQUAL(blend(allOne, allTwo, maskSome), expectedMaskSome);
    ASSERT_VEC_EQUAL(blendZero(allTwo, maskSome), expectedZeroMaskSome);

    // Comparisons are tested the same way: compare two random vectors, use the result as a
    // blend mask to pick 2 where the comparison holds and 1 where it does not, and check
    // against the same selection made one element at a time.
    const auto lhs = getRandomFvec();
    const auto rhs = getRandomFvec();

    const auto pickLess = [](float x, float y) { return x < y ? 2.0f : 1.0f; };
    const auto pickLessEqual = [](float x, float y) { return x <= y ? 2.0f : 1.0f; };
    const auto pickGreater = [](float x, float y) { return x > y ? 2.0f : 1.0f; };
    const auto pickGreaterEqual = [](float x, float y) { return x >= y ? 2.0f : 1.0f; };

    ASSERT_VEC_EQUAL(blend(allOne, allTwo, lhs < rhs), applyBinaryFn(lhs, rhs, pickLess));

    ASSERT_VEC_EQUAL(blend(allOne, allTwo, lhs <= rhs), applyBinaryFn(lhs, rhs, pickLessEqual));
    // A vector compared with itself exercises the "equal" half of <=.
    ASSERT_VEC_EQUAL(blend(allOne, allTwo, lhs <= lhs), allTwo);

    ASSERT_VEC_EQUAL(blend(allOne, allTwo, lhs > rhs), applyBinaryFn(lhs, rhs, pickGreater));

    ASSERT_VEC_EQUAL(blend(allOne, allTwo, lhs >= rhs), applyBinaryFn(lhs, rhs, pickGreaterEqual));
    // Likewise for the "equal" half of >=.
    ASSERT_VEC_EQUAL(blend(allOne, allTwo, lhs >= lhs), allTwo);
}
/**
 * Check the reduction utilities reduceAdd() and reduceToVec3().
 *
 * The inputs are rounded so that every element is a whole number. Reductions are very
 * sensitive to the order in which values are accumulated; with arbitrary random floats the
 * outcome would depend more on which numbers were drawn than on the code under test.
 * Whole numbers behave sanely under accumulation, so the check is of the reduction itself
 * and not of the floating-point format.
 */
template<typename FVEC>
void TestFvec<FVEC>::testUtilities() const {
    const auto a = round(getRandomFvec());
    const auto b = round(getRandomFvec());
    const auto c = round(getRandomFvec());

    // Reference sum: read numElements floats starting at the given address and add them
    // up in order, starting from 0.0f.
    const auto sumOfLanes = [](const float* lanes) {
        return std::accumulate(lanes, lanes + numElements, 0.0f);
    };

    // Each vector object is viewed in place as an array of floats.
    const auto expectedSumA = sumOfLanes(reinterpret_cast<const float*>(&a));
    const auto expectedSumB = sumOfLanes(reinterpret_cast<const float*>(&b));
    const auto expectedSumC = sumOfLanes(reinterpret_cast<const float*>(&c));

    ASSERT_VEC_EQUAL(reduceAdd(a), expectedSumA);

    // reduceToVec3 adds up each of the three vectors and packs the sums into elements 0-2
    // of one vector. Element 3 of that result is undefined, so the expected vector simply
    // copies whatever value the computed result holds there.
    const auto computedRed3 = reduceToVec3(a, b, c);
    const fvec4 expectedRed3(expectedSumA, expectedSumB, expectedSumC, computedRed3[3]);
    ASSERT_VEC_EQUAL(computedRed3, expectedRed3);
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment