Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
99a9fdc9
"platforms/reference/include/ReferencePointFunctions.h" did not exist on "2da337e981e383e2c0fcd94d3c07e04b4ed627cc"
Unverified
Commit
99a9fdc9
authored
Apr 08, 2022
by
Peter Eastman
Committed by
GitHub
Apr 08, 2022
Browse files
Improvements to vectorization on ARM (#3555)
parent
fd13a655
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
25 deletions
+6
-25
openmmapi/include/openmm/internal/vectorize_neon.h
openmmapi/include/openmm/internal/vectorize_neon.h
+6
-25
No files found.
openmmapi/include/openmm/internal/vectorize_neon.h
View file @
99a9fdc9
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2013-20
14
Stanford University and the Authors. *
* Portions copyright (c) 2013-20
22
Stanford University and the Authors. *
* Authors: Mateus Lima, Peter Eastman *
* Authors: Mateus Lima, Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -130,13 +130,7 @@ public:
...
@@ -130,13 +130,7 @@ public:
return
vmulq_f32
(
val
,
other
);
return
vmulq_f32
(
val
,
other
);
}
}
fvec4
operator
/
(
fvec4
other
)
const
{
fvec4
operator
/
(
fvec4
other
)
const
{
// NEON does not have a divide float-point operator, so we get the reciprocal and multiply.
return
vdivq_f32
(
val
,
other
);
float32x4_t
reciprocal
=
vrecpeq_f32
(
other
);
reciprocal
=
vmulq_f32
(
vrecpsq_f32
(
other
,
reciprocal
),
reciprocal
);
reciprocal
=
vmulq_f32
(
vrecpsq_f32
(
other
,
reciprocal
),
reciprocal
);
fvec4
result
=
vmulq_f32
(
val
,
reciprocal
);
return
result
;
}
}
void
operator
+=
(
fvec4
other
)
{
void
operator
+=
(
fvec4
other
)
{
val
=
vaddq_f32
(
val
,
other
);
val
=
vaddq_f32
(
val
,
other
);
...
@@ -337,16 +331,11 @@ static inline float dot3(fvec4 v1, fvec4 v2) {
...
@@ -337,16 +331,11 @@ static inline float dot3(fvec4 v1, fvec4 v2) {
}
}
static
inline
float
dot4
(
fvec4
v1
,
fvec4
v2
)
{
static
inline
float
dot4
(
fvec4
v1
,
fvec4
v2
)
{
fvec4
result
=
v1
*
v2
;
return
vaddvq_f32
(
v1
*
v2
);
return
vgetq_lane_f32
(
result
,
0
)
+
vgetq_lane_f32
(
result
,
1
)
+
vgetq_lane_f32
(
result
,
2
)
+
vgetq_lane_f32
(
result
,
3
);
}
}
static
inline
float
reduceAdd
(
fvec4
v
)
{
static
inline
float
reduceAdd
(
fvec4
v
)
{
#ifdef __ARM64__
return
vaddvq_f32
(
v
);
return
vaddvq_f32
(
v
);
#else
return
dot4
(
v
,
fvec4
(
1.0
f
));
#endif
}
}
static
inline
fvec4
cross
(
fvec4
v1
,
fvec4
v2
)
{
static
inline
fvec4
cross
(
fvec4
v1
,
fvec4
v2
)
{
...
@@ -397,11 +386,7 @@ static inline ivec4 abs(ivec4 v) {
...
@@ -397,11 +386,7 @@ static inline ivec4 abs(ivec4 v) {
}
}
static
inline
bool
any
(
ivec4
v
)
{
static
inline
bool
any
(
ivec4
v
)
{
#ifdef __ARM64__
return
(
vmaxvq_u32
(
vreinterpretq_u32_s32
(
v
))
!=
0
);
return
(
vmaxvq_u32
(
vreinterpretq_u32_s32
(
v
))
!=
0
);
#else
return
(
vgetq_lane_s32
(
v
,
0
)
!=
0
||
vgetq_lane_s32
(
v
,
1
)
!=
0
||
vgetq_lane_s32
(
v
,
2
)
!=
0
||
vgetq_lane_s32
(
v
,
3
)
!=
0
);
#endif
}
}
// Mathematical operators involving a scalar and a vector.
// Mathematical operators involving a scalar and a vector.
...
@@ -439,19 +424,15 @@ static inline ivec4 blendZero(ivec4 v, ivec4 mask) {
...
@@ -439,19 +424,15 @@ static inline ivec4 blendZero(ivec4 v, ivec4 mask) {
// These are at the end since they involve other functions defined above.
// These are at the end since they involve other functions defined above.
static
inline
fvec4
round
(
fvec4
v
)
{
static
inline
fvec4
round
(
fvec4
v
)
{
fvec4
shift
(
0x1
.0
p23f
);
return
vrndnq_f32
(
v
);
fvec4
absResult
=
(
abs
(
v
)
+
shift
)
-
shift
;
return
blend
(
v
,
absResult
,
ivec4
(
0x7FFFFFFF
));
}
}
static
inline
fvec4
floor
(
fvec4
v
)
{
static
inline
fvec4
floor
(
fvec4
v
)
{
fvec4
rounded
=
round
(
v
);
return
vrndmq_f32
(
v
);
return
rounded
+
blend
(
0.0
f
,
-
1.0
f
,
rounded
>
v
);
}
}
static
inline
fvec4
ceil
(
fvec4
v
)
{
static
inline
fvec4
ceil
(
fvec4
v
)
{
fvec4
rounded
=
round
(
v
);
return
vrndpq_f32
(
v
);
return
rounded
+
blend
(
0.0
f
,
1.0
f
,
rounded
<
v
);
}
}
/* Given a table of floating-point values and a set of indexes, perform a gather read into a pair
/* Given a table of floating-point values and a set of indexes, perform a gather read into a pair
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment