Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
0412e25d
Commit
0412e25d
authored
Oct 01, 2015
by
Peter Eastman
Browse files
Fixed compilation errors on Windows
parent
8d0fee51
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
13 additions
and
10 deletions
+13
-10
libraries/vecmath/include/neon_mathfun.h
libraries/vecmath/include/neon_mathfun.h
+6
-5
libraries/vecmath/include/sse_mathfun.h
libraries/vecmath/include/sse_mathfun.h
+6
-5
libraries/vecmath/src/vecmath.cpp
libraries/vecmath/src/vecmath.cpp
+1
-0
No files found.
libraries/vecmath/include/neon_mathfun.h
View file @
0412e25d
...
...
@@ -26,6 +26,7 @@
*/
#include <arm_neon.h>
#include "openmm/internal/windowsExport.h"
typedef
float32x4_t
v4sf
;
// vector of 4 float
typedef
uint32x4_t
v4su
;
// vector of 4 uint32
...
...
@@ -48,7 +49,7 @@ typedef int32x4_t v4si; // vector of 4 uint32
/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/
v4sf
log_ps
(
v4sf
x
)
{
OPENMM_EXPORT
v4sf
log_ps
(
v4sf
x
)
{
v4sf
one
=
vdupq_n_f32
(
1
);
x
=
vmaxq_f32
(
x
,
vdupq_n_f32
(
0
));
/* force flush to zero on denormal values */
...
...
@@ -133,7 +134,7 @@ v4sf log_ps(v4sf x) {
#define c_cephes_exp_p5 5.0000001201E-1
/* exp() computed for 4 float at once */
v4sf
exp_ps
(
v4sf
x
)
{
OPENMM_EXPORT
v4sf
exp_ps
(
v4sf
x
)
{
v4sf
tmp
,
fx
;
v4sf
one
=
vdupq_n_f32
(
1
);
...
...
@@ -219,7 +220,7 @@ v4sf exp_ps(v4sf x) {
almost no extra price so both sin_ps and cos_ps make use of
sincos_ps..
*/
void
sincos_ps
(
v4sf
x
,
v4sf
*
ysin
,
v4sf
*
ycos
)
{
// any x
OPENMM_EXPORT
void
sincos_ps
(
v4sf
x
,
v4sf
*
ysin
,
v4sf
*
ycos
)
{
// any x
v4sf
xmm1
,
xmm2
,
xmm3
,
y
;
v4su
emm2
;
...
...
@@ -286,13 +287,13 @@ void sincos_ps(v4sf x, v4sf *ysin, v4sf *ycos) { // any x
*
ycos
=
vbslq_f32
(
sign_mask_cos
,
yc
,
vnegq_f32
(
yc
));
}
v4sf
sin_ps
(
v4sf
x
)
{
OPENMM_EXPORT
v4sf
sin_ps
(
v4sf
x
)
{
v4sf
ysin
,
ycos
;
sincos_ps
(
x
,
&
ysin
,
&
ycos
);
return
ysin
;
}
v4sf
cos_ps
(
v4sf
x
)
{
OPENMM_EXPORT
v4sf
cos_ps
(
v4sf
x
)
{
v4sf
ysin
,
ycos
;
sincos_ps
(
x
,
&
ysin
,
&
ycos
);
return
ycos
;
...
...
libraries/vecmath/include/sse_mathfun.h
View file @
0412e25d
...
...
@@ -30,6 +30,7 @@
*/
#include <xmmintrin.h>
#include "openmm/internal/windowsExport.h"
/* yes I know, the top of this file is quite ugly */
...
...
@@ -109,7 +110,7 @@ typedef union xmm_mm_union {
/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/
v4sf
log_ps
(
v4sf
x
)
{
OPENMM_EXPORT
v4sf
log_ps
(
v4sf
x
)
{
#ifdef USE_SSE2
v4si
emm0
;
#else
...
...
@@ -211,7 +212,7 @@ _PS_CONST(cephes_exp_p3, 4.1665795894E-2);
_PS_CONST
(
cephes_exp_p4
,
1.6666665459E-1
);
_PS_CONST
(
cephes_exp_p5
,
5.0000001201E-1
);
v4sf
exp_ps
(
v4sf
x
)
{
OPENMM_EXPORT
v4sf
exp_ps
(
v4sf
x
)
{
v4sf
tmp
=
_mm_setzero_ps
(),
fx
;
#ifdef USE_SSE2
v4si
emm0
;
...
...
@@ -329,7 +330,7 @@ _PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
Since it is based on SSE intrinsics, it has to be compiled at -O2 to
deliver full speed.
*/
v4sf
sin_ps
(
v4sf
x
)
{
// any x
OPENMM_EXPORT
v4sf
sin_ps
(
v4sf
x
)
{
// any x
v4sf
xmm1
,
xmm2
=
_mm_setzero_ps
(),
xmm3
,
sign_bit
,
y
;
#ifdef USE_SSE2
...
...
@@ -446,7 +447,7 @@ v4sf sin_ps(v4sf x) { // any x
}
/* almost the same as sin_ps */
v4sf
cos_ps
(
v4sf
x
)
{
// any x
OPENMM_EXPORT
v4sf
cos_ps
(
v4sf
x
)
{
// any x
v4sf
xmm1
,
xmm2
=
_mm_setzero_ps
(),
xmm3
,
y
;
#ifdef USE_SSE2
v4si
emm0
,
emm2
;
...
...
@@ -565,7 +566,7 @@ v4sf cos_ps(v4sf x) { // any x
/* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them..
it is almost as fast, and gives you a free cosine with your sine */
void
sincos_ps
(
v4sf
x
,
v4sf
*
s
,
v4sf
*
c
)
{
OPENMM_EXPORT
void
sincos_ps
(
v4sf
x
,
v4sf
*
s
,
v4sf
*
c
)
{
v4sf
xmm1
,
xmm2
,
xmm3
=
_mm_setzero_ps
(),
sign_bit_sin
,
y
;
#ifdef USE_SSE2
v4si
emm0
,
emm2
,
emm4
;
...
...
libraries/vecmath/src/vecmath.cpp
View file @
0412e25d
...
...
@@ -2,6 +2,7 @@
#include "neon_mathfun.h"
#else
#if !defined(__PNACL__)
#define USE_SSE2
#include "sse_mathfun.h"
#endif
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment