Commit 0412e25d authored by Peter Eastman
Browse files

Fixed compilation errors on Windows

parent 8d0fee51
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
*/ */
#include <arm_neon.h> #include <arm_neon.h>
#include "openmm/internal/windowsExport.h"
typedef float32x4_t v4sf; // vector of 4 float typedef float32x4_t v4sf; // vector of 4 float
typedef uint32x4_t v4su; // vector of 4 uint32 typedef uint32x4_t v4su; // vector of 4 uint32
...@@ -48,7 +49,7 @@ typedef int32x4_t v4si; // vector of 4 uint32 ...@@ -48,7 +49,7 @@ typedef int32x4_t v4si; // vector of 4 uint32
/* natural logarithm computed for 4 simultaneous float /* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0 return NaN for x <= 0
*/ */
v4sf log_ps(v4sf x) { OPENMM_EXPORT v4sf log_ps(v4sf x) {
v4sf one = vdupq_n_f32(1); v4sf one = vdupq_n_f32(1);
x = vmaxq_f32(x, vdupq_n_f32(0)); /* force flush to zero on denormal values */ x = vmaxq_f32(x, vdupq_n_f32(0)); /* force flush to zero on denormal values */
...@@ -133,7 +134,7 @@ v4sf log_ps(v4sf x) { ...@@ -133,7 +134,7 @@ v4sf log_ps(v4sf x) {
#define c_cephes_exp_p5 5.0000001201E-1 #define c_cephes_exp_p5 5.0000001201E-1
/* exp() computed for 4 float at once */ /* exp() computed for 4 float at once */
v4sf exp_ps(v4sf x) { OPENMM_EXPORT v4sf exp_ps(v4sf x) {
v4sf tmp, fx; v4sf tmp, fx;
v4sf one = vdupq_n_f32(1); v4sf one = vdupq_n_f32(1);
...@@ -219,7 +220,7 @@ v4sf exp_ps(v4sf x) { ...@@ -219,7 +220,7 @@ v4sf exp_ps(v4sf x) {
almost no extra price so both sin_ps and cos_ps make use of almost no extra price so both sin_ps and cos_ps make use of
sincos_ps.. sincos_ps..
*/ */
void sincos_ps(v4sf x, v4sf *ysin, v4sf *ycos) { // any x OPENMM_EXPORT void sincos_ps(v4sf x, v4sf *ysin, v4sf *ycos) { // any x
v4sf xmm1, xmm2, xmm3, y; v4sf xmm1, xmm2, xmm3, y;
v4su emm2; v4su emm2;
...@@ -286,13 +287,13 @@ void sincos_ps(v4sf x, v4sf *ysin, v4sf *ycos) { // any x ...@@ -286,13 +287,13 @@ void sincos_ps(v4sf x, v4sf *ysin, v4sf *ycos) { // any x
*ycos = vbslq_f32(sign_mask_cos, yc, vnegq_f32(yc)); *ycos = vbslq_f32(sign_mask_cos, yc, vnegq_f32(yc));
} }
v4sf sin_ps(v4sf x) { OPENMM_EXPORT v4sf sin_ps(v4sf x) {
v4sf ysin, ycos; v4sf ysin, ycos;
sincos_ps(x, &ysin, &ycos); sincos_ps(x, &ysin, &ycos);
return ysin; return ysin;
} }
v4sf cos_ps(v4sf x) { OPENMM_EXPORT v4sf cos_ps(v4sf x) {
v4sf ysin, ycos; v4sf ysin, ycos;
sincos_ps(x, &ysin, &ycos); sincos_ps(x, &ysin, &ycos);
return ycos; return ycos;
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
*/ */
#include <xmmintrin.h> #include <xmmintrin.h>
#include "openmm/internal/windowsExport.h"
/* yes I know, the top of this file is quite ugly */ /* yes I know, the top of this file is quite ugly */
...@@ -109,7 +110,7 @@ typedef union xmm_mm_union { ...@@ -109,7 +110,7 @@ typedef union xmm_mm_union {
/* natural logarithm computed for 4 simultaneous float /* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0 return NaN for x <= 0
*/ */
v4sf log_ps(v4sf x) { OPENMM_EXPORT v4sf log_ps(v4sf x) {
#ifdef USE_SSE2 #ifdef USE_SSE2
v4si emm0; v4si emm0;
#else #else
...@@ -211,7 +212,7 @@ _PS_CONST(cephes_exp_p3, 4.1665795894E-2); ...@@ -211,7 +212,7 @@ _PS_CONST(cephes_exp_p3, 4.1665795894E-2);
_PS_CONST(cephes_exp_p4, 1.6666665459E-1); _PS_CONST(cephes_exp_p4, 1.6666665459E-1);
_PS_CONST(cephes_exp_p5, 5.0000001201E-1); _PS_CONST(cephes_exp_p5, 5.0000001201E-1);
v4sf exp_ps(v4sf x) { OPENMM_EXPORT v4sf exp_ps(v4sf x) {
v4sf tmp = _mm_setzero_ps(), fx; v4sf tmp = _mm_setzero_ps(), fx;
#ifdef USE_SSE2 #ifdef USE_SSE2
v4si emm0; v4si emm0;
...@@ -329,7 +330,7 @@ _PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI ...@@ -329,7 +330,7 @@ _PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
Since it is based on SSE intrinsics, it has to be compiled at -O2 to Since it is based on SSE intrinsics, it has to be compiled at -O2 to
deliver full speed. deliver full speed.
*/ */
v4sf sin_ps(v4sf x) { // any x OPENMM_EXPORT v4sf sin_ps(v4sf x) { // any x
v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y; v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
#ifdef USE_SSE2 #ifdef USE_SSE2
...@@ -446,7 +447,7 @@ v4sf sin_ps(v4sf x) { // any x ...@@ -446,7 +447,7 @@ v4sf sin_ps(v4sf x) { // any x
} }
/* almost the same as sin_ps */ /* almost the same as sin_ps */
v4sf cos_ps(v4sf x) { // any x OPENMM_EXPORT v4sf cos_ps(v4sf x) { // any x
v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, y; v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
#ifdef USE_SSE2 #ifdef USE_SSE2
v4si emm0, emm2; v4si emm0, emm2;
...@@ -565,7 +566,7 @@ v4sf cos_ps(v4sf x) { // any x ...@@ -565,7 +566,7 @@ v4sf cos_ps(v4sf x) { // any x
/* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them.. /* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them..
it is almost as fast, and gives you a free cosine with your sine */ it is almost as fast, and gives you a free cosine with your sine */
void sincos_ps(v4sf x, v4sf *s, v4sf *c) { OPENMM_EXPORT void sincos_ps(v4sf x, v4sf *s, v4sf *c) {
v4sf xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y; v4sf xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
#ifdef USE_SSE2 #ifdef USE_SSE2
v4si emm0, emm2, emm4; v4si emm0, emm2, emm4;
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#include "neon_mathfun.h" #include "neon_mathfun.h"
#else #else
#if !defined(__PNACL__) #if !defined(__PNACL__)
#define USE_SSE2
#include "sse_mathfun.h" #include "sse_mathfun.h"
#endif #endif
#endif #endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment