Commit 0412e25d authored by Peter Eastman
Browse files

Fixed compilation errors on Windows

parent 8d0fee51
......@@ -26,6 +26,7 @@
*/
#include <arm_neon.h>
#include "openmm/internal/windowsExport.h"
typedef float32x4_t v4sf; // vector of 4 float
typedef uint32x4_t v4su; // vector of 4 uint32
......@@ -48,7 +49,7 @@ typedef int32x4_t v4si; // vector of 4 uint32
/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/
v4sf log_ps(v4sf x) {
OPENMM_EXPORT v4sf log_ps(v4sf x) {
v4sf one = vdupq_n_f32(1);
x = vmaxq_f32(x, vdupq_n_f32(0)); /* force flush to zero on denormal values */
......@@ -133,7 +134,7 @@ v4sf log_ps(v4sf x) {
#define c_cephes_exp_p5 5.0000001201E-1
/* exp() computed for 4 float at once */
v4sf exp_ps(v4sf x) {
OPENMM_EXPORT v4sf exp_ps(v4sf x) {
v4sf tmp, fx;
v4sf one = vdupq_n_f32(1);
......@@ -219,7 +220,7 @@ v4sf exp_ps(v4sf x) {
almost no extra price so both sin_ps and cos_ps make use of
sincos_ps..
*/
void sincos_ps(v4sf x, v4sf *ysin, v4sf *ycos) { // any x
OPENMM_EXPORT void sincos_ps(v4sf x, v4sf *ysin, v4sf *ycos) { // any x
v4sf xmm1, xmm2, xmm3, y;
v4su emm2;
......@@ -286,13 +287,13 @@ void sincos_ps(v4sf x, v4sf *ysin, v4sf *ycos) { // any x
*ycos = vbslq_f32(sign_mask_cos, yc, vnegq_f32(yc));
}
v4sf sin_ps(v4sf x) {
OPENMM_EXPORT v4sf sin_ps(v4sf x) {
v4sf ysin, ycos;
sincos_ps(x, &ysin, &ycos);
return ysin;
}
v4sf cos_ps(v4sf x) {
OPENMM_EXPORT v4sf cos_ps(v4sf x) {
v4sf ysin, ycos;
sincos_ps(x, &ysin, &ycos);
return ycos;
......
......@@ -30,6 +30,7 @@
*/
#include <xmmintrin.h>
#include "openmm/internal/windowsExport.h"
/* yes I know, the top of this file is quite ugly */
......@@ -109,7 +110,7 @@ typedef union xmm_mm_union {
/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/
v4sf log_ps(v4sf x) {
OPENMM_EXPORT v4sf log_ps(v4sf x) {
#ifdef USE_SSE2
v4si emm0;
#else
......@@ -211,7 +212,7 @@ _PS_CONST(cephes_exp_p3, 4.1665795894E-2);
_PS_CONST(cephes_exp_p4, 1.6666665459E-1);
_PS_CONST(cephes_exp_p5, 5.0000001201E-1);
v4sf exp_ps(v4sf x) {
OPENMM_EXPORT v4sf exp_ps(v4sf x) {
v4sf tmp = _mm_setzero_ps(), fx;
#ifdef USE_SSE2
v4si emm0;
......@@ -329,7 +330,7 @@ _PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
Since it is based on SSE intrinsics, it has to be compiled at -O2 to
deliver full speed.
*/
v4sf sin_ps(v4sf x) { // any x
OPENMM_EXPORT v4sf sin_ps(v4sf x) { // any x
v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
#ifdef USE_SSE2
......@@ -446,7 +447,7 @@ v4sf sin_ps(v4sf x) { // any x
}
/* almost the same as sin_ps */
v4sf cos_ps(v4sf x) { // any x
OPENMM_EXPORT v4sf cos_ps(v4sf x) { // any x
v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
#ifdef USE_SSE2
v4si emm0, emm2;
......@@ -565,7 +566,7 @@ v4sf cos_ps(v4sf x) { // any x
/* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them..
it is almost as fast, and gives you a free cosine with your sine */
void sincos_ps(v4sf x, v4sf *s, v4sf *c) {
OPENMM_EXPORT void sincos_ps(v4sf x, v4sf *s, v4sf *c) {
v4sf xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
#ifdef USE_SSE2
v4si emm0, emm2, emm4;
......
......@@ -2,6 +2,7 @@
#include "neon_mathfun.h"
#else
#if !defined(__PNACL__)
#define USE_SSE2
#include "sse_mathfun.h"
#endif
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment