Portable.h 3.52 KB
Newer Older
Tim Dettmers's avatar
Tim Dettmers committed
1
2
3
4
5
6
#pragma once
#include <limits>
#include <cmath>
#include <stdexcept>
#include <sstream>

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#if defined(__aarch64__)
#ifdef __CUDACC__
#undef USE_NEON // Doesn't work with nvcc, undefined symbols
#else
#include <arm_neon.h>
#undef USE_NEON // Not yet implemented
#endif
#undef USE_AVX // x86_64 only
#undef USE_AVX2 // x86_64 only
#undef USE_SSE2 // x86_64 only
#undef USE_SSE41 // x86_64 only
#undef USE_SSE42 // x86_64 only
#undef USE_FMA // x86_64 only
#ifdef USE_NEON
typedef float32x4_t __m128;
typedef int32x4_t __m128i;
typedef float64x2_t __m128d;
#else
typedef struct {float a; float b; float c; float d;} __m128;
typedef struct {int a; int b; int c; int d;} __m128i;
typedef struct {double a; double b;} __m128d;
#endif
#else
#undef USE_NEON // ARM64 only
Tim Dettmers's avatar
Tim Dettmers committed
31
32
33
#ifdef __FMA__
#define USE_FMA
#endif
34
35
36
37
#if !defined(__SSE2__) && !defined(_MSC_VER)
#error Compiler must support SSE2
#endif
#define USE_SSE2
Tim Dettmers's avatar
Tim Dettmers committed
38

39
40
#if defined(__aarch64__)
#else
Tim Dettmers's avatar
Tim Dettmers committed
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#ifdef __AVX2__
#define USE_AVX2
#endif

#ifdef __AVX__
#define USE_AVX
#endif


#ifdef __SSE4_1__
#define USE_SSE41
#endif

#ifdef __SSE4_2__
#define USE_SSE42
#endif
57
58
#endif
#endif
Tim Dettmers's avatar
Tim Dettmers committed
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180

#ifndef _MSC_VER
#include <stdint.h>
#endif

namespace BinSearch {

#ifndef _MSC_VER
typedef  int8_t   int8;
typedef uint8_t  uint8;
typedef  int32_t   int32;
typedef uint32_t  uint32;
typedef  int64_t   int64;
typedef uint64_t  uint64;
#else
typedef  __int8   int8;
typedef unsigned __int8  uint8;
typedef  __int32   int32;
typedef unsigned __int32  uint32;
typedef  __int64   int64;
typedef unsigned __int64  uint64;
#endif

namespace Details {

#define myassert(cond, msg) if (!cond){ std::ostringstream os; os << "\nassertion failed: " << #cond << ", " << msg << "\n"; throw std::invalid_argument(os.str()); }

// log2 is not defined in VS2008
#if defined(_MSC_VER)
inline uint32 log2 (uint32 val) {
    if (val == 1) return 0;
    uint32 ret = 0;
    do {
        ret++;
        val >>= 1;
    } while (val > 1);
    return ret;
}
#endif

#ifdef _DEBUG
#define DEBUG
#endif

#ifdef _MSC_VER
#   define FORCE_INLINE __forceinline
#   define NO_INLINE __declspec(noinline)
#else
#   define NO_INLINE __attribute__((noinline))
#   ifdef DEBUG
#       define FORCE_INLINE NO_INLINE
#   else
#       define FORCE_INLINE __attribute__((always_inline)) inline
#   endif
#endif

#ifdef USE_AVX
#define COMISS "vcomiss"
#define COMISD "vcomisd"
#else
#define COMISS "comiss"
#define COMISD "comisd"
#endif

// nextafter is not defined in VS2008
#if defined(_MSC_VER) && (_MSC_VER <= 1500)
#include <float.h>
inline float mynext(float x)
{
    return _nextafterf(x, std::numeric_limits<float>::max());
}

inline double mynext(double x)
{
    return _nextafter(x, std::numeric_limits<double>::max());
}
inline float myprev(float x)
{
    return _nextafterf(x, -std::numeric_limits<float>::max());
}

inline double myprev(double x)
{
    return _nextafter(x, -std::numeric_limits<double>::max());
}
#else
inline float mynext(float x)
{
    return std::nextafterf(x, std::numeric_limits<float>::max());
}

inline double mynext(double x)
{
    return std::nextafter(x, std::numeric_limits<double>::max());
}
inline float myprev(float x)
{
    return std::nextafterf(x, -std::numeric_limits<float>::max());
}

inline double myprev(double x)
{
    return std::nextafter(x, -std::numeric_limits<double>::max());
}
#endif

template <typename T>
inline T next(T x)
{
    for (int i = 0; i < 4; ++i)
        x = mynext(x);
    return x;
}

template <typename T>
inline T prev(T x)
{
    for (int i = 0; i < 4; ++i)
        x = myprev(x);
    return x;
}

181
} // namespace Details
Tim Dettmers's avatar
Tim Dettmers committed
182
} // namespace BinSearch