Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
80d36f43
Commit
80d36f43
authored
Nov 10, 2013
by
Davis King
Browse files
Fleshed out the AVX SIMD support
parent
4fec4476
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
609 additions
and
15 deletions
+609
-15
dlib/simd.h
dlib/simd.h
+1
-0
dlib/simd/simd8f.h
dlib/simd/simd8f.h
+244
-0
dlib/simd/simd8i.h
dlib/simd/simd8i.h
+339
-0
dlib/simd/simd_check.h
dlib/simd/simd_check.h
+25
-15
No files found.
dlib/simd.h
View file @
80d36f43
...
...
@@ -6,6 +6,7 @@
#include "simd/simd4f.h"
#include "simd/simd4i.h"
#include "simd/simd8f.h"
#include "simd/simd8i.h"
#endif // DLIB_SIMd_H__
dlib/simd/simd8f.h
View file @
80d36f43
...
...
@@ -5,6 +5,7 @@
#include "simd_check.h"
#include "simd4f.h"
#include "simd8i.h"
namespace
dlib
...
...
@@ -24,6 +25,7 @@ namespace dlib
inline
simd8f
(
float
r0
,
float
r1
,
float
r2
,
float
r3
,
float
r4
,
float
r5
,
float
r6
,
float
r7
)
{
x
=
_mm256_setr_ps
(
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
}
simd8f
(
const
simd8i
&
val
)
:
x
(
_mm256_cvtepi32_ps
(
val
))
{}
simd8f
(
const
__m256
&
val
)
:
x
(
val
)
{}
simd8f
&
operator
=
(
const
__m256
&
val
)
{
...
...
@@ -32,6 +34,9 @@ namespace dlib
}
inline
operator
__m256
()
const
{
return
x
;
}
// truncate to 32bit integers
operator
__m256i
()
const
{
return
_mm256_cvttps_epi32
(
x
);
}
void
load_aligned
(
const
type
*
ptr
)
{
x
=
_mm256_load_ps
(
ptr
);
}
void
store_aligned
(
type
*
ptr
)
const
{
_mm256_store_ps
(
ptr
,
x
);
}
void
load
(
const
type
*
ptr
)
{
x
=
_mm256_loadu_ps
(
ptr
);
}
...
...
@@ -51,6 +56,33 @@ namespace dlib
private:
__m256
x
;
};
// Result type of the simd8f comparison operators when AVX is available.
// The eight lanes are kept in a single __m256 register.
class simd8f_bool
{
public:
    typedef float type;

    // Default construction leaves the register contents unspecified.
    simd8f_bool() {}

    // Wrap an already computed __m256 value.
    simd8f_bool(const __m256& val) : x(val) {}

    // Combine two 4-lane results: `low` fills the lower 128 bits and `high`
    // is inserted into the upper 128 bits.
    simd8f_bool(const simd4f_bool& low, const simd4f_bool& high)
        : x(_mm256_insertf128_ps(_mm256_castps128_ps256(low), high, 1))
    {}

    simd8f_bool& operator=(const __m256& val)
    {
        x = val;
        return *this;
    }

    // Implicit conversion so the value can be handed directly to intrinsics.
    operator __m256() const { return x; }

private:
    __m256 x;
};
#else
class
simd8f
{
...
...
@@ -62,6 +94,16 @@ namespace dlib
simd8f
(
float
f
)
:
_low
(
f
),
_high
(
f
)
{}
simd8f
(
float
r0
,
float
r1
,
float
r2
,
float
r3
,
float
r4
,
float
r5
,
float
r6
,
float
r7
)
:
_low
(
r0
,
r1
,
r2
,
r3
),
_high
(
r4
,
r5
,
r6
,
r7
)
{}
simd8f
(
const
simd8i
&
val
)
:
_low
(
val
.
low
()),
_high
(
val
.
high
())
{
}
// truncate to 32bit integers
operator
simd8i
::
rawarray
()
const
{
simd8i
::
rawarray
temp
;
temp
.
low
=
_low
;
temp
.
high
=
_high
;
return
temp
;
}
void
load_aligned
(
const
type
*
ptr
)
{
_low
.
load_aligned
(
ptr
);
_high
.
load_aligned
(
ptr
+
4
);
}
void
store_aligned
(
type
*
ptr
)
const
{
_low
.
store_aligned
(
ptr
);
_high
.
store_aligned
(
ptr
+
4
);
}
...
...
@@ -83,6 +125,21 @@ namespace dlib
private:
simd4f
_low
,
_high
;
};
// Result type of the simd8f comparison operators when AVX is not available.
// It is simply a pair of 4-lane results: one for lanes 0-3 and one for
// lanes 4-7.
class simd8f_bool
{
public:
    typedef float type;

    // Default construction default-constructs both halves.
    simd8f_bool() {}

    // Build from the lower and upper 4-lane halves.
    simd8f_bool(const simd4f_bool& low_, const simd4f_bool& high_)
        : _low(low_), _high(high_)
    {}

    // Lower half (copied out by value).
    simd4f_bool low() const { return _low; }

    // Upper half (copied out by value).
    simd4f_bool high() const { return _high; }

private:
    simd4f_bool _low;
    simd4f_bool _high;
};
#endif
// ----------------------------------------------------------------------------------------
...
...
@@ -110,6 +167,20 @@ namespace dlib
// Adds rhs to lhs lane by lane, stores the sum in lhs and returns lhs.
// (The original carried a second, unreachable `return lhs;`.)
inline simd8f& operator+= (simd8f& lhs, const simd8f& rhs)
{
    lhs = lhs + rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Lane-wise subtraction: every lane of the result is lhs minus rhs.
inline simd8f operator- (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    return _mm256_sub_ps(lhs, rhs);
#else
    // Without AVX the two 4-lane halves are subtracted independently.
    const simd8f difference(lhs.low() - rhs.low(), lhs.high() - rhs.high());
    return difference;
#endif
}
// Subtracts rhs from lhs lane by lane, stores the result in lhs and
// returns lhs. (The original carried a second, unreachable `return lhs;`.)
inline simd8f& operator-= (simd8f& lhs, const simd8f& rhs)
{
    lhs = lhs - rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
inline
simd8f
operator
*
(
const
simd8f
&
lhs
,
const
simd8f
&
rhs
)
...
...
@@ -124,6 +195,130 @@ namespace dlib
// Multiplies lhs by rhs lane by lane, stores the product in lhs and
// returns lhs. (The original carried a second, unreachable `return lhs;`.)
inline simd8f& operator*= (simd8f& lhs, const simd8f& rhs)
{
    lhs = lhs * rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Lane-wise division: every lane of the result is lhs divided by rhs.
inline simd8f operator/ (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    return _mm256_div_ps(lhs, rhs);
#else
    // Without AVX the two 4-lane halves are divided independently.
    const simd8f quotient(lhs.low() / rhs.low(), lhs.high() / rhs.high());
    return quotient;
#endif
}
// Divides lhs by rhs lane by lane, stores the quotient in lhs and
// returns lhs. (The original carried a second, unreachable `return lhs;`.)
inline simd8f& operator/= (simd8f& lhs, const simd8f& rhs)
{
    lhs = lhs / rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Lane-wise equality test between lhs and rhs.
inline simd8f_bool operator== (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    // _CMP_EQ_OQ is predicate 0: equal, ordered, non-signaling.
    return _mm256_cmp_ps(lhs, rhs, _CMP_EQ_OQ);
#else
    // Without AVX each 4-lane half is compared on its own.
    return simd8f_bool(lhs.low() == rhs.low(), lhs.high() == rhs.high());
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise inequality test between lhs and rhs.
inline simd8f_bool operator!= (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    // _CMP_NEQ_UQ is predicate 4: not-equal, unordered, non-signaling.
    return _mm256_cmp_ps(lhs, rhs, _CMP_NEQ_UQ);
#else
    // Without AVX each 4-lane half is compared on its own.
    return simd8f_bool(lhs.low() != rhs.low(), lhs.high() != rhs.high());
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise "less than" test: lhs < rhs.
inline simd8f_bool operator< (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    // _CMP_LT_OS is predicate 1: less-than, ordered, signaling.
    return _mm256_cmp_ps(lhs, rhs, _CMP_LT_OS);
#else
    // Without AVX each 4-lane half is compared on its own.
    return simd8f_bool(lhs.low() < rhs.low(), lhs.high() < rhs.high());
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise "greater than" test, expressed as operator< with the operands
// swapped.
inline simd8f_bool operator> (const simd8f& lhs, const simd8f& rhs)
{
    const simd8f_bool swapped_less = (rhs < lhs);
    return swapped_less;
}
// ----------------------------------------------------------------------------------------
// Lane-wise "less than or equal" test: lhs <= rhs.
inline simd8f_bool operator<= (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    // _CMP_LE_OS is predicate 2: less-or-equal, ordered, signaling.
    return _mm256_cmp_ps(lhs, rhs, _CMP_LE_OS);
#else
    // Without AVX each 4-lane half is compared on its own.
    return simd8f_bool(lhs.low() <= rhs.low(), lhs.high() <= rhs.high());
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise "greater than or equal" test, expressed as operator<= with the
// operands swapped.
inline simd8f_bool operator>= (const simd8f& lhs, const simd8f& rhs)
{
    const simd8f_bool swapped_less_equal = (rhs <= lhs);
    return swapped_less_equal;
}
// ----------------------------------------------------------------------------------------
// Lane-wise minimum of lhs and rhs.
inline simd8f min (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    return _mm256_min_ps(lhs, rhs);
#else
    // Without AVX, defer to the 4-lane min() on each half.
    const simd8f smaller(min(lhs.low(), rhs.low()),
                         min(lhs.high(), rhs.high()));
    return smaller;
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise maximum of lhs and rhs.
inline simd8f max (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    return _mm256_max_ps(lhs, rhs);
#else
    // Without AVX, defer to the 4-lane max() on each half.
    const simd8f larger(max(lhs.low(), rhs.low()),
                        max(lhs.high(), rhs.high()));
    return larger;
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise reciprocal of item. The AVX path uses _mm256_rcp_ps; the
// fallback path forwards to the 4-lane reciprocal() on each half.
inline simd8f reciprocal (const simd8f& item)
{
#ifdef DLIB_HAVE_AVX
    return _mm256_rcp_ps(item);
#else
    const simd8f inverted(reciprocal(item.low()), reciprocal(item.high()));
    return inverted;
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise reciprocal square root of item. The AVX path uses
// _mm256_rsqrt_ps; the fallback path forwards to the 4-lane
// reciprocal_sqrt() on each half.
inline simd8f reciprocal_sqrt (const simd8f& item)
{
#ifdef DLIB_HAVE_AVX
    return _mm256_rsqrt_ps(item);
#else
    const simd8f inverted_root(reciprocal_sqrt(item.low()),
                               reciprocal_sqrt(item.high()));
    return inverted_root;
#endif
}
// ----------------------------------------------------------------------------------------
inline
float
sum
(
const
simd8f
&
item
)
...
...
@@ -144,6 +339,55 @@ namespace dlib
return
sum
(
lhs
*
rhs
);
}
// ----------------------------------------------------------------------------------------
// Lane-wise square root of item.
inline simd8f sqrt (const simd8f& item)
{
#ifdef DLIB_HAVE_AVX
    return _mm256_sqrt_ps(item);
#else
    // Without AVX, defer to the 4-lane sqrt() on each half.
    const simd8f roots(sqrt(item.low()), sqrt(item.high()));
    return roots;
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise ceiling of item.
inline simd8f ceil (const simd8f& item)
{
#ifdef DLIB_HAVE_AVX
    return _mm256_ceil_ps(item);
#else
    // Without AVX, defer to the 4-lane ceil() on each half.
    const simd8f rounded_up(ceil(item.low()), ceil(item.high()));
    return rounded_up;
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise floor of item.
inline simd8f floor (const simd8f& item)
{
#ifdef DLIB_HAVE_AVX
    return _mm256_floor_ps(item);
#else
    // Without AVX, defer to the 4-lane floor() on each half.
    const simd8f rounded_down(floor(item.low()), floor(item.high()));
    return rounded_down;
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise conditional: computes cmp ? a : b for each lane.
inline simd8f select (const simd8f_bool& cmp, const simd8f& a, const simd8f& b)
{
#ifdef DLIB_HAVE_AVX
    // blendv takes its second source where the mask is set, hence the
    // (b, a) argument order.
    return _mm256_blendv_ps(b, a, cmp);
#else
    // Without AVX, defer to the 4-lane select() on each half.
    const simd8f chosen(select(cmp.low(), a.low(), b.low()),
                        select(cmp.high(), a.high(), b.high()));
    return chosen;
#endif
}
// ----------------------------------------------------------------------------------------
}
...
...
dlib/simd/simd8i.h
0 → 100644
View file @
80d36f43
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_sIMD8I_H__
#define DLIB_sIMD8I_H__
#include "simd_check.h"
#include "../uintn.h"
namespace
dlib
{
#ifdef DLIB_HAVE_AVX
class
simd8i
{
public:
typedef
int32
type
;
simd8i
()
{}
simd8i
(
int32
f
)
{
x
=
_mm256_set1_epi32
(
f
);
}
simd8i
(
int32
r0
,
int32
r1
,
int32
r2
,
int32
r3
,
int32
r4
,
int32
r5
,
int32
r6
,
int32
r7
)
{
x
=
_mm256_setr_epi32
(
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
}
simd8i
(
const
__m256i
&
val
)
:
x
(
val
)
{}
simd8i
(
const
simd4i
&
low
,
const
simd4i
&
high
)
{
x
=
_mm256_insertf128_si256
(
_mm256_castsi128_si256
(
low
),
high
,
1
);
}
simd8i
&
operator
=
(
const
__m256i
&
val
)
{
x
=
val
;
return
*
this
;
}
operator
__m256i
()
const
{
return
x
;
}
void
load_aligned
(
const
type
*
ptr
)
{
x
=
_mm256_load_si256
((
const
__m256i
*
)
ptr
);
}
void
store_aligned
(
type
*
ptr
)
const
{
_mm256_store_si256
((
__m256i
*
)
ptr
,
x
);
}
void
load
(
const
type
*
ptr
)
{
x
=
_mm256_loadu_si256
((
const
__m256i
*
)
ptr
);
}
void
store
(
type
*
ptr
)
const
{
_mm256_storeu_si256
((
__m256i
*
)
ptr
,
x
);
}
simd4i
low
()
const
{
return
_mm256_castsi256_si128
(
x
);
}
simd4i
high
()
const
{
return
_mm256_extractf128_si256
(
x
,
1
);
}
unsigned
int
size
()
const
{
return
4
;
}
int32
operator
[](
unsigned
int
idx
)
const
{
int32
temp
[
8
];
store
(
temp
);
return
temp
[
idx
];
}
private:
__m256i
x
;
};
#else
// Eight packed 32-bit signed integers, implemented as two simd4i halves.
// This definition is used when DLIB_HAVE_AVX is not defined.
class simd8i
{
public:
    typedef int32 type;

    // Default construction default-constructs both halves.
    simd8i() {}

    // Build from an explicit lower (lanes 0-3) and upper (lanes 4-7) half.
    simd8i(const simd4i& low_, const simd4i& high_)
        : _low(low_), _high(high_)
    {}

    // Broadcast f into all eight lanes.
    simd8i(int32 f) : _low(f), _high(f) {}

    // r0..r3 go to the lower half, r4..r7 to the upper half.
    simd8i(int32 r0, int32 r1, int32 r2, int32 r3,
           int32 r4, int32 r5, int32 r6, int32 r7)
        : _low(r0, r1, r2, r3), _high(r4, r5, r6, r7)
    {}

    // Plain pair of halves; lets other types convert to simd8i through a
    // conversion operator to rawarray.
    struct rawarray
    {
        simd4i low, high;
    };

    simd8i(const rawarray& a)
    {
        _low  = a.low;
        _high = a.high;
    }

    // Aligned load: the first four integers fill the lower half, the next
    // four the upper half.
    void load_aligned(const type* ptr)
    {
        _low.load_aligned(ptr);
        _high.load_aligned(ptr + 4);
    }

    // Aligned store of both halves to ptr[0..3] and ptr[4..7].
    void store_aligned(type* ptr) const
    {
        _low.store_aligned(ptr);
        _high.store_aligned(ptr + 4);
    }

    // Load of both halves from ptr[0..3] and ptr[4..7].
    void load(const type* ptr)
    {
        _low.load(ptr);
        _high.load(ptr + 4);
    }

    // Store of both halves to ptr[0..3] and ptr[4..7].
    void store(type* ptr) const
    {
        _low.store(ptr);
        _high.store(ptr + 4);
    }

    // Number of lanes.
    unsigned int size() const { return 8; }

    // Read one lane: indices below 4 come from the lower half, the rest
    // from the upper half.
    int32 operator[](unsigned int idx) const
    {
        return (idx < 4) ? _low[idx] : _high[idx - 4];
    }

    // Lanes 0-3.
    simd4i low() const { return _low; }

    // Lanes 4-7.
    simd4i high() const { return _high; }

private:
    simd4i _low;
    simd4i _high;
};
#endif
// ----------------------------------------------------------------------------------------
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
out
,
const
simd8i
&
item
)
{
int32
temp
[
8
];
item
.
store
(
temp
);
out
<<
"("
<<
temp
[
0
]
<<
", "
<<
temp
[
1
]
<<
", "
<<
temp
[
2
]
<<
", "
<<
temp
[
3
]
<<
", "
<<
temp
[
4
]
<<
", "
<<
temp
[
5
]
<<
", "
<<
temp
[
6
]
<<
", "
<<
temp
[
7
]
<<
")"
;
return
out
;
}
// ----------------------------------------------------------------------------------------
// Lane-wise 32-bit integer addition.
inline simd8i operator+ (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    return _mm256_add_epi32(lhs, rhs);
#else
    // Without AVX2 the two 4-lane halves are added independently.
    const simd8i total(lhs.low() + rhs.low(), lhs.high() + rhs.high());
    return total;
#endif
}
// Adds rhs to lhs lane by lane, stores the sum in lhs and returns lhs.
// (The original carried a second, unreachable `return lhs;`.)
inline simd8i& operator+= (simd8i& lhs, const simd8i& rhs)
{
    lhs = lhs + rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Lane-wise 32-bit integer subtraction.
inline simd8i operator- (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    return _mm256_sub_epi32(lhs, rhs);
#else
    // Without AVX2 the two 4-lane halves are subtracted independently.
    const simd8i difference(lhs.low() - rhs.low(), lhs.high() - rhs.high());
    return difference;
#endif
}
// Subtracts rhs from lhs lane by lane, stores the result in lhs and
// returns lhs. (The original carried a second, unreachable `return lhs;`.)
inline simd8i& operator-= (simd8i& lhs, const simd8i& rhs)
{
    lhs = lhs - rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Lane-wise 32-bit integer multiplication. The AVX2 path uses
// _mm256_mullo_epi32, i.e. it keeps the low 32 bits of each product.
inline simd8i operator* (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    return _mm256_mullo_epi32(lhs, rhs);
#else
    // Without AVX2 the two 4-lane halves are multiplied independently.
    const simd8i product(lhs.low() * rhs.low(), lhs.high() * rhs.high());
    return product;
#endif
}
// Multiplies lhs by rhs lane by lane, stores the product in lhs and
// returns lhs. (The original carried a second, unreachable `return lhs;`.)
inline simd8i& operator*= (simd8i& lhs, const simd8i& rhs)
{
    lhs = lhs * rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Bitwise AND of lhs and rhs.
inline simd8i operator& (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    return _mm256_and_si256(lhs, rhs);
#else
    // Without AVX2 the two 4-lane halves are combined independently.
    const simd8i masked(lhs.low() & rhs.low(), lhs.high() & rhs.high());
    return masked;
#endif
}
// ANDs rhs into lhs bit by bit and returns lhs.
// (The original carried a second, unreachable `return lhs;`.)
inline simd8i& operator&= (simd8i& lhs, const simd8i& rhs)
{
    lhs = lhs & rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Bitwise OR of lhs and rhs.
inline simd8i operator| (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    return _mm256_or_si256(lhs, rhs);
#else
    // Without AVX2 the two 4-lane halves are combined independently.
    const simd8i merged(lhs.low() | rhs.low(), lhs.high() | rhs.high());
    return merged;
#endif
}
// ORs rhs into lhs bit by bit and returns lhs.
// (The original carried a second, unreachable `return lhs;`.)
inline simd8i& operator|= (simd8i& lhs, const simd8i& rhs)
{
    lhs = lhs | rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Bitwise XOR of lhs and rhs.
inline simd8i operator^ (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    return _mm256_xor_si256(lhs, rhs);
#else
    // Without AVX2 the two 4-lane halves are combined independently.
    const simd8i toggled(lhs.low() ^ rhs.low(), lhs.high() ^ rhs.high());
    return toggled;
#endif
}
// XORs rhs into lhs bit by bit and returns lhs.
// (The original carried a second, unreachable `return lhs;`.)
inline simd8i& operator^= (simd8i& lhs, const simd8i& rhs)
{
    lhs = lhs ^ rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Bitwise complement of every lane of lhs.
inline simd8i operator~ (const simd8i& lhs)
{
#ifdef DLIB_HAVE_AVX2
    // XOR with an all-ones register flips every bit. -1 is the all-ones
    // pattern as a signed int; the original passed the unsigned literal
    // 0xFFFFFFFF, which relies on an out-of-range conversion to int.
    return _mm256_xor_si256(lhs, _mm256_set1_epi32(-1));
#else
    // Without AVX2 each 4-lane half is complemented on its own.
    return simd8i(~lhs.low(), ~lhs.high());
#endif
}
// ----------------------------------------------------------------------------------------
// Shifts every lane of lhs left by rhs bits.
inline simd8i operator<< (const simd8i& lhs, const int& rhs)
{
#ifdef DLIB_HAVE_AVX2
    // The shift count is passed in the low element of a 128-bit register.
    return _mm256_sll_epi32(lhs, _mm_cvtsi32_si128(rhs));
#else
    // Without AVX2 the two 4-lane halves are shifted independently.
    const simd8i shifted(lhs.low() << rhs, lhs.high() << rhs);
    return shifted;
#endif
}
// Shifts every lane of lhs left by rhs bits in place and returns lhs.
// (The original carried a second, unreachable `return lhs;`.)
inline simd8i& operator<<= (simd8i& lhs, const int& rhs)
{
    lhs = lhs << rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Shifts every lane of lhs right by rhs bits. The AVX2 path uses
// _mm256_sra_epi32, the arithmetic (sign-preserving) right shift.
inline simd8i operator>> (const simd8i& lhs, const int& rhs)
{
#ifdef DLIB_HAVE_AVX2
    // The shift count is passed in the low element of a 128-bit register.
    return _mm256_sra_epi32(lhs, _mm_cvtsi32_si128(rhs));
#else
    // Without AVX2 the two 4-lane halves are shifted independently.
    const simd8i shifted(lhs.low() >> rhs, lhs.high() >> rhs);
    return shifted;
#endif
}
// Shifts every lane of lhs right by rhs bits in place and returns lhs.
// (The original carried a second, unreachable `return lhs;`.)
inline simd8i& operator>>= (simd8i& lhs, const int& rhs)
{
    lhs = lhs >> rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Lane-wise equality test. The result is returned as a simd8i mask rather
// than a dedicated boolean type.
inline simd8i operator== (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    return _mm256_cmpeq_epi32(lhs, rhs);
#else
    // Without AVX2 each 4-lane half is compared on its own.
    const simd8i equal_mask(lhs.low() == rhs.low(), lhs.high() == rhs.high());
    return equal_mask;
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise inequality test, computed as the bitwise complement of the
// equality mask.
inline simd8i operator!= (const simd8i& lhs, const simd8i& rhs)
{
    const simd8i equal_mask = (lhs == rhs);
    return ~equal_mask;
}
// ----------------------------------------------------------------------------------------
// Lane-wise signed "greater than" test, returned as a simd8i mask.
inline simd8i operator> (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    return _mm256_cmpgt_epi32(lhs, rhs);
#else
    // Without AVX2 each 4-lane half is compared on its own.
    const simd8i greater_mask(lhs.low() > rhs.low(), lhs.high() > rhs.high());
    return greater_mask;
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise "less than" test, expressed as operator> with the operands
// swapped.
inline simd8i operator< (const simd8i& lhs, const simd8i& rhs)
{
    const simd8i swapped_greater = (rhs > lhs);
    return swapped_greater;
}
// ----------------------------------------------------------------------------------------
// Lane-wise "less than or equal" test, computed as the bitwise complement
// of the "greater than" mask.
inline simd8i operator<= (const simd8i& lhs, const simd8i& rhs)
{
    const simd8i greater_mask = (lhs > rhs);
    return ~greater_mask;
}
// ----------------------------------------------------------------------------------------
// Lane-wise "greater than or equal" test, expressed as operator<= with the
// operands swapped.
inline simd8i operator>= (const simd8i& lhs, const simd8i& rhs)
{
    const simd8i swapped_less_equal = (rhs <= lhs);
    return swapped_less_equal;
}
// ----------------------------------------------------------------------------------------
// Lane-wise signed minimum of lhs and rhs.
inline simd8i min (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    return _mm256_min_epi32(lhs, rhs);
#else
    // Without AVX2, defer to the 4-lane min() on each half.
    const simd8i smaller(min(lhs.low(), rhs.low()),
                         min(lhs.high(), rhs.high()));
    return smaller;
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise signed maximum of lhs and rhs.
inline simd8i max (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    return _mm256_max_epi32(lhs, rhs);
#else
    // Without AVX2, defer to the 4-lane max() on each half.
    const simd8i larger(max(lhs.low(), rhs.low()),
                        max(lhs.high(), rhs.high()));
    return larger;
#endif
}
// ----------------------------------------------------------------------------------------
// Adds all eight lanes of item together. The two halves are first added
// lane-wise, then the 4-lane sum() reduces the result to a scalar.
inline int32 sum (const simd8i& item)
{
    const int32 total = sum(item.low() + item.high());
    return total;
}
// ----------------------------------------------------------------------------------------
// Lane-wise conditional: computes cmp ? a : b.
// NOTE(review): the AVX2 path blends per byte, so cmp is presumably an
// all-ones / all-zeros lane mask such as the comparison operators return.
inline simd8i select (const simd8i& cmp, const simd8i& a, const simd8i& b)
{
#ifdef DLIB_HAVE_AVX2
    // blendv takes its second source where the mask is set, hence the
    // (b, a) argument order.
    return _mm256_blendv_epi8(b, a, cmp);
#else
    // Without AVX2, defer to the 4-lane select() on each half.
    const simd8i chosen(select(cmp.low(), a.low(), b.low()),
                        select(cmp.high(), a.high(), b.high()));
    return chosen;
#endif
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_sIMD8I_H__
dlib/simd/simd_check.h
View file @
80d36f43
...
...
@@ -3,6 +3,7 @@
#ifndef DLIB_SIMd_CHECK_H__
#define DLIB_SIMd_CHECK_H__
//#define DLIB_DO_NOT_USE_SIMD
// figure out which SIMD instructions we can use.
#ifndef DLIB_DO_NOT_USE_SIMD
...
...
@@ -27,29 +28,38 @@
#ifdef __AVX__
#define DLIB_HAVE_AVX
#endif
#ifdef __AVX2__
#define DLIB_HAVE_AVX2
#endif
#endif
#endif
// ----------------------------------------------------------------------------------------
#ifdef DLIB_HAVE_SSE2
#include <xmmintrin.h>
#include <emmintrin.h>
#include <mmintrin.h>
#endif
#ifdef DLIB_HAVE_SSE3
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h>
#endif
#ifdef DLIB_HAVE_SSE41
#include <smmintrin.h> // SSE4
#endif
#ifdef DLIB_HAVE_AVX
#include <immintrin.h> // AVX
#ifdef __GNUC__
#include <x86intrin.h>
#else
#ifdef DLIB_HAVE_SSE2
#include <xmmintrin.h>
#include <emmintrin.h>
#include <mmintrin.h>
#endif
#ifdef DLIB_HAVE_SSE3
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h>
#endif
#ifdef DLIB_HAVE_SSE41
#include <smmintrin.h> // SSE4
#endif
#ifdef DLIB_HAVE_AVX
#include <immintrin.h> // AVX
#endif
#ifdef DLIB_HAVE_AVX2
#include <avx2intrin.h>
#endif
#endif
#endif // DLIB_SIMd_CHECK_H__
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment