gaoqiong / MIGraphX / Commits

Commit 3df20646, authored Jan 31, 2022 by Khalique Ahmed

    manual merge

Parents: 1005a693, d0543c96
Changes: 104 (the diff is paginated); this page shows 20 changed files with 826 additions and 114 deletions (+826, -114).
Files shown on this page:

src/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp   +13  -10
src/targets/gpu/kernels/include/migraphx/kernels/math.hpp                +162 -0
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp           +28  -2
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp             +24  -10
src/targets/gpu/kernels/include/migraphx/kernels/print.hpp               +1   -1
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp            +25  -14
src/targets/gpu/kernels/include/migraphx/kernels/types.hpp               +1   -1
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp                 +54  -1
src/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp           +137 -74
src/targets/gpu/lowering.cpp                                             +4   -1
test/auto_contiguous_test.cpp                                            +34  -0
test/fuse_pointwise.cpp                                                  +29  -0
test/onnx/gen_onnx.py                                                    +227 -0
test/onnx/gen_onnx.pyc                                                   +0   -0
test/onnx/greaterorequal_test.onnx                                       +16  -0
test/onnx/hardsigmoid_default_test.onnx                                  +15  -0
test/onnx/hardsigmoid_double_test.onnx                                   +17  -0
test/onnx/hardsigmoid_half_test.onnx                                     +17  -0
test/onnx/hardsigmoid_verify_test.onnx                                   +11  -0
test/onnx/hardswish_test.onnx                                            +11  -0
src/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp

@@ -5,28 +5,31 @@
 namespace migraphx {

-template <class T, T v>
+template <class T, T V>
 struct integral_constant
 {
-    static constexpr T value = v;
+    static constexpr T value = V;
     using value_type         = T;
     using type               = integral_constant;
     constexpr operator value_type() const noexcept { return value; }
     constexpr value_type operator()() const noexcept { return value; }
+    static constexpr type to() { return {}; }
 };

+// NOLINTNEXTLINE
 #define MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(op)                                \
-    template <class T, T v, class U, U w>                                       \
-    constexpr inline integral_constant<decltype(v op w), (v op w)> operator op( \
-        integral_constant<T, v>, integral_constant<U, w>) noexcept              \
+    template <class T, T V, class U, U w>                                       \
+    constexpr inline integral_constant<decltype(V op w), (V op w)> operator op( \
+        integral_constant<T, V>, integral_constant<U, w>) noexcept              \
     {                                                                           \
        return {};                                                               \
     }

+// NOLINTNEXTLINE
 #define MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(op)                                 \
-    template <class T, T v>                                                     \
-    constexpr inline integral_constant<decltype(op v), (op v)> operator op(     \
-        integral_constant<T, v>) noexcept                                       \
+    template <class T, T V>                                                     \
+    constexpr inline integral_constant<decltype(op V), (op V)> operator op(     \
+        integral_constant<T, V>) noexcept                                       \
     {                                                                           \
        return {};                                                               \
     }
@@ -64,8 +67,8 @@ using false_type = bool_constant<false>;
 template <index_int N>
 using index_constant = integral_constant<index_int, N>;

-template <auto v>
-static constexpr auto _c = integral_constant<decltype(v), v>{};
+template <auto V>
+static constexpr auto _c = integral_constant<decltype(V), V>{}; // NOLINT

 } // namespace migraphx
 #endif // MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP
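The rename of the non-type template parameter from v to V (together with the new NOLINT markers) reads like a clang-tidy naming cleanup, and the added to() member lets callers rebuild the constant from its type alone. A minimal standalone sketch of the pattern, not the MIGraphX header itself, shows why carrying the value in the type is useful:

    // Standalone sketch: the value lives in the type, so it survives being
    // passed through ordinary functions as a regular argument.
    #include <cstdio>

    template <class T, T V>
    struct integral_constant
    {
        static constexpr T value = V;
        using type               = integral_constant;
        constexpr operator T() const noexcept { return value; }
        // to() rebuilds the constant from the type alone, which is handy
        // when only a decltype of some object is available.
        static constexpr type to() { return {}; }
    };

    template <auto V>
    constexpr auto _c = integral_constant<decltype(V), V>{};

    template <class N>
    constexpr auto twice(N)
    {
        // N::value is usable in constant expressions even though the
        // argument was passed by value at "runtime".
        return _c<N::value * 2>;
    }

    int main()
    {
        constexpr auto four = twice(_c<2>);
        static_assert(four == 4);
        std::printf("%d\n", int(four));
    }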
src/targets/gpu/kernels/include/migraphx/kernels/math.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_KERNELS_MATH_HPP
#define MIGRAPHX_GUARD_KERNELS_MATH_HPP

#include <migraphx/kernels/types.hpp>
#include <migraphx/kernels/vec.hpp>
#include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/type_traits.hpp>
#include <hip/hip_fp16.h>
#include <hip/math_functions.h>

namespace migraphx {

namespace math {
constexpr float as_float(migraphx::half x) { return x; }
template <class T>
constexpr T as_float(T x)
{
    return x;
}
} // namespace math

// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH(name, fname)                              \
    template <class... Ts, MIGRAPHX_REQUIRES(not is_any_vec<Ts...>())> \
    auto __device__ name(Ts... xs) MIGRAPHX_RETURNS(fname(xs...))

// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH_VEC(name)                                       \
    template <class... Ts, MIGRAPHX_REQUIRES(is_any_vec<Ts...>())>           \
    auto __device__ name(Ts... xs)                                           \
    {                                                                        \
        return vec_transform(xs...)([](auto... ys) { return name(ys...); }); \
    }

// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH_FOR(type, name, fname)                    \
    template <class... Ts, MIGRAPHX_REQUIRES(not is_any_vec<Ts...>())> \
    auto __device__ name(type x, Ts... xs)->type                       \
    {                                                                  \
        return fname(x, xs...);                                        \
    }

// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH_HALF(name, fname)                         \
    template <class... Ts, MIGRAPHX_REQUIRES(not is_any_vec<Ts...>())> \
    auto __device__ name(migraphx::half x, Ts... xs)                   \
        MIGRAPHX_RETURNS(fname(math::as_float(x), math::as_float(xs)...))

MIGRAPHX_DEVICE_MATH(abs, ::abs)
MIGRAPHX_DEVICE_MATH(acos, ::acos)
MIGRAPHX_DEVICE_MATH(acosh, ::acosh)
MIGRAPHX_DEVICE_MATH(asin, ::asin)
MIGRAPHX_DEVICE_MATH(asinh, ::asinh)
MIGRAPHX_DEVICE_MATH(atan, ::atan)
MIGRAPHX_DEVICE_MATH(atanh, ::atanh)
MIGRAPHX_DEVICE_MATH(ceil, ::ceil)
MIGRAPHX_DEVICE_MATH(cos, ::cos)
MIGRAPHX_DEVICE_MATH(cosh, ::cosh)
MIGRAPHX_DEVICE_MATH(erf, ::erf)
MIGRAPHX_DEVICE_MATH(exp, ::exp)
MIGRAPHX_DEVICE_MATH(floor, ::floor)
MIGRAPHX_DEVICE_MATH(log, ::log)
MIGRAPHX_DEVICE_MATH(pow, ::pow)
MIGRAPHX_DEVICE_MATH(round, ::round)
MIGRAPHX_DEVICE_MATH(rsqrt, ::rsqrt)
MIGRAPHX_DEVICE_MATH(sin, ::sin)
MIGRAPHX_DEVICE_MATH(sinh, ::sinh)
MIGRAPHX_DEVICE_MATH(sqrt, ::sqrt)
MIGRAPHX_DEVICE_MATH(tan, ::tan)
MIGRAPHX_DEVICE_MATH(tanh, ::tanh)

// Float overloads
MIGRAPHX_DEVICE_MATH_FOR(float, acos, ::acosf)
MIGRAPHX_DEVICE_MATH_FOR(float, acosh, ::acoshf)
MIGRAPHX_DEVICE_MATH_FOR(float, asin, ::asinf)
MIGRAPHX_DEVICE_MATH_FOR(float, asinh, ::asinhf)
MIGRAPHX_DEVICE_MATH_FOR(float, atan, ::atanf)
MIGRAPHX_DEVICE_MATH_FOR(float, atanh, ::atanhf)
MIGRAPHX_DEVICE_MATH_FOR(float, cos, ::cosf)
MIGRAPHX_DEVICE_MATH_FOR(float, cosh, ::coshf)
MIGRAPHX_DEVICE_MATH_FOR(float, rsqrt, ::rsqrtf)
MIGRAPHX_DEVICE_MATH_FOR(float, sin, ::sinf)
MIGRAPHX_DEVICE_MATH_FOR(float, sinh, ::sinhf)
MIGRAPHX_DEVICE_MATH_FOR(float, tan, ::tanf)
MIGRAPHX_DEVICE_MATH_FOR(float, tanh, ::tanhf)

// Builtin half functions
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, abs, ::__habs)
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, exp, ::hexp)
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, log, ::hlog)
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, rsqrt, ::hrsqrt)
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, sqrt, ::hsqrt)

// Use float to compute half overload
MIGRAPHX_DEVICE_MATH_HALF(acos, ::acos)
MIGRAPHX_DEVICE_MATH_HALF(acosh, ::acosh)
MIGRAPHX_DEVICE_MATH_HALF(asin, ::asin)
MIGRAPHX_DEVICE_MATH_HALF(asinh, ::asinh)
MIGRAPHX_DEVICE_MATH_HALF(atan, ::atan)
MIGRAPHX_DEVICE_MATH_HALF(atanh, ::atanh)
MIGRAPHX_DEVICE_MATH_HALF(ceil, ::ceil)
MIGRAPHX_DEVICE_MATH_HALF(cos, ::cos)
MIGRAPHX_DEVICE_MATH_HALF(cosh, ::cosh)
MIGRAPHX_DEVICE_MATH_HALF(erf, ::erf)
MIGRAPHX_DEVICE_MATH_HALF(floor, ::floor)
MIGRAPHX_DEVICE_MATH_HALF(pow, ::pow)
MIGRAPHX_DEVICE_MATH_HALF(round, ::round)
MIGRAPHX_DEVICE_MATH_HALF(sin, ::sin)
MIGRAPHX_DEVICE_MATH_HALF(sinh, ::sinh)
MIGRAPHX_DEVICE_MATH_HALF(tan, ::tan)
MIGRAPHX_DEVICE_MATH_HALF(tanh, ::tanh)

template <class T, class U>
constexpr auto where(bool cond, const T& a, const U& b)
{
    return cond ? a : b;
}

MIGRAPHX_DEVICE_MATH_VEC(abs)
MIGRAPHX_DEVICE_MATH_VEC(acos)
MIGRAPHX_DEVICE_MATH_VEC(acosh)
MIGRAPHX_DEVICE_MATH_VEC(asin)
MIGRAPHX_DEVICE_MATH_VEC(asinh)
MIGRAPHX_DEVICE_MATH_VEC(atan)
MIGRAPHX_DEVICE_MATH_VEC(atanh)
MIGRAPHX_DEVICE_MATH_VEC(ceil)
MIGRAPHX_DEVICE_MATH_VEC(cos)
MIGRAPHX_DEVICE_MATH_VEC(cosh)
MIGRAPHX_DEVICE_MATH_VEC(erf)
MIGRAPHX_DEVICE_MATH_VEC(exp)
MIGRAPHX_DEVICE_MATH_VEC(floor)
MIGRAPHX_DEVICE_MATH_VEC(log)
MIGRAPHX_DEVICE_MATH_VEC(pow)
MIGRAPHX_DEVICE_MATH_VEC(round)
MIGRAPHX_DEVICE_MATH_VEC(rsqrt)
MIGRAPHX_DEVICE_MATH_VEC(sin)
MIGRAPHX_DEVICE_MATH_VEC(sinh)
MIGRAPHX_DEVICE_MATH_VEC(sqrt)
MIGRAPHX_DEVICE_MATH_VEC(tan)
MIGRAPHX_DEVICE_MATH_VEC(tanh)
MIGRAPHX_DEVICE_MATH_VEC(where)

template <class T, class U>
constexpr auto max(const T& a, const U& b)
{
    return where(a < b, b, a);
}

template <class T, class U>
constexpr auto min(const T& a, const U& b)
{
    return where(a > b, b, a);
}

template <class T, class U>
constexpr auto convert(U v)
{
    return vec_transform(v)([](auto x) -> T { return x; });
}

} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_MATH_HPP
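The new header layers its overloads: MIGRAPHX_DEVICE_MATH forwards scalars to the global HIP/libm functions, MIGRAPHX_DEVICE_MATH_FOR and MIGRAPHX_DEVICE_MATH_HALF pin specific types (half routing through float where no native half builtin exists), and MIGRAPHX_DEVICE_MATH_VEC catches vector arguments and re-dispatches lane by lane through vec_transform. A host-side sketch of that scalar-versus-vector dispatch, with std::array standing in for the GPU vector types, illustrates the mechanism:

    // Sketch only: std::array replaces migraphx::vec so it runs on the host.
    #include <array>
    #include <cmath>
    #include <cstdio>
    #include <type_traits>

    template <class T>
    struct is_vec : std::false_type {};
    template <class T, std::size_t N>
    struct is_vec<std::array<T, N>> : std::true_type {};

    // Scalar overload: forward to the underlying math function.
    template <class T, std::enable_if_t<not is_vec<T>::value, int> = 0>
    auto my_sqrt(T x) { return std::sqrt(x); }

    // Vector overload: apply the scalar version lane by lane, the way
    // MIGRAPHX_DEVICE_MATH_VEC does through vec_transform.
    template <class T, std::size_t N>
    auto my_sqrt(std::array<T, N> x)
    {
        std::array<T, N> r{};
        for(std::size_t i = 0; i < N; i++)
            r[i] = my_sqrt(x[i]);
        return r;
    }

    int main()
    {
        std::printf("%f\n", my_sqrt(4.0));
        auto v = my_sqrt(std::array<double, 2>{4.0, 9.0});
        std::printf("%f %f\n", v[0], v[1]);
    }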
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp (mode 100755 → 100644)

@@ -3,19 +3,45 @@
 #include <migraphx/kernels/index.hpp>
 #include <migraphx/kernels/functional.hpp>
+#include <migraphx/kernels/math.hpp>
 #include <migraphx/kernels/preload.hpp>
 #include <migraphx/kernels/vectorize.hpp>
 #include <migraphx/kernels/args.hpp>

 namespace migraphx {

+template <class T>
+struct implicit_conversion_op
+{
+    T x;
+
+    template <index_int N, class U>
+    constexpr operator vec<U, N>() const
+    {
+        static_assert(vec_size<T>() == N, "Vector mismatch size");
+        return __builtin_convertvector(x, vec<U, N>);
+    }
+
+    template <class U>
+    constexpr operator U() const
+    {
+        return x;
+    }
+};
+
+template <class T>
+constexpr implicit_conversion_op<T> implicit_conversion(T x)
+{
+    return {x};
+}
+
 template <class F, class T, class... Ts>
 __device__ void pointwise_tensor(index idx, F f, T out, Ts... xs)
 {
     preload<typename T::type>(idx, xs...)([&](auto... ps) {
         idx.global_stride(out.get_shape().elements(), [&](auto i) {
             auto multi_idx = out.get_shape().multi(i);
-            out[multi_idx] = f(ps[multi_idx]...);
+            out[multi_idx] = implicit_conversion(f(ps[multi_idx]...));
         });
     });
 }
@@ -23,7 +49,7 @@ __device__ void pointwise_tensor(index idx, F f, T out, Ts... xs)
 template <class F, class... Ts>
 __device__ void pointwise(F f, Ts*... ps)
 {
-    auto t = transform_args(make_tensors(), rotate_last());
+    auto t = transform_args(make_tensors(), rotate_last(), auto_vectorize());
     t(ps...)([&](auto... xs) {
         auto idx = make_index();
         pointwise_tensor(idx, f, xs...);
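implicit_conversion_op defers the element-type conversion to the point of assignment into out: when f returns a vector of a different element type than the output tensor, the conversion operator converts every lane at once with __builtin_convertvector. A standalone sketch of that builtin (a clang extension, which the HIP device compiler provides; the ext_vector_type attribute here is an assumption about a convenient host-side stand-in, not the MIGraphX vec definition itself):

    // Compile with clang. Lane-wise conversion, like four static_casts.
    #include <cstdio>

    typedef float float4 __attribute__((ext_vector_type(4)));
    typedef int int4 __attribute__((ext_vector_type(4)));

    int main()
    {
        float4 f = {1.5f, 2.5f, 3.5f, 4.5f};
        int4 i   = __builtin_convertvector(f, int4); // truncates each lane
        std::printf("%d %d %d %d\n", i[0], i[1], i[2], i[3]);
    }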
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp (mode 100755 → 100644)

@@ -14,9 +14,7 @@ constexpr auto traverse_preload(Shapes... ss)
     auto each = [&](auto x) {
         constexpr auto s    = decltype(x.get_shape()){};
         constexpr auto size = _c<s.element_space()>;
-        if constexpr(not s.broadcasted())
-            return f(x, offset, false_type{});
-        else if constexpr((s.elements() - size) < 64)
+        if constexpr(not s.broadcasted() or (s.elements() - size) < 64)
             return f(x, offset, false_type{});
         else
         {
@@ -31,7 +29,7 @@ constexpr auto traverse_preload(Shapes... ss)
 }

 template <class T, class... Shapes>
-constexpr index_int compute_preload_size(Shapes...)
+constexpr index_int compute_preload_size_c(Shapes...)
 {
     index_int size = 0;
     traverse_preload<T>(Shapes{}...)(
@@ -39,6 +37,12 @@ constexpr index_int compute_preload_size(Shapes...)
     return size;
 }

+template <class T, class... Shapes>
+constexpr auto compute_preload_size(Shapes...)
+{
+    return _c<compute_preload_size_c<T>(Shapes{}...)>;
+}
+
 template <class F, class T, class... Ts>
 __device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs)
 {
@@ -50,11 +54,21 @@ __device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs)
         [&](auto x, auto offset, auto copy) {
             if constexpr(copy)
             {
-                auto v = vectorize(x);
-                auto b = as_vec(tensor_vec_size(v), buffer + offset);
-                idx.local_stride(v.get_shape().element_space(),
-                                 [&](auto i) { b[i] = v.data()[i]; });
-                return x.with(buffer + offset);
+                if constexpr(decltype(tensor_vec_size(x)){} == 0)
+                {
+                    auto v = vectorize(x);
+                    auto b = as_vec(tensor_vec_size(v), buffer + offset);
+                    idx.local_stride(v.get_shape().element_space(),
+                                     [&](auto i) { b[i] = v.data()[i]; });
+                    return x.with(buffer + offset);
+                }
+                else
+                {
+                    auto b = as_vec(tensor_vec_size(x), buffer + offset);
+                    idx.local_stride(x.get_shape().element_space(),
+                                     [&](auto i) { b[i] = x.data()[i]; });
+                    return x.with(b);
+                }
             }
             else
             {
@@ -80,7 +94,7 @@ template <class T, class... Ts>
 __device__ auto preload(index idx, Ts... xs)
 {
     using type          = typename remove_vec<T>::type;
-    constexpr auto size = compute_preload_size<type>(xs.get_shape()...);
+    constexpr auto size = decltype(compute_preload_size<type>(xs.get_shape()...)){};
     const index_int max_size = 512 * sizeof(type);
     return [=](auto f) {
         if constexpr(size > 0 and size < max_size)
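The switch from calling compute_preload_size directly to wrapping its result in _c and re-materializing it with decltype(...){} is the standard trick for keeping a value usable in if constexpr when the function arguments are runtime objects: the value rides in the return type, and decltype never evaluates the call. A small sketch with std::integral_constant:

    // Why decltype(expr){} appears in preload(): the result is encoded in
    // the return type, so the expression need not be a constant expression.
    #include <type_traits>

    template <class... Ts>
    constexpr auto payload_size(Ts...)
    {
        return std::integral_constant<unsigned, sizeof...(Ts) * 4>{};
    }

    template <class... Ts>
    void stage(Ts... xs)
    {
        // xs are runtime values, yet size is a compile-time constant,
        // because decltype only inspects the type of the unevaluated call.
        constexpr auto size = decltype(payload_size(xs...)){};
        static_assert(size % 4 == 0);
        if constexpr(size > 0)
        {
            // ... stage a buffer of `size` bytes ...
        }
    }

    int main() { stage(1, 2, 3); }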
src/targets/gpu/kernels/include/migraphx/kernels/print.hpp

 #ifndef MIGRAPHX_GUARD_KERNELS_PRINT_HPP
 #define MIGRAPHX_GUARD_KERNELS_PRINT_HPP
-#include <hip/hip_runtime.h>
+#include <migraphx/kernels/hip.hpp>
 #include <migraphx/kernels/index.hpp>
 #include <migraphx/kernels/functional.hpp>
 #include <migraphx/kernels/algorithm.hpp>
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp

@@ -4,7 +4,7 @@
 #include <migraphx/kernels/index.hpp>
 #include <migraphx/kernels/dfor.hpp>
 #include <migraphx/kernels/basic_ops.hpp>
-#include <args.hpp>
+#include <migraphx/kernels/array.hpp>

 namespace migraphx {
@@ -104,14 +104,24 @@ MIGRAPHX_DEVICE_CONSTEXPR T calc_pooling(const T*& data,
     return op.final(output_val, count);
 }

-template <class T, class U, class V, class W>
-__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t)
+template <class T1, class T2, class T3, class T4>
+struct roalign_settings
 {
-    const float roi_offset       = ROIS_OFFSET;
-    const bool is_avg_pooling    = IS_AVG_POOLING;
-    const int64_t sampling_ratio = SAMPLING_RATIO;
-    const float spatial_scale    = SPATIAL_SCALE;
+    T1 roi_offset{};
+    T2 is_avg_pooling{};
+    T3 sampling_ratio{};
+    T4 spatial_scale{};
+};
+
+template <class... Ts>
+constexpr roalign_settings<Ts...> make_roalign_settings(Ts... xs)
+{
+    return {xs...};
+}
+
+template <class T, class U, class V, class W, class Settings>
+__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t, Settings s)
+{
     auto index       = make_index();
     const auto* x    = x_t.data();
     const auto* rois = rois_t.data();
@@ -146,9 +156,10 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
     const auto* offset_rois = rois + (n * roi_column_num);
     const int batch_ind     = ind[n];

-    array<float, 2> roi_starts = {offset_rois[1] * spatial_scale,
-                                  offset_rois[0] * spatial_scale};
-    array<float, 2> roi_ends   = {offset_rois[3] * spatial_scale,
-                                  offset_rois[2] * spatial_scale};
+    array<float, 2> roi_starts = {offset_rois[1] * s.spatial_scale,
+                                  offset_rois[0] * s.spatial_scale};
+    array<float, 2> roi_ends   = {offset_rois[3] * s.spatial_scale,
+                                  offset_rois[2] * s.spatial_scale};

     array<float, 2> roi_size{};
     array<float, 2> bin_size{};
@@ -161,11 +172,11 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
         bin_size[ii]      = roi_size[ii] / out_dims[ii];
-        bin_grid_size[ii] = (sampling_ratio > 0) ? sampling_ratio
-                                                 : std::ceil(roi_size[ii] / out_dims[ii]);
+        bin_grid_size[ii] = (s.sampling_ratio > 0) ? s.sampling_ratio
+                                                   : std::ceil(roi_size[ii] / out_dims[ii]);
     }

     const auto* offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]);
-    if constexpr(is_avg_pooling)
+    if constexpr(s.is_avg_pooling)
     {
         out_ptr[i] = calc_pooling(offset_x,
                                   roi_starts,
@@ -173,7 +184,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
                                   {ph, pw},
                                   bin_grid_size,
                                   in_dims,
-                                  roi_offset,
+                                  s.roi_offset,
                                   avg_pool{});
     }
     else
@@ -184,7 +195,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
                                   {ph, pw},
                                   bin_grid_size,
                                   in_dims,
-                                  roi_offset,
+                                  s.roi_offset,
                                   max_pool{});
     }
 }
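Before this change the kernel read its configuration from preprocessor macros (ROIS_OFFSET, IS_AVG_POOLING, SAMPLING_RATIO, SPATIAL_SCALE) baked in when the kernel source was compiled; roalign_settings carries the same values as struct fields instead, and because those fields can be integral_constant-like empty types, if constexpr(s.is_avg_pooling) still folds at compile time. A hedged sketch of the idea, with std::true_type standing in for the compile-time field type:

    // Sketch of the macro-to-settings-struct move: the pooling mode rides
    // in the field's type, so the branch still resolves at compile time.
    #include <cstdio>
    #include <type_traits>

    template <class T1, class T2>
    struct pool_settings
    {
        T1 is_avg_pooling{};
        T2 spatial_scale{};
    };

    template <class... Ts>
    constexpr pool_settings<Ts...> make_pool_settings(Ts... xs)
    {
        return {xs...};
    }

    template <class Settings>
    float pool(float a, float b, Settings s)
    {
        // Valid in a constant expression: true_type's conversion to bool
        // never reads the object, only its type.
        if constexpr(s.is_avg_pooling)
            return (a + b) / 2 * s.spatial_scale;
        else
            return (a > b ? a : b) * s.spatial_scale;
    }

    int main()
    {
        auto s = make_pool_settings(std::true_type{}, 0.5f);
        std::printf("%f\n", pool(2.0f, 4.0f, s));
    }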
src/targets/gpu/kernels/include/migraphx/kernels/types.hpp

 #ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP
 #define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP
-#include <hip/hip_runtime.h>
+#include <migraphx/kernels/hip.hpp>

 namespace migraphx {
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp (mode 100755 → 100644)

@@ -3,6 +3,7 @@
 #include <migraphx/kernels/types.hpp>
 #include <migraphx/kernels/integral_constant.hpp>
+#include <migraphx/kernels/functional.hpp>

 namespace migraphx {
@@ -13,7 +14,7 @@ constexpr auto vec_size(vec<T, N>)
 }

 template <class T>
-constexpr auto vec_size(T, ...)
+constexpr auto vec_size(T, ...) // NOLINT
 {
     return index_constant<0>{};
 }
@@ -24,6 +25,38 @@ constexpr auto vec_size()
     return decltype(vec_size(T{})){};
 }

+template <class... Ts>
+constexpr auto is_any_vec()
+{
+    if constexpr(sizeof...(Ts) == 0)
+        return false_type{};
+    else
+        return bool_constant<((vec_size<Ts>() + ...) > 0)>{};
+}
+
+template <class T, class I>
+constexpr auto vec_at(T x, I i)
+{
+    if constexpr(vec_size<T>() == 0)
+        return x;
+    else
+    {
+        MIGRAPHX_ASSERT(i < vec_size<T>());
+        return x[i];
+    }
+}
+
+template <class... Ts>
+constexpr auto common_vec_size()
+{
+    return fold([](auto x, auto y) {
+        if constexpr(x > y)
+            return x;
+        else
+            return y;
+    })(vec_size<Ts>()...);
+}
+
 template <index_int N, class T>
 __device__ __host__ auto as_vec(T* x)
 {
@@ -33,5 +66,25 @@ __device__ __host__ auto as_vec(T* x)
     return reinterpret_cast<vec<T, N>*>(x);
 }

+template <class... Ts>
+constexpr auto vec_transform(Ts... xs)
+{
+    return [=](auto f) {
+        if constexpr(is_any_vec<Ts...>())
+        {
+            using type          = decltype(f(vec_at(xs, 0)...));
+            constexpr auto size = common_vec_size<Ts...>();
+            vec<type, size> result = {0};
+            for(int i = 0; i < size; i++)
+                result[i] = f(vec_at(xs, i)...);
+            return result;
+        }
+        else
+        {
+            return f(xs...);
+        }
+    };
+}
+
 } // namespace migraphx
 #endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP
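vec_transform is the piece that makes mixed scalar/vector calls work: vec_at broadcasts scalars by ignoring the lane index for non-vector arguments, and the result width comes from common_vec_size, the largest input width. A host-side analogue of that contract, with std::array as an assumed stand-in for the GPU vector type:

    #include <array>
    #include <cstdio>

    // vec_at analogue: scalars ignore the lane index, vectors index into it.
    template <class T>
    T vec_at(T x, std::size_t) { return x; }
    template <class T, std::size_t N>
    T vec_at(std::array<T, N> x, std::size_t i) { return x[i]; }

    // A fixed-width analogue of vec_transform over one vector and one scalar.
    template <class F>
    auto transform2(F f, std::array<float, 4> a, float b)
    {
        std::array<float, 4> r{};
        for(std::size_t i = 0; i < 4; i++)
            r[i] = f(vec_at(a, i), vec_at(b, i)); // b broadcasts to all lanes
        return r;
    }

    int main()
    {
        auto r = transform2([](float x, float y) { return x * y; },
                            std::array<float, 4>{1, 2, 3, 4}, 10.0f);
        std::printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);
    }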
src/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp

@@ -7,40 +7,70 @@
 namespace migraphx {

 template <class T>
-constexpr auto tensor_vec_size(T)
+constexpr auto tensor_vec_size()
 {
     return vec_size<typename T::type>();
 }

-template <index_int N, class Shape>
-constexpr auto as_vec_shape(Shape s)
+template <class T>
+constexpr auto tensor_vec_size(T)
 {
-    auto lens = transform(s.lens, s.strides, [](auto len, auto stride) {
-        if(stride == 1)
-            return len / N;
-        else
-            return len;
-    });
-    auto strides = transform(s.strides, [](auto stride) {
-        if(stride == 1)
-            return stride;
-        return stride / N;
-    });
-    MIGRAPHX_ASSERT(make_shape(lens, strides).element_space() * N == s.element_space());
-    return make_shape(lens, strides);
+    return tensor_vec_size<T>();
 }

-template <index_int N, class T>
-__device__ __host__ auto as_vec(T x)
+template <index_int N, class Shape, class Axis>
+constexpr auto shape_step(Shape s, Axis)
+{
+    static_assert(N > 0, "Vector size must be non-zero");
+    return sequence(s.lens.size(), [&](auto... is) {
+        auto lens = transform(s.lens, index_ints<is...>{}, [&](auto i, auto j) {
+            constexpr auto axis = Axis::to();
+            MIGRAPHX_ASSERT(i != 0);
+            MIGRAPHX_ASSERT(j != axis or i % N == 0);
+            if(j == axis)
+                return i / N;
+            else
+                return i;
+        });
+        auto strides = transform(s.strides, index_ints<is...>{}, [&](auto i, auto j) {
+            constexpr auto axis = Axis::to();
+            // If stride of the axis is zero then we dont need to adjust the other strides
+            if(Shape{}.strides[axis] == 0)
+                return i;
+            MIGRAPHX_ASSERT(j == axis or i % N == 0);
+            if(j == axis)
+                return i;
+            else
+                return i / N;
+        });
+        MIGRAPHX_ASSERT(make_shape(lens, strides).elements() * N == s.elements());
+        MIGRAPHX_ASSERT(strides[Axis{}] == 0 or
+                        make_shape(lens, strides).element_space() * N == s.element_space());
+        return make_shape(lens, strides);
+    });
+}
+
+// Bools can not be used as a vector type so convert it to int8
+template <class T>
+__device__ __host__ T* remove_bool(T* x)
+{
+    return x;
+}
+
+inline __device__ __host__ int8_t* remove_bool(bool* x)
+{
+    return reinterpret_cast<int8_t*>(x);
+}
+
+template <index_int N, class T, class Axis>
+__device__ __host__ auto as_vec(T x, Axis axis)
 {
     if constexpr(N == 0)
         return x;
     else
-        return make_tensor_view(as_vec<N>(x.data()), as_vec_shape<N>(x.get_shape()));
+        return make_tensor_view(as_vec<N>(remove_bool(x.data())),
+                                shape_step<N>(x.get_shape(), axis));
 }

 template <index_int N, class T, class Axis>
-constexpr auto tensor_step(T x, Axis)
+constexpr auto tensor_step(T x, Axis axis)
 {
     if constexpr(N == 0)
     {
@@ -49,17 +79,8 @@ constexpr auto tensor_step(T x, Axis)
     else
     {
         constexpr auto s = decltype(x.get_shape()){};
-        MIGRAPHX_ASSERT(s.strides[Axis{}] == 0);
-        return sequence(x.get_shape().lens.size(), [&](auto... is) {
-            auto lens = transform(s.lens, index_ints<is...>{}, [&](auto i, auto j) {
-                constexpr auto axis = Axis{};
-                if(j == axis)
-                    return i / N;
-                else
-                    return i;
-            });
-            return make_tensor_view(x.data(), make_shape(lens, s.strides));
-        });
+        MIGRAPHX_ASSERT(s.strides[axis] == 0);
+        return make_tensor_view(x.data(), shape_step<N>(s, axis));
     }
 }
@@ -69,42 +90,71 @@ __device__ __host__ auto as_vec(IntegralConstant ic, T&& x)
     return as_vec<ic>(x);
 }

-template <class... Shapes>
-constexpr index_int find_vector_axis(Shapes... ss)
+template <class Shape>
+constexpr index_int find_vector_axis_c(Shape s)
+{
+    // Find the fastest axis that is not broadcasted
+    index_int axis = 0;
+    for(index_int i = 1; i < s.lens.size(); i++)
+    {
+        if(s.strides[i] == 0)
+            continue;
+        if(s.strides[axis] == 0 or
+           pack_compare(less{}, pack(s.strides[i], s.lens[i]), pack(s.strides[axis], s.lens[axis])))
+            axis = i;
+    }
+    return axis;
+}
+
+template <class... Shapes>
+constexpr index_int find_vector_axis_c(Shapes... ss)
 {
+    const bool all_broadcasted = (ss.broadcasted() and ...);
     index_int axis = 0;
     bool b         = false;
     by([&](auto s) {
-        if(s.broadcasted() or b)
+        if(b)
             return;
-        auto it = find(s.strides.begin(), s.strides.end(), 1);
-        if(it == s.strides.end())
+        // Skip broadcasted shapes if there are shapes not broadcasted
+        if(not all_broadcasted and s.broadcasted())
             return;
-        axis = it - s.strides.begin();
-        b    = true;
+        axis = find_vector_axis_c(s);
+        if(s.strides[axis] == 1)
+            b = true;
     })(ss...);
-    if(not b)
-        return -1;
     return axis;
 }

+template <class... Shapes>
+constexpr auto find_vector_axis(Shapes...)
+{
+    return _c<find_vector_axis_c(Shapes{}...)>;
+}
+
 template <index_int N, class Axis, class... Shapes>
-constexpr auto is_vectorizable(Axis axis, Shapes... ss)
+constexpr auto is_vectorizable_c(Axis axis, Shapes... ss)
 {
-    return (((ss.lens[axis] % N) == 0 and
-             (ss.strides[axis] == 1 or ss.strides[axis] == 0)) and
-            ...);
+    return ((axis < ss.lens.size() and ss.lens[axis] % N == 0 and
+             // Only vectorize broadcasted types with stride 0, since this causes issues in the
+             // preloader
+             ((not ss.broadcasted() and ss.strides[axis] == 1) or ss.strides[axis] == 0)) and
+            ...);
 }

-template <index_int N, class... Shapes>
-constexpr bool is_vectorizable(Shapes... ss)
+template <index_int N, class Axis, class... Shapes>
+constexpr auto is_vectorizable(Axis, Shapes...)
 {
-    return (is_vectorizable<N>(ss, find_vector_axis(ss)) and ...);
+    return _c<is_vectorizable_c<N>(Axis::to(), Shapes{}...)>;
 }

 template <class P>
 constexpr auto find_vectorize_size(P pred)
 {
-    if constexpr(pred(_c<4>))
+    if constexpr(decltype(pred(_c<4>)){})
         return _c<4>;
-    else if constexpr(pred(_c<2>))
+    else if constexpr(decltype(pred(_c<2>)){})
         return _c<2>;
     else
         return _c<0>;
@@ -113,11 +163,12 @@ constexpr auto find_vectorize_size(P pred)
 template <class T>
 __host__ __device__ auto vectorize(T x)
 {
-    if constexpr(vec_size<T>() == 0)
+    if constexpr(tensor_vec_size<T>() == 0)
     {
-        constexpr auto n =
-            find_vectorize_size([&](auto i) { return _c<is_vectorizable<i>(x.get_shape())>; });
-        return as_vec<n>(x);
+        constexpr auto axis = find_vector_axis(x.get_shape());
+        constexpr auto n    =
+            find_vectorize_size([&](auto i) { return is_vectorizable<i>(axis, x.get_shape()); });
+        return as_vec<n>(x, axis);
     }
     else
     {
@@ -125,34 +176,46 @@ __host__ __device__ auto vectorize(T x)
     }
 }

+template <class F, class... Ts>
+inline __device__ __host__ auto auto_vectorize_impl(F f, Ts... xs)
+{
+    // TODO: Just check there a single axis of 1
+    constexpr bool packed_or_broadcasted =
+        ((xs.get_shape().packed() or xs.get_shape().broadcasted()) and ...);
+    if constexpr(packed_or_broadcasted)
+    {
+        constexpr auto axis = decltype(find_vector_axis(xs.get_shape()...)){};
+        constexpr auto n    = find_vectorize_size(
+            [&](auto i) { return is_vectorizable<i>(axis, xs.get_shape()...); });
+        by(
+            [&](auto x) {
+                constexpr auto s = decltype(x.get_shape()){};
+                if constexpr(axis < s.strides.size())
+                {
+                    MIGRAPHX_ASSERT(s.strides[axis] == 0 or s.strides[axis] == 1);
+                    MIGRAPHX_ASSERT(s.lens[axis] > 0);
+                    MIGRAPHX_ASSERT(n == 0 or s.lens[axis] % n == 0);
+                    if constexpr(s.strides[axis] == 0)
+                        return tensor_step<n>(x, axis);
+                    else
+                        return as_vec<n>(x, axis);
+                }
+                else
+                {
+                    return x;
+                }
+            },
+            f)(xs...);
+    }
+    else
+    {
+        f(xs...);
+    }
+}
+
 inline __device__ __host__ auto auto_vectorize()
 {
     return [](auto... xs) {
-        return [=](auto f) {
-            // TODO: Just check there a single axis of 1
-            constexpr bool packed_or_broadcasted =
-                ((xs.get_shape().packed() or xs.get_shape().broadcasted()) and ...);
-            if constexpr(packed_or_broadcasted)
-            {
-                constexpr auto axis = find_vector_axis(xs.get_shape()...);
-                constexpr auto n    = find_vectorize_size([&](auto i) {
-                    return _c<is_vectorizable<i>(axis, xs.get_shape()...)>;
-                });
-                by(
-                    [&](auto x) {
-                        constexpr auto s = x.get_shape();
-                        if constexpr(s.strides[axis] == 0)
-                            return tensor_step<n>(x, axis);
-                        else
-                            return as_vec<n>(x);
-                    },
-                    f)(xs...);
-            }
-            else
-            {
-                f(xs...);
-            }
-        };
+        return [=](auto f) { auto_vectorize_impl(f, xs...); };
     };
 }

 } // namespace migraphx
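shape_step is easiest to see on a concrete shape. For a packed row-major {2, 3, 8} tensor with strides {24, 8, 1}, vectorizing the last axis by N = 4 divides that axis's length by 4 and divides the other strides by 4, because strides are then measured in 4-wide vector elements: the result is lens {2, 3, 2} with strides {6, 2, 1}, and 2*3*2 vectors of 4 elements cover the original 48 scalars. A tiny host computation of the same arithmetic:

    // Worked instance of shape_step's length/stride rescaling on the host.
    #include <array>
    #include <cstdio>

    int main()
    {
        constexpr std::size_t N    = 4; // vector width
        constexpr std::size_t axis = 2; // fastest axis, stride 1
        std::array<std::size_t, 3> lens    = {2, 3, 8};
        std::array<std::size_t, 3> strides = {24, 8, 1};
        for(std::size_t j = 0; j < 3; j++)
        {
            if(j == axis)
                lens[j] /= N;    // the vectorized axis shrinks
            else
                strides[j] /= N; // other strides now count vectors
        }
        std::printf("lens {%zu,%zu,%zu} strides {%zu,%zu,%zu}\n",
                    lens[0], lens[1], lens[2], strides[0], strides[1], strides[2]);
    }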
src/targets/gpu/lowering.cpp

@@ -60,6 +60,7 @@ struct miopen_apply
     std::unordered_map<instruction_ref, std::string> prog_output_names{};
     bool offload_copy   = false;
     bool int8_x4_format = true;
+    bool compute_fp32   = false;

     context& get_context() const
     {
@@ -103,6 +104,8 @@ struct miopen_apply
 #if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
         auto& ctx = get_context();
+        if(ctx.get_stream().get_device_name() == "gfx908")
+            compute_fp32 = true;
         rocblas_gemm_flags flag;
         rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
         int8_x4_format = (flag == rocblas_gemm_flags_pack_int8x4);
@@ -337,7 +340,7 @@ struct miopen_apply
             }
         }
         return mod->replace_instruction(
-            ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format}, refs);
+            ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format, compute_fp32}, refs);
     });
 }
test/auto_contiguous_test.cpp

@@ -101,4 +101,38 @@ TEST_CASE(after_param_broadcast)
     EXPECT(not m.get_output_shapes().back().broadcasted());
 }

+TEST_CASE(two_transpose_gather)
+{
+    migraphx::module m1;
+    {
+        auto data = m1.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
+        auto ind  = m1.add_parameter("ind", {migraphx::shape::float_type, {2, 3}});
+        auto td   = m1.add_instruction(
+            migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), data);
+        auto sd = m1.add_instruction(migraphx::make_op("softmax", {{"axis", 2}}), td);
+        auto bd = m1.add_instruction(
+            migraphx::make_op("transpose", {{"permutation", {0, 3, 1, 2}}}), sd);
+        auto r = m1.add_instruction(migraphx::make_op("gather", {{"axis", 2}}), bd, ind);
+        m1.add_return({r});
+    }
+    run_pass(m1);
+
+    migraphx::module m2;
+    {
+        auto data = m2.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
+        auto ind  = m2.add_parameter("ind", {migraphx::shape::float_type, {2, 3}});
+        auto td   = m2.add_instruction(
+            migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), data);
+        auto ctd = m2.add_instruction(migraphx::make_op("contiguous"), td);
+        auto sd  = m2.add_instruction(migraphx::make_op("softmax", {{"axis", 2}}), ctd);
+        auto bd  = m2.add_instruction(
+            migraphx::make_op("transpose", {{"permutation", {0, 3, 1, 2}}}), sd);
+        auto cbd = m2.add_instruction(migraphx::make_op("contiguous"), bd);
+        auto r   = m2.add_instruction(migraphx::make_op("gather", {{"axis", 2}}), cbd, ind);
+        m2.add_return({r});
+    }
+    EXPECT(m1 == m2);
+}
+
 int main(int argc, const char* argv[]) { test::run(argc, argv); }
test/fuse_pointwise.cpp

@@ -73,6 +73,35 @@ TEST_CASE(double_add)
     EXPECT(p1.sort() == p2.sort());
 }

+TEST_CASE(double_add_without_return)
+{
+    migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+    migraphx::program p1;
+    {
+        auto* mm  = p1.get_main_module();
+        auto x    = mm->add_parameter("x", s);
+        auto y    = mm->add_parameter("y", s);
+        auto z    = mm->add_parameter("z", s);
+        auto add1 = mm->add_instruction(migraphx::make_op("add"), x, y);
+        mm->add_instruction(migraphx::make_op("add"), add1, z);
+    }
+    run_pass(p1);
+    migraphx::program p2;
+    {
+        auto* mm = p2.get_main_module();
+        auto x   = mm->add_parameter("x", s);
+        auto y   = mm->add_parameter("y", s);
+        auto z   = mm->add_parameter("z", s);
+        auto fadd =
+            add_pointwise(p2, "main:pointwise0", {x, y, z}, [=](auto* pm, const auto& inputs) {
+                auto add1 = pm->add_instruction(migraphx::make_op("add"), inputs[0], inputs[1]);
+                return pm->add_instruction(migraphx::make_op("add"), add1, inputs[2]);
+            });
+        mm->add_instruction(migraphx::make_op("identity"), fadd);
+    }
+    EXPECT(p1.sort() == p2.sort());
+}
+
 TEST_CASE(used_twice_not_fused)
 {
     migraphx::shape s{migraphx::shape::float_type, {2, 3}};
test/onnx/gen_onnx.py

@@ -1618,6 +1618,22 @@ def greater_bool_test():
     return ([node1, node2], [x1, x2], [y])


+@onnx_test
+def greaterorequal_test():
+    x1 = helper.make_tensor_value_info('x1', TensorProto.FLOAT, [3])
+    x2 = helper.make_tensor_value_info('x2', TensorProto.FLOAT, [3])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3])
+
+    node = onnx.helper.make_node(
+        'GreaterOrEqual',
+        inputs=['x1', 'x2'],
+        outputs=['y'],
+    )
+
+    return ([node], [x1, x2], [y])
+
+
 @onnx_test
 def group_conv_test():
     x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 4, 16, 16])
@@ -1634,6 +1650,60 @@ def group_conv_test():
     return ([node], [x, y], [z])


+@onnx_test
+def hardsigmoid_default_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 3, 4, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 3, 4, 5])
+
+    node = onnx.helper.make_node('HardSigmoid', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
+@onnx_test
+def hardsigmoid_double_test():
+    x = helper.make_tensor_value_info('x', TensorProto.DOUBLE, [1, 3, 4, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.DOUBLE, [1, 3, 4, 5])
+
+    node = onnx.helper.make_node('HardSigmoid',
+                                 inputs=['x'],
+                                 outputs=['y'],
+                                 alpha=0.3,
+                                 beta=0.7)
+
+    return ([node], [x], [y])
+
+
+@onnx_test
+def hardsigmoid_half_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [1, 3, 4, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [1, 3, 4, 5])
+
+    node = onnx.helper.make_node('HardSigmoid', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
+@onnx_test
+def hardsigmoid_verify_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 5])
+
+    node = onnx.helper.make_node('HardSigmoid', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
+@onnx_test
+def hardswish_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 5])
+
+    node = onnx.helper.make_node('HardSwish', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
 @onnx_test
 def if_else_test():
     x = onnx.helper.make_tensor_value_info('x', onnx.TensorProto.FLOAT, [2, 3])
@@ -2692,6 +2762,80 @@ def maxpool_same_upper_test():
     return ([node], [x], [y])


+@onnx_test
+def mean_broadcast_test():
+    data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 3, 4])
+    data_1 = helper.make_tensor_value_info('1', TensorProto.FLOAT, [1, 2, 3, 4])
+    data_2 = helper.make_tensor_value_info('2', TensorProto.FLOAT, [4])
+    data_3 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [1])
+    data_4 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2, 3, 1])
+    mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT, [1, 2, 3, 4])
+
+    node = onnx.helper.make_node("Mean",
+                                 inputs=["0", "1", "2", "3", "4"],
+                                 outputs=["mean"])
+
+    return ([node], [data_0, data_1, data_2, data_3, data_4], [mean])
+
+
+@onnx_test
+def mean_fp16_test():
+    data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT16, [1, 2, 3])
+    data_1 = helper.make_tensor_value_info('1', TensorProto.FLOAT16, [1, 2, 3])
+    data_2 = helper.make_tensor_value_info('2', TensorProto.FLOAT16, [1, 2, 3])
+    mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT16, [1, 2, 3])
+
+    node = onnx.helper.make_node("Mean", inputs=["0", "1", "2"], outputs=["mean"])
+
+    return ([node], [data_0, data_1, data_2], [mean])
+
+
+@onnx_test
+def mean_invalid_broadcast_test():
+    data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 2, 3])
+    data_1 = helper.make_tensor_value_info('1', TensorProto.FLOAT, [1, 2, 3])
+    data_2 = helper.make_tensor_value_info('2', TensorProto.FLOAT, [1, 2, 4])
+    mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT, [1, 2, 3])
+
+    node = onnx.helper.make_node("Mean", inputs=["0", "1", "2"], outputs=["mean"])
+
+    return ([node], [data_0, data_1, data_2], [mean])
+
+
+@onnx_test
+def mean_single_input_test():
+    data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 2, 3])
+    mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT, [1, 2, 3])
+
+    node = onnx.helper.make_node("Mean", inputs=["0"], outputs=["mean"])
+
+    return ([node], [data_0], [mean])
+
+
+@onnx_test
+def mean_test():
+    data = [
+        helper.make_tensor_value_info(str(i), TensorProto.DOUBLE, [2, 2, 2])
+        for i in range(10)
+    ]
+    data_names = [str(i) for i in range(10)]
+    mean = helper.make_tensor_value_info('mean', TensorProto.DOUBLE, [2, 2, 2])
+
+    node = onnx.helper.make_node("Mean", inputs=data_names, outputs=["mean"])
+
+    return ([node], data, [mean])
+
+
 @onnx_test
 def min_test():
     a = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3])
@@ -2725,6 +2869,21 @@ def multinomial_test():
     return ([node], [input], [output])


+@onnx_test
+def multinomial_generated_seed_test():
+    sample_size = 10
+    input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10])
+    output = helper.make_tensor_value_info("output", TensorProto.INT32, [1, 10])
+
+    node = onnx.helper.make_node('Multinomial',
+                                 inputs=['input'],
+                                 sample_size=sample_size,
+                                 outputs=['output'])
+
+    return ([node], [input], [output])
+
+
 @onnx_test
 def multinomial_dtype_error_test():
     sample_size = 10
@@ -3176,6 +3335,21 @@ def randomnormal_dtype_error_test():
     return ([node], [], [output])


+@onnx_test
+def randomnormal_generated_seed_test():
+    sample_size = 10
+    input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10])
+    output = helper.make_tensor_value_info("output", TensorProto.INT32, [1, 10])
+
+    node = onnx.helper.make_node('RandomNormal',
+                                 inputs=['input'],
+                                 sample_size=sample_size,
+                                 outputs=['output'])
+
+    return ([node], [input], [output])
+
+
 @onnx_test
 def randomnormal_shape_error_test():
     dtype = 1
@@ -3266,6 +3440,21 @@ def randomuniform_dtype_error_test():
     return ([node], [], [output])


+@onnx_test
+def randomuniform_generated_seed_test():
+    sample_size = 10
+    input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10])
+    output = helper.make_tensor_value_info("output", TensorProto.INT32, [1, 10])
+
+    node = onnx.helper.make_node('RandomUniform',
+                                 inputs=['input'],
+                                 sample_size=sample_size,
+                                 outputs=['output'])
+
+    return ([node], [input], [output])
+
+
 @onnx_test
 def randomuniform_shape_error_test():
     dtype = 1
@@ -4290,6 +4479,44 @@ def softmax_nonstd_input_test():
     return ([node0, node1], [x], [y])


+@onnx_test
+def softsign_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [5])
+
+    node = onnx.helper.make_node('Softsign', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
+def softplus_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [5])
+
+    node = onnx.helper.make_node('Softplus', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
+@onnx_test
+def softsign_nd_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [3, 4, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [3, 4, 5])
+
+    node = onnx.helper.make_node('Softsign', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
+def softplus_nd_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [3, 4, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [3, 4, 5])
+
+    node = onnx.helper.make_node('Softplus', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
 @onnx_test
 def split_minus_axis_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 15])
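For reference, the operators exercised by the new hardsigmoid and hardswish tests are defined by the ONNX spec as

    HardSigmoid(x) = max(0, min(1, alpha*x + beta))   with defaults alpha = 0.2, beta = 0.5
    HardSwish(x)   = x * max(0, min(1, x/6 + 1/2))

so hardsigmoid_default_test checks the default attributes while hardsigmoid_double_test's alpha=0.3, beta=0.7 checks that explicit attributes are honored.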
test/onnx/gen_onnx.pyc: no preview for this file type (compiled Python bytecode).
The remaining new files are the serialized ONNX models generated by the tests above; they are binary protobufs with no meaningful text preview:

test/onnx/greaterorequal_test.onnx (new file, mode 100644): binary ONNX protobuf
test/onnx/hardsigmoid_default_test.onnx (new file, mode 100644): binary ONNX protobuf
test/onnx/hardsigmoid_double_test.onnx (new file, mode 100644): binary ONNX protobuf (encodes alpha=0.3, beta=0.7)
test/onnx/hardsigmoid_half_test.onnx (new file, mode 100644): binary ONNX protobuf
test/onnx/hardsigmoid_verify_test.onnx (new file, mode 100644): binary ONNX protobuf
test/onnx/hardswish_test.onnx (new file, mode 100644): binary ONNX protobuf