gaoqiong / MIGraphX / Commits
Commit 3df20646, authored Jan 31, 2022 by Khalique Ahmed

    manual merge

Parents: 1005a693, d0543c96
Changes: 104 files in the full commit; this page shows 20 changed files with 826 additions and 114 deletions (+826 -114).
src/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp   +13  -10
src/targets/gpu/kernels/include/migraphx/kernels/math.hpp               +162   -0
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp           +28   -2
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp             +24  -10
src/targets/gpu/kernels/include/migraphx/kernels/print.hpp                +1   -1
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp            +25  -14
src/targets/gpu/kernels/include/migraphx/kernels/types.hpp                +1   -1
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp                 +54   -1
src/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp          +137  -74
src/targets/gpu/lowering.cpp                                              +4   -1
test/auto_contiguous_test.cpp                                            +34   -0
test/fuse_pointwise.cpp                                                  +29   -0
test/onnx/gen_onnx.py                                                   +227   -0
test/onnx/gen_onnx.pyc                                                    +0   -0
test/onnx/greaterorequal_test.onnx                                       +16   -0
test/onnx/hardsigmoid_default_test.onnx                                  +15   -0
test/onnx/hardsigmoid_double_test.onnx                                   +17   -0
test/onnx/hardsigmoid_half_test.onnx                                     +17   -0
test/onnx/hardsigmoid_verify_test.onnx                                   +11   -0
test/onnx/hardswish_test.onnx                                            +11   -0
src/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp

@@ -5,28 +5,31 @@
 namespace migraphx {
 
-template <class T, T v>
+template <class T, T V>
 struct integral_constant
 {
-    static constexpr T value = v;
+    static constexpr T value = V;
     using value_type         = T;
     using type               = integral_constant;
     constexpr operator value_type() const noexcept { return value; }
     constexpr value_type operator()() const noexcept { return value; }
+    static constexpr type to() { return {}; }
 };
 
 // NOLINTNEXTLINE
 #define MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(op)                                \
-    template <class T, T v, class U, U w>                                       \
-    constexpr inline integral_constant<decltype(v op w), (v op w)> operator op( \
-        integral_constant<T, v>, integral_constant<U, w>) noexcept              \
+    template <class T, T V, class U, U w>                                       \
+    constexpr inline integral_constant<decltype(V op w), (V op w)> operator op( \
+        integral_constant<T, V>, integral_constant<U, w>) noexcept              \
     {                                                                           \
         return {};                                                              \
     }
 
 // NOLINTNEXTLINE
 #define MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(op)                             \
-    template <class T, T v>                                                 \
-    constexpr inline integral_constant<decltype(op v), (op v)> operator op( \
-        integral_constant<T, v>) noexcept                                   \
+    template <class T, T V>                                                 \
+    constexpr inline integral_constant<decltype(op V), (op V)> operator op( \
+        integral_constant<T, V>) noexcept                                   \
     {                                                                       \
         return {};                                                          \
     }

@@ -64,8 +67,8 @@ using false_type = bool_constant<false>;
 template <index_int N>
 using index_constant = integral_constant<index_int, N>;
 
-template <auto v>
-static constexpr auto _c = integral_constant<decltype(v), v>{};
+template <auto V>
+static constexpr auto _c = integral_constant<decltype(V), V>{}; // NOLINT
 
 } // namespace migraphx
 #endif // MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP
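The v to V rename appears to be a naming-lint fix for template value parameters, while the new to() member and the NOLINT-annotated _c variable template give the kernels a uniform way to carry values around as types. A minimal host-side sketch of the idiom (standalone, not part of the commit; it mirrors std::integral_constant semantics):

#include <type_traits>

// Standalone sketch mirroring the kernel's integral_constant/_c idiom.
template <class T, T V>
struct integral_constant
{
    static constexpr T value = V;
    using value_type         = T;
    using type               = integral_constant;
    constexpr operator value_type() const noexcept { return value; }
    constexpr value_type operator()() const noexcept { return value; }
    // to() recovers an object of the constant's own type, handy for
    // passing the constant through deduced template parameters.
    static constexpr type to() { return {}; }
};

template <auto V>
static constexpr auto _c = integral_constant<decltype(V), V>{};

static_assert(decltype(_c<4>)::value == 4);
static_assert(std::is_same_v<decltype(_c<4>.to()), integral_constant<int, 4>>);

Because _c<4> carries the value 4 in its type, it remains a constant expression across generic-lambda boundaries, which the preload and vectorize changes below rely on.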
src/targets/gpu/kernels/include/migraphx/kernels/math.hpp (new file, 0 → 100644; the entire file is added)

#ifndef MIGRAPHX_GUARD_KERNELS_MATH_HPP
#define MIGRAPHX_GUARD_KERNELS_MATH_HPP

#include <migraphx/kernels/types.hpp>
#include <migraphx/kernels/vec.hpp>
#include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/type_traits.hpp>

#include <hip/hip_fp16.h>
#include <hip/math_functions.h>

namespace migraphx {

namespace math {
constexpr float as_float(migraphx::half x) { return x; }
template <class T>
constexpr T as_float(T x)
{
    return x;
}
} // namespace math

// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH(name, fname)                              \
    template <class... Ts, MIGRAPHX_REQUIRES(not is_any_vec<Ts...>())> \
    auto __device__ name(Ts... xs) MIGRAPHX_RETURNS(fname(xs...))

// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH_VEC(name)                                       \
    template <class... Ts, MIGRAPHX_REQUIRES(is_any_vec<Ts...>())>           \
    auto __device__ name(Ts... xs)                                           \
    {                                                                        \
        return vec_transform(xs...)([](auto... ys) { return name(ys...); }); \
    }

// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH_FOR(type, name, fname)                    \
    template <class... Ts, MIGRAPHX_REQUIRES(not is_any_vec<Ts...>())> \
    auto __device__ name(type x, Ts... xs)->type                       \
    {                                                                  \
        return fname(x, xs...);                                        \
    }

// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH_HALF(name, fname)                         \
    template <class... Ts, MIGRAPHX_REQUIRES(not is_any_vec<Ts...>())> \
    auto __device__ name(migraphx::half x, Ts... xs)                   \
        MIGRAPHX_RETURNS(fname(math::as_float(x), math::as_float(xs)...))

MIGRAPHX_DEVICE_MATH(abs, ::abs)
MIGRAPHX_DEVICE_MATH(acos, ::acos)
MIGRAPHX_DEVICE_MATH(acosh, ::acosh)
MIGRAPHX_DEVICE_MATH(asin, ::asin)
MIGRAPHX_DEVICE_MATH(asinh, ::asinh)
MIGRAPHX_DEVICE_MATH(atan, ::atan)
MIGRAPHX_DEVICE_MATH(atanh, ::atanh)
MIGRAPHX_DEVICE_MATH(ceil, ::ceil)
MIGRAPHX_DEVICE_MATH(cos, ::cos)
MIGRAPHX_DEVICE_MATH(cosh, ::cosh)
MIGRAPHX_DEVICE_MATH(erf, ::erf)
MIGRAPHX_DEVICE_MATH(exp, ::exp)
MIGRAPHX_DEVICE_MATH(floor, ::floor)
MIGRAPHX_DEVICE_MATH(log, ::log)
MIGRAPHX_DEVICE_MATH(pow, ::pow)
MIGRAPHX_DEVICE_MATH(round, ::round)
MIGRAPHX_DEVICE_MATH(rsqrt, ::rsqrt)
MIGRAPHX_DEVICE_MATH(sin, ::sin)
MIGRAPHX_DEVICE_MATH(sinh, ::sinh)
MIGRAPHX_DEVICE_MATH(sqrt, ::sqrt)
MIGRAPHX_DEVICE_MATH(tan, ::tan)
MIGRAPHX_DEVICE_MATH(tanh, ::tanh)

// Float overloads
MIGRAPHX_DEVICE_MATH_FOR(float, acos, ::acosf)
MIGRAPHX_DEVICE_MATH_FOR(float, acosh, ::acoshf)
MIGRAPHX_DEVICE_MATH_FOR(float, asin, ::asinf)
MIGRAPHX_DEVICE_MATH_FOR(float, asinh, ::asinhf)
MIGRAPHX_DEVICE_MATH_FOR(float, atan, ::atanf)
MIGRAPHX_DEVICE_MATH_FOR(float, atanh, ::atanhf)
MIGRAPHX_DEVICE_MATH_FOR(float, cos, ::cosf)
MIGRAPHX_DEVICE_MATH_FOR(float, cosh, ::coshf)
MIGRAPHX_DEVICE_MATH_FOR(float, rsqrt, ::rsqrtf)
MIGRAPHX_DEVICE_MATH_FOR(float, sin, ::sinf)
MIGRAPHX_DEVICE_MATH_FOR(float, sinh, ::sinhf)
MIGRAPHX_DEVICE_MATH_FOR(float, tan, ::tanf)
MIGRAPHX_DEVICE_MATH_FOR(float, tanh, ::tanhf)

// Builtin half functions
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, abs, ::__habs)
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, exp, ::hexp)
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, log, ::hlog)
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, rsqrt, ::hrsqrt)
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, sqrt, ::hsqrt)

// Use float to compute half overload
MIGRAPHX_DEVICE_MATH_HALF(acos, ::acos)
MIGRAPHX_DEVICE_MATH_HALF(acosh, ::acosh)
MIGRAPHX_DEVICE_MATH_HALF(asin, ::asin)
MIGRAPHX_DEVICE_MATH_HALF(asinh, ::asinh)
MIGRAPHX_DEVICE_MATH_HALF(atan, ::atan)
MIGRAPHX_DEVICE_MATH_HALF(atanh, ::atanh)
MIGRAPHX_DEVICE_MATH_HALF(ceil, ::ceil)
MIGRAPHX_DEVICE_MATH_HALF(cos, ::cos)
MIGRAPHX_DEVICE_MATH_HALF(cosh, ::cosh)
MIGRAPHX_DEVICE_MATH_HALF(erf, ::erf)
MIGRAPHX_DEVICE_MATH_HALF(floor, ::floor)
MIGRAPHX_DEVICE_MATH_HALF(pow, ::pow)
MIGRAPHX_DEVICE_MATH_HALF(round, ::round)
MIGRAPHX_DEVICE_MATH_HALF(sin, ::sin)
MIGRAPHX_DEVICE_MATH_HALF(sinh, ::sinh)
MIGRAPHX_DEVICE_MATH_HALF(tan, ::tan)
MIGRAPHX_DEVICE_MATH_HALF(tanh, ::tanh)

template <class T, class U>
constexpr auto where(bool cond, const T& a, const U& b)
{
    return cond ? a : b;
}

MIGRAPHX_DEVICE_MATH_VEC(abs)
MIGRAPHX_DEVICE_MATH_VEC(acos)
MIGRAPHX_DEVICE_MATH_VEC(acosh)
MIGRAPHX_DEVICE_MATH_VEC(asin)
MIGRAPHX_DEVICE_MATH_VEC(asinh)
MIGRAPHX_DEVICE_MATH_VEC(atan)
MIGRAPHX_DEVICE_MATH_VEC(atanh)
MIGRAPHX_DEVICE_MATH_VEC(ceil)
MIGRAPHX_DEVICE_MATH_VEC(cos)
MIGRAPHX_DEVICE_MATH_VEC(cosh)
MIGRAPHX_DEVICE_MATH_VEC(erf)
MIGRAPHX_DEVICE_MATH_VEC(exp)
MIGRAPHX_DEVICE_MATH_VEC(floor)
MIGRAPHX_DEVICE_MATH_VEC(log)
MIGRAPHX_DEVICE_MATH_VEC(pow)
MIGRAPHX_DEVICE_MATH_VEC(round)
MIGRAPHX_DEVICE_MATH_VEC(rsqrt)
MIGRAPHX_DEVICE_MATH_VEC(sin)
MIGRAPHX_DEVICE_MATH_VEC(sinh)
MIGRAPHX_DEVICE_MATH_VEC(sqrt)
MIGRAPHX_DEVICE_MATH_VEC(tan)
MIGRAPHX_DEVICE_MATH_VEC(tanh)
MIGRAPHX_DEVICE_MATH_VEC(where)

template <class T, class U>
constexpr auto max(const T& a, const U& b)
{
    return where(a < b, b, a);
}

template <class T, class U>
constexpr auto min(const T& a, const U& b)
{
    return where(a > b, b, a);
}

template <class T, class U>
constexpr auto convert(U v)
{
    return vec_transform(v)([](auto x) -> T { return x; });
}

} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_MATH_HPP
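The macro family splits each math function into a scalar overload, gated by MIGRAPHX_REQUIRES(not is_any_vec<Ts...>()), and a vector overload that reapplies the scalar one lane by lane via vec_transform. A host-compilable sketch of the same dispatch shape (my_sin and myvec are illustrative names, not part of the commit; the real code uses the kernel's vec alias and SFINAE helpers):

#include <cmath>
#include <type_traits>

// Host-side sketch of the scalar/vector dispatch the MIGRAPHX_DEVICE_MATH*
// macros generate. myvec stands in for the clang extended vector type.
template <class T>
struct is_vec : std::false_type {};

struct myvec { float data[4]; };
template <>
struct is_vec<myvec> : std::true_type {};

// Scalar overload: enabled only when the argument is not a vector.
template <class T, std::enable_if_t<!is_vec<T>::value, int> = 0>
T my_sin(T x) { return std::sin(x); }

// Vector overload: apply the scalar function to every lane.
inline myvec my_sin(myvec v)
{
    myvec r{};
    for(int i = 0; i < 4; i++)
        r.data[i] = my_sin(v.data[i]);
    return r;
}

The half overloads route through math::as_float first, so a half input is computed in float and narrowed on return, except where a hardware half builtin (hexp, hlog, hrsqrt, hsqrt, __habs) exists.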
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp (mode 100755 → 100644)

@@ -3,19 +3,45 @@
 #include <migraphx/kernels/index.hpp>
 #include <migraphx/kernels/functional.hpp>
+#include <migraphx/kernels/math.hpp>
 #include <migraphx/kernels/preload.hpp>
+#include <migraphx/kernels/vectorize.hpp>
 #include <migraphx/kernels/args.hpp>
 
 namespace migraphx {
 
+template <class T>
+struct implicit_conversion_op
+{
+    T x;
+
+    template <index_int N, class U>
+    constexpr operator vec<U, N>() const
+    {
+        static_assert(vec_size<T>() == N, "Vector mismatch size");
+        return __builtin_convertvector(x, vec<U, N>);
+    }
+
+    template <class U>
+    constexpr operator U() const
+    {
+        return x;
+    }
+};
+
+template <class T>
+constexpr implicit_conversion_op<T> implicit_conversion(T x)
+{
+    return {x};
+}
+
 template <class F, class T, class... Ts>
 __device__ void pointwise_tensor(index idx, F f, T out, Ts... xs)
 {
     preload<typename T::type>(idx, xs...)([&](auto... ps) {
         idx.global_stride(out.get_shape().elements(), [&](auto i) {
             auto multi_idx = out.get_shape().multi(i);
-            out[multi_idx] = f(ps[multi_idx]...);
+            out[multi_idx] = implicit_conversion(f(ps[multi_idx]...));
         });
     });
 }

@@ -23,7 +49,7 @@ __device__ void pointwise_tensor(index idx, F f, T out, Ts... xs)
 template <class F, class... Ts>
 __device__ void pointwise(F f, Ts*... ps)
 {
-    auto t = transform_args(make_tensors(), rotate_last());
+    auto t = transform_args(make_tensors(), rotate_last(), auto_vectorize());
     t(ps...)([&](auto... xs) {
         auto idx = make_index();
         pointwise_tensor(idx, f, xs...);
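implicit_conversion_op exists because, once inputs are vectorized, the pointwise lambda's result type may not match the output tensor's element type; the wrapper defers the conversion to the assignment site, using __builtin_convertvector for the vector case. A scalar-only sketch of the deferred-conversion idiom (illustrative, not the commit's code; the vector path additionally needs clang's __builtin_convertvector):

// Sketch of the deferred-conversion wrapper: the conversion target is
// chosen by whatever the caller assigns the result to.
template <class T>
struct implicit_conversion_op
{
    T x;
    template <class U>
    constexpr operator U() const { return x; }
};

template <class T>
constexpr implicit_conversion_op<T> implicit_conversion(T x) { return {x}; }

int main()
{
    double d = 2.75;
    int i    = implicit_conversion(d); // converts on assignment -> 2
    float f  = implicit_conversion(d); // converts on assignment -> 2.75f
    return (i == 2 and f > 2.7f) ? 0 : 1;
}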
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp (mode 100755 → 100644)

@@ -14,9 +14,7 @@ constexpr auto traverse_preload(Shapes... ss)
     auto each = [&](auto x) {
         constexpr auto s    = decltype(x.get_shape()){};
         constexpr auto size = _c<s.element_space()>;
-        if constexpr(not s.broadcasted())
-            return f(x, offset, false_type{});
-        else if constexpr((s.elements() - size) < 64)
+        if constexpr(not s.broadcasted() or (s.elements() - size) < 64)
             return f(x, offset, false_type{});
         else
         {

@@ -31,7 +29,7 @@ constexpr auto traverse_preload(Shapes... ss)
 }
 
 template <class T, class... Shapes>
-constexpr index_int compute_preload_size(Shapes...)
+constexpr index_int compute_preload_size_c(Shapes...)
 {
     index_int size = 0;
     traverse_preload<T>(Shapes{}...)(

@@ -39,6 +37,12 @@ constexpr index_int compute_preload_size(Shapes...)
     return size;
 }
 
+template <class T, class... Shapes>
+constexpr auto compute_preload_size(Shapes...)
+{
+    return _c<compute_preload_size_c<T>(Shapes{}...)>;
+}
+
 template <class F, class T, class... Ts>
 __device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs)
 {

@@ -50,11 +54,21 @@ __device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs)
         [&](auto x, auto offset, auto copy) {
             if constexpr(copy)
             {
-                auto v = vectorize(x);
-                auto b = as_vec(tensor_vec_size(v), buffer + offset);
-                idx.local_stride(v.get_shape().element_space(),
-                                 [&](auto i) { b[i] = v.data()[i]; });
-                return x.with(buffer + offset);
+                if constexpr(decltype(tensor_vec_size(x)){} == 0)
+                {
+                    auto v = vectorize(x);
+                    auto b = as_vec(tensor_vec_size(v), buffer + offset);
+                    idx.local_stride(v.get_shape().element_space(),
+                                     [&](auto i) { b[i] = v.data()[i]; });
+                    return x.with(buffer + offset);
+                }
+                else
+                {
+                    auto b = as_vec(tensor_vec_size(x), buffer + offset);
+                    idx.local_stride(x.get_shape().element_space(),
+                                     [&](auto i) { b[i] = x.data()[i]; });
+                    return x.with(b);
+                }
             }
             else
             {

@@ -80,7 +94,7 @@ template <class T, class... Ts>
 __device__ auto preload(index idx, Ts... xs)
 {
     using type               = typename remove_vec<T>::type;
-    constexpr auto size      = compute_preload_size<type>(xs.get_shape()...);
+    constexpr auto size      = decltype(compute_preload_size<type>(xs.get_shape()...)){};
     const index_int max_size = 512 * sizeof(type);
     return [=](auto f) {
         if constexpr(size > 0 and size < max_size)
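The compute_preload_size split follows the _c pattern from integral_constant.hpp: the _c-suffixed function does the arithmetic, and the unsuffixed wrapper returns the result wrapped in an integral_constant, so preload can recover it with decltype(...){} and branch on it in if constexpr. A standalone sketch of why the wrapping matters (using std::integral_constant in place of the kernel's own type):

#include <type_traits>

// Sketch: a plain function parameter is not a constant expression inside
// the callee, but a value encoded in a type survives any number of
// generic-lambda hops and stays usable in if constexpr.
template <auto V>
using ic = std::integral_constant<decltype(V), V>;

template <class Size>
constexpr int pick(Size)
{
    if constexpr(Size::value > 0 and Size::value < 512)
        return 1; // take the shared-memory preload path
    else
        return 0; // fall back to global memory
}

static_assert(pick(ic<64>{}) == 1);
static_assert(pick(ic<4096>{}) == 0);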
src/targets/gpu/kernels/include/migraphx/kernels/print.hpp

 #ifndef MIGRAPHX_GUARD_KERNELS_PRINT_HPP
 #define MIGRAPHX_GUARD_KERNELS_PRINT_HPP
 
-#include <hip/hip_runtime.h>
+#include <migraphx/kernels/hip.hpp>
 #include <migraphx/kernels/index.hpp>
 #include <migraphx/kernels/functional.hpp>
 #include <migraphx/kernels/algorithm.hpp>
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp

@@ -4,7 +4,7 @@
 #include <migraphx/kernels/index.hpp>
 #include <migraphx/kernels/dfor.hpp>
 #include <migraphx/kernels/basic_ops.hpp>
-#include <args.hpp>
+#include <migraphx/kernels/array.hpp>
 
 namespace migraphx {

@@ -104,14 +104,24 @@ MIGRAPHX_DEVICE_CONSTEXPR T calc_pooling(const T*& data,
     return op.final(output_val, count);
 }
 
-template <class T, class U, class V, class W>
-__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t)
+template <class T1, class T2, class T3, class T4>
+struct roalign_settings
 {
-    const float roi_offset       = ROIS_OFFSET;
-    const bool is_avg_pooling    = IS_AVG_POOLING;
-    const int64_t sampling_ratio = SAMPLING_RATIO;
-    const float spatial_scale    = SPATIAL_SCALE;
+    T1 roi_offset{};
+    T2 is_avg_pooling{};
+    T3 sampling_ratio{};
+    T4 spatial_scale{};
+};
+
+template <class... Ts>
+constexpr roalign_settings<Ts...> make_roalign_settings(Ts... xs)
+{
+    return {xs...};
+}
+
+template <class T, class U, class V, class W, class Settings>
+__device__ void
+roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t, Settings s)
+{
     auto index       = make_index();
     const auto* x    = x_t.data();
     const auto* rois = rois_t.data();

@@ -146,9 +156,10 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
     const auto* offset_rois = rois + (n * roi_column_num);
     const int batch_ind     = ind[n];
 
-    array<float, 2> roi_starts = {offset_rois[1] * spatial_scale, offset_rois[0] * spatial_scale};
-    array<float, 2> roi_ends   = {offset_rois[3] * spatial_scale, offset_rois[2] * spatial_scale};
+    array<float, 2> roi_starts = {offset_rois[1] * s.spatial_scale,
+                                  offset_rois[0] * s.spatial_scale};
+    array<float, 2> roi_ends   = {offset_rois[3] * s.spatial_scale,
+                                  offset_rois[2] * s.spatial_scale};
 
     array<float, 2> roi_size{};
     array<float, 2> bin_size{};

@@ -161,11 +172,11 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
         bin_size[ii]      = roi_size[ii] / out_dims[ii];
         bin_grid_size[ii] =
-            (sampling_ratio > 0) ? sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]);
+            (s.sampling_ratio > 0) ? s.sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]);
     }
 
     const auto* offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]);
-    if constexpr(is_avg_pooling)
+    if constexpr(s.is_avg_pooling)
     {
         out_ptr[i] = calc_pooling(offset_x,
                                   roi_starts,

@@ -173,7 +184,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
                                   {ph, pw},
                                   bin_grid_size,
                                   in_dims,
-                                  roi_offset,
+                                  s.roi_offset,
                                   avg_pool{});
     }
     else

@@ -184,7 +195,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
                                   {ph, pw},
                                   bin_grid_size,
                                   in_dims,
-                                  roi_offset,
+                                  s.roi_offset,
                                   max_pool{});
     }
 }
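Moving ROIS_OFFSET, IS_AVG_POOLING, SAMPLING_RATIO, and SPATIAL_SCALE out of preprocessor definitions into the roalign_settings aggregate lets the caller choose, per field, between a runtime value and a compile-time constant, and if constexpr(s.is_avg_pooling) still works when that field is an integral-constant type. A standalone sketch of the settings-aggregate pattern (pool_settings and constant are hypothetical names for illustration):

// Sketch of the settings-aggregate pattern replacing per-kernel macros.
// Field types are deduced, so callers can pass integral-constant-like
// types to keep values usable in if constexpr, or plain values otherwise.
template <auto V>
struct constant
{
    static constexpr auto value = V;
    constexpr operator decltype(V)() const { return V; }
};

template <class T1, class T2>
struct pool_settings
{
    T1 is_avg_pooling{};
    T2 sampling_ratio{};
};

template <class... Ts>
constexpr pool_settings<Ts...> make_pool_settings(Ts... xs) { return {xs...}; }

template <class Settings>
constexpr int run(Settings s)
{
    if constexpr(decltype(s.is_avg_pooling)::value)
        return 1; // average-pooling branch compiled in
    else
        return 2; // max-pooling branch compiled in
}

static_assert(run(make_pool_settings(constant<true>{}, constant<2>{})) == 1);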
src/targets/gpu/kernels/include/migraphx/kernels/types.hpp

 #ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP
 #define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP
 
-#include <hip/hip_runtime.h>
+#include <migraphx/kernels/hip.hpp>
 
 namespace migraphx {
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp (mode 100755 → 100644)

@@ -3,6 +3,7 @@
 #include <migraphx/kernels/types.hpp>
 #include <migraphx/kernels/integral_constant.hpp>
+#include <migraphx/kernels/functional.hpp>
 
 namespace migraphx {

@@ -13,7 +14,7 @@ constexpr auto vec_size(vec<T, N>)
 }
 
 template <class T>
-constexpr auto vec_size(T, ...)
+constexpr auto vec_size(T, ...) // NOLINT
 {
     return index_constant<0>{};
 }

@@ -24,6 +25,38 @@ constexpr auto vec_size()
     return decltype(vec_size(T{})){};
 }
 
+template <class... Ts>
+constexpr auto is_any_vec()
+{
+    if constexpr(sizeof...(Ts) == 0)
+        return false_type{};
+    else
+        return bool_constant<((vec_size<Ts>() + ...) > 0)>{};
+}
+
+template <class T, class I>
+constexpr auto vec_at(T x, I i)
+{
+    if constexpr(vec_size<T>() == 0)
+        return x;
+    else
+    {
+        MIGRAPHX_ASSERT(i < vec_size<T>());
+        return x[i];
+    }
+}
+
+template <class... Ts>
+constexpr auto common_vec_size()
+{
+    return fold([](auto x, auto y) {
+        if constexpr(x > y)
+            return x;
+        else
+            return y;
+    })(vec_size<Ts>()...);
+}
+
 template <index_int N, class T>
 __device__ __host__ auto as_vec(T* x)
 {

@@ -33,5 +66,25 @@ __device__ __host__ auto as_vec(T* x)
     return reinterpret_cast<vec<T, N>*>(x);
 }
 
+template <class... Ts>
+constexpr auto vec_transform(Ts... xs)
+{
+    return [=](auto f) {
+        if constexpr(is_any_vec<Ts...>())
+        {
+            using type             = decltype(f(vec_at(xs, 0)...));
+            constexpr auto size    = common_vec_size<Ts...>();
+            vec<type, size> result = {0};
+            for(int i = 0; i < size; i++)
+                result[i] = f(vec_at(xs, i)...);
+            return result;
+        }
+        else
+        {
+            return f(xs...);
+        }
+    };
+}
+
 } // namespace migraphx
 #endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP
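vec_transform is the core of the new vector support: vec_at returns scalars unchanged (broadcast) and indexes vectors, common_vec_size picks the widest lane count among the arguments, and the loop applies the scalar functor once per lane. A host-only sketch of the same idiom using GCC/clang vector extensions (illustrative names, fixed 4-lane width, not the commit's code):

// Host-side sketch of the vec_transform idiom: scalar arguments
// broadcast, vector arguments are indexed per lane.
typedef float float4 __attribute__((vector_size(16)));

template <class T>
auto vec_at(T x, int) { return x; } // scalar: same value for every lane

inline float vec_at(float4 x, int i) { return x[i]; } // vector: one lane

template <class F>
float4 transform2(float4 a, float b, F f)
{
    float4 r = {0, 0, 0, 0};
    for(int i = 0; i < 4; i++)
        r[i] = f(vec_at(a, i), vec_at(b, i));
    return r;
}

// Usage: add a scalar bias to every lane of x.
//   float4 y = transform2(x, 1.0f, [](float p, float q) { return p + q; });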
src/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp

@@ -7,40 +7,70 @@
 namespace migraphx {
 
 template <class T>
-constexpr auto tensor_vec_size(T)
+constexpr auto tensor_vec_size()
 {
     return vec_size<typename T::type>();
 }
 
-template <index_int N, class Shape>
-constexpr auto as_vec_shape(Shape s)
+template <class T>
+constexpr auto tensor_vec_size(T)
 {
-    auto lens = transform(s.lens, s.strides, [](auto len, auto stride) {
-        if(stride == 1)
-            return len / N;
-        else
-            return len;
-    });
-    auto strides = transform(s.strides, [](auto stride) {
-        if(stride == 1)
-            return stride;
-        return stride / N;
-    });
-    MIGRAPHX_ASSERT(make_shape(lens, strides).element_space() * N == s.element_space());
-    return make_shape(lens, strides);
+    return tensor_vec_size<T>();
 }
 
+template <index_int N, class Shape, class Axis>
+constexpr auto shape_step(Shape s, Axis)
+{
+    static_assert(N > 0, "Vector size must be non-zero");
+    return sequence(s.lens.size(), [&](auto... is) {
+        auto lens = transform(s.lens, index_ints<is...>{}, [&](auto i, auto j) {
+            constexpr auto axis = Axis::to();
+            MIGRAPHX_ASSERT(i != 0);
+            MIGRAPHX_ASSERT(j != axis or i % N == 0);
+            if(j == axis)
+                return i / N;
+            else
+                return i;
+        });
+        auto strides = transform(s.strides, index_ints<is...>{}, [&](auto i, auto j) {
+            constexpr auto axis = Axis::to();
+            // If stride of the axis is zero then we dont need to adjust the other strides
+            if(Shape{}.strides[axis] == 0)
+                return i;
+            MIGRAPHX_ASSERT(j == axis or i % N == 0);
+            if(j == axis)
+                return i;
+            else
+                return i / N;
+        });
+        MIGRAPHX_ASSERT(make_shape(lens, strides).elements() * N == s.elements());
+        MIGRAPHX_ASSERT(strides[Axis{}] == 0 or
+                        make_shape(lens, strides).element_space() * N == s.element_space());
+        return make_shape(lens, strides);
+    });
+}
+
-template <index_int N, class T>
-__device__ __host__ auto as_vec(T x)
+// Bools can not be used as a vector type so convert it to int8
+template <class T>
+__device__ __host__ T* remove_bool(T* x)
+{
+    return x;
+}
+
+inline __device__ __host__ int8_t* remove_bool(bool* x)
+{
+    return reinterpret_cast<int8_t*>(x);
+}
+
+template <index_int N, class T, class Axis>
+__device__ __host__ auto as_vec(T x, Axis axis)
 {
     if constexpr(N == 0)
         return x;
     else
-        return make_tensor_view(as_vec<N>(x.data()), as_vec_shape<N>(x.get_shape()));
+        return make_tensor_view(as_vec<N>(remove_bool(x.data())),
+                                shape_step<N>(x.get_shape(), axis));
 }
 
 template <index_int N, class T, class Axis>
-constexpr auto tensor_step(T x, Axis)
+constexpr auto tensor_step(T x, Axis axis)
 {
     if constexpr(N == 0)
     {

@@ -49,17 +79,8 @@ constexpr auto tensor_step(T x, Axis)
     else
     {
         constexpr auto s = decltype(x.get_shape()){};
-        MIGRAPHX_ASSERT(s.strides[Axis{}] == 0);
-        return sequence(x.get_shape().lens.size(), [&](auto... is) {
-            auto lens = transform(s.lens, index_ints<is...>{}, [&](auto i, auto j) {
-                constexpr auto axis = Axis{};
-                if(j == axis)
-                    return i / N;
-                else
-                    return i;
-            });
-            return make_tensor_view(x.data(), make_shape(lens, s.strides));
-        });
+        MIGRAPHX_ASSERT(s.strides[axis] == 0);
+        return make_tensor_view(x.data(), shape_step<N>(s, axis));
     }
 }

@@ -69,42 +90,71 @@ __device__ __host__ auto as_vec(IntegralConstant ic, T&& x)
     return as_vec<ic>(x);
 }
 
-template <class... Shapes>
-constexpr index_int find_vector_axis(Shapes... ss)
+template <class Shape>
+constexpr index_int find_vector_axis_c(Shape s)
+{
+    // Find the fastest axis that is not broadcasted
+    index_int axis = 0;
+    for(index_int i = 1; i < s.lens.size(); i++)
+    {
+        if(s.strides[i] == 0)
+            continue;
+        if(s.strides[axis] == 0 or
+           pack_compare(less{}, pack(s.strides[i], s.lens[i]), pack(s.strides[axis], s.lens[axis])))
+            axis = i;
+    }
+    return axis;
+}
+
+template <class... Shapes>
+constexpr index_int find_vector_axis_c(Shapes... ss)
 {
+    const bool all_broadcasted = (ss.broadcasted() and ...);
     index_int axis = 0;
     bool b         = false;
     by([&](auto s) {
-        if(s.broadcasted() or b)
+        if(b)
             return;
-        auto it = find(s.strides.begin(), s.strides.end(), 1);
-        if(it == s.strides.end())
+        // Skip broadcasted shapes if there are shapes not broadcasted
+        if(not all_broadcasted and s.broadcasted())
             return;
-        axis = it - s.strides.begin();
-        b    = true;
+        axis = find_vector_axis_c(s);
+        if(s.strides[axis] == 1)
+            b = true;
     })(ss...);
     if(not b)
         return -1;
     return axis;
 }
 
+template <class... Shapes>
+constexpr auto find_vector_axis(Shapes...)
+{
+    return _c<find_vector_axis_c(Shapes{}...)>;
+}
+
 template <index_int N, class Axis, class... Shapes>
-constexpr auto is_vectorizable(Axis axis, Shapes... ss)
+constexpr auto is_vectorizable_c(Axis axis, Shapes... ss)
 {
-    return (((ss.lens[axis] % N) == 0 and (ss.strides[axis] == 1 or ss.strides[axis] == 0)) and
-            ...);
+    return ((axis < ss.lens.size() and ss.lens[axis] % N == 0 and
+             // Only vectorize broadcasted types with stride 0, since this causes issues in the
+             // preloader
+             ((not ss.broadcasted() and ss.strides[axis] == 1) or ss.strides[axis] == 0)) and
+            ...);
 }
 
-template <index_int N, class... Shapes>
-constexpr bool is_vectorizable(Shapes... ss)
+template <index_int N, class Axis, class... Shapes>
+constexpr auto is_vectorizable(Axis, Shapes...)
 {
-    return (is_vectorizable<N>(ss, find_vector_axis(ss)) and ...);
+    return _c<is_vectorizable_c<N>(Axis::to(), Shapes{}...)>;
 }
 
 template <class P>
 constexpr auto find_vectorize_size(P pred)
 {
-    if constexpr(pred(_c<4>))
+    if constexpr(decltype(pred(_c<4>)){})
         return _c<4>;
-    else if constexpr(pred(_c<2>))
+    else if constexpr(decltype(pred(_c<2>)){})
         return _c<2>;
     else
         return _c<0>;

@@ -113,11 +163,12 @@ constexpr auto find_vectorize_size(P pred)
 template <class T>
 __host__ __device__ auto vectorize(T x)
 {
-    if constexpr(vec_size<T>() == 0)
+    if constexpr(tensor_vec_size<T>() == 0)
     {
-        constexpr auto n =
-            find_vectorize_size([&](auto i) { return _c<is_vectorizable<i>(x.get_shape())>; });
-        return as_vec<n>(x);
+        constexpr auto axis = find_vector_axis(x.get_shape());
+        constexpr auto n    = find_vectorize_size(
+            [&](auto i) { return is_vectorizable<i>(axis, x.get_shape()); });
+        return as_vec<n>(x, axis);
     }
     else
     {

@@ -125,34 +176,46 @@ __host__ __device__ auto vectorize(T x)
     }
 }
 
+template <class F, class... Ts>
+inline __device__ __host__ auto auto_vectorize_impl(F f, Ts... xs)
+{
+    // TODO: Just check there a single axis of 1
+    constexpr bool packed_or_broadcasted =
+        ((xs.get_shape().packed() or xs.get_shape().broadcasted()) and ...);
+    if constexpr(packed_or_broadcasted)
+    {
+        constexpr auto axis = decltype(find_vector_axis(xs.get_shape()...)){};
+        constexpr auto n    = find_vectorize_size(
+            [&](auto i) { return is_vectorizable<i>(axis, xs.get_shape()...); });
+        by(
+            [&](auto x) {
+                constexpr auto s = decltype(x.get_shape()){};
+                if constexpr(axis < s.strides.size())
+                {
+                    MIGRAPHX_ASSERT(s.strides[axis] == 0 or s.strides[axis] == 1);
+                    MIGRAPHX_ASSERT(s.lens[axis] > 0);
+                    MIGRAPHX_ASSERT(n == 0 or s.lens[axis] % n == 0);
+                    if constexpr(s.strides[axis] == 0)
+                        return tensor_step<n>(x, axis);
+                    else
+                        return as_vec<n>(x, axis);
+                }
+                else
+                {
+                    return x;
+                }
+            },
+            f)(xs...);
+    }
+    else
+    {
+        f(xs...);
+    }
+}
+
 inline __device__ __host__ auto auto_vectorize()
 {
     return [](auto... xs) {
-        return [=](auto f) {
-            // TODO: Just check there a single axis of 1
-            constexpr bool packed_or_broadcasted =
-                ((xs.get_shape().packed() or xs.get_shape().broadcasted()) and ...);
-            if constexpr(packed_or_broadcasted)
-            {
-                constexpr auto axis = find_vector_axis(xs.get_shape()...);
-                constexpr auto n    = find_vectorize_size(
-                    [&](auto i) { return _c<is_vectorizable<i>(axis, xs.get_shape()...)>; });
-                by(
-                    [&](auto x) {
-                        constexpr auto s = x.get_shape();
-                        if constexpr(s.strides[axis] == 0)
-                            return tensor_step<n>(x, axis);
-                        else
-                            return as_vec<n>(x);
-                    },
-                    f)(xs...);
-            }
-            else
-            {
-                f(xs...);
-            }
-        };
+        return [=](auto f) { auto_vectorize_impl(f, xs...); };
     };
 }
 
 } // namespace migraphx
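Taken together, the vectorize changes make the axis choice explicit: find_vector_axis picks the fastest non-broadcast axis, is_vectorizable<N> checks that the length along it divides by N and that the stride is 1 (contiguous) or 0 (broadcast), and find_vectorize_size tries a width of 4, then 2, then falls back to scalar. A standalone sketch of that selection logic on plain integers (axis_info is a hypothetical stand-in; the real checks run on shape types at compile time):

#include <cstdint>

// Host sketch of the vector-width selection: try 4, then 2, else scalar.
// A (len, stride) pair along the chosen axis is vectorizable by n when the
// length divides evenly and the stride is unit (contiguous) or zero
// (broadcast).
struct axis_info
{
    std::int64_t len;
    std::int64_t stride;
};

constexpr bool is_vectorizable(std::int64_t n, axis_info a)
{
    return a.len % n == 0 and (a.stride == 1 or a.stride == 0);
}

constexpr std::int64_t find_vectorize_size(axis_info a)
{
    if(is_vectorizable(4, a))
        return 4;
    if(is_vectorizable(2, a))
        return 2;
    return 0; // scalar fallback
}

static_assert(find_vectorize_size({8, 1}) == 4);
static_assert(find_vectorize_size({6, 1}) == 2); // 6 % 4 != 0, 6 % 2 == 0
static_assert(find_vectorize_size({8, 3}) == 0); // non-unit stride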
src/targets/gpu/lowering.cpp

@@ -60,6 +60,7 @@ struct miopen_apply
     std::unordered_map<instruction_ref, std::string> prog_output_names{};
     bool offload_copy   = false;
     bool int8_x4_format = true;
+    bool compute_fp32   = false;
 
     context& get_context() const
     {

@@ -103,6 +104,8 @@ struct miopen_apply
 #if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
         auto& ctx = get_context();
+        if(ctx.get_stream().get_device_name() == "gfx908")
+            compute_fp32 = true;
         rocblas_gemm_flags flag;
         rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
         int8_x4_format = (flag == rocblas_gemm_flags_pack_int8x4);

@@ -337,7 +340,7 @@ struct miopen_apply
             }
         }
         return mod->replace_instruction(
-            ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format}, refs);
+            ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format, compute_fp32}, refs);
     });
 }
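On the lowering side, the new compute_fp32 flag is set when the device reports gfx908 and is threaded into rocblas_gemm, presumably so reduced-precision GEMMs accumulate in fp32 on that architecture. A sketch of the gating predicate (use_fp32_compute is a hypothetical free function; the real check reads the device name off the MIGraphX context's stream):

#include <string>

// Sketch: gate fp32 accumulation on the reported device name.
// device_name stands in for ctx.get_stream().get_device_name().
inline bool use_fp32_compute(const std::string& device_name)
{
    return device_name == "gfx908";
}

// Illustrative use at lowering time:
//   compute_fp32 = use_fp32_compute(ctx.get_stream().get_device_name());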
test/auto_contiguous_test.cpp

@@ -101,4 +101,38 @@ TEST_CASE(after_param_broadcast)
     EXPECT(not m.get_output_shapes().back().broadcasted());
 }
 
+TEST_CASE(two_transpose_gather)
+{
+    migraphx::module m1;
+    {
+        auto data = m1.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
+        auto ind  = m1.add_parameter("ind", {migraphx::shape::float_type, {2, 3}});
+        auto td   = m1.add_instruction(
+            migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), data);
+        auto sd = m1.add_instruction(migraphx::make_op("softmax", {{"axis", 2}}), td);
+        auto bd = m1.add_instruction(
+            migraphx::make_op("transpose", {{"permutation", {0, 3, 1, 2}}}), sd);
+        auto r = m1.add_instruction(migraphx::make_op("gather", {{"axis", 2}}), bd, ind);
+        m1.add_return({r});
+    }
+    run_pass(m1);
+
+    migraphx::module m2;
+    {
+        auto data = m2.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
+        auto ind  = m2.add_parameter("ind", {migraphx::shape::float_type, {2, 3}});
+        auto td   = m2.add_instruction(
+            migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), data);
+        auto ctd = m2.add_instruction(migraphx::make_op("contiguous"), td);
+        auto sd  = m2.add_instruction(migraphx::make_op("softmax", {{"axis", 2}}), ctd);
+        auto bd  = m2.add_instruction(
+            migraphx::make_op("transpose", {{"permutation", {0, 3, 1, 2}}}), sd);
+        auto cbd = m2.add_instruction(migraphx::make_op("contiguous"), bd);
+        auto r   = m2.add_instruction(migraphx::make_op("gather", {{"axis", 2}}), cbd, ind);
+        m2.add_return({r});
+    }
+    EXPECT(m1 == m2);
+}
+
 int main(int argc, const char* argv[]) { test::run(argc, argv); }
test/fuse_pointwise.cpp

@@ -73,6 +73,35 @@ TEST_CASE(double_add)
     EXPECT(p1.sort() == p2.sort());
 }
 
+TEST_CASE(double_add_without_return)
+{
+    migraphx::shape s{migraphx::shape::float_type, {2, 3}};
+    migraphx::program p1;
+    {
+        auto* mm  = p1.get_main_module();
+        auto x    = mm->add_parameter("x", s);
+        auto y    = mm->add_parameter("y", s);
+        auto z    = mm->add_parameter("z", s);
+        auto add1 = mm->add_instruction(migraphx::make_op("add"), x, y);
+        mm->add_instruction(migraphx::make_op("add"), add1, z);
+    }
+    run_pass(p1);
+    migraphx::program p2;
+    {
+        auto* mm = p2.get_main_module();
+        auto x   = mm->add_parameter("x", s);
+        auto y   = mm->add_parameter("y", s);
+        auto z   = mm->add_parameter("z", s);
+        auto fadd =
+            add_pointwise(p2, "main:pointwise0", {x, y, z}, [=](auto* pm, const auto& inputs) {
+                auto add1 = pm->add_instruction(migraphx::make_op("add"), inputs[0], inputs[1]);
+                return pm->add_instruction(migraphx::make_op("add"), add1, inputs[2]);
+            });
+        mm->add_instruction(migraphx::make_op("identity"), fadd);
+    }
+    EXPECT(p1.sort() == p2.sort());
+}
+
 TEST_CASE(used_twice_not_fused)
 {
     migraphx::shape s{migraphx::shape::float_type, {2, 3}};
test/onnx/gen_onnx.py

@@ -1618,6 +1618,22 @@ def greater_bool_test():
     return ([node1, node2], [x1, x2], [y])
 
 
+@onnx_test
+def greaterorequal_test():
+    x1 = helper.make_tensor_value_info('x1', TensorProto.FLOAT, [3])
+    x2 = helper.make_tensor_value_info('x2', TensorProto.FLOAT, [3])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3])
+
+    node = onnx.helper.make_node(
+        'GreaterOrEqual',
+        inputs=['x1', 'x2'],
+        outputs=['y'],
+    )
+
+    return ([node], [x1, x2], [y])
+
+
 @onnx_test
 def group_conv_test():
     x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 4, 16, 16])

@@ -1634,6 +1650,60 @@ def group_conv_test():
     return ([node], [x, y], [z])
 
 
+@onnx_test
+def hardsigmoid_default_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 3, 4, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 3, 4, 5])
+
+    node = onnx.helper.make_node('HardSigmoid', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
+@onnx_test
+def hardsigmoid_double_test():
+    x = helper.make_tensor_value_info('x', TensorProto.DOUBLE, [1, 3, 4, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.DOUBLE, [1, 3, 4, 5])
+
+    node = onnx.helper.make_node('HardSigmoid',
+                                 inputs=['x'],
+                                 outputs=['y'],
+                                 alpha=0.3,
+                                 beta=0.7)
+
+    return ([node], [x], [y])
+
+
+@onnx_test
+def hardsigmoid_half_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [1, 3, 4, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [1, 3, 4, 5])
+
+    node = onnx.helper.make_node('HardSigmoid', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
+@onnx_test
+def hardsigmoid_verify_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 5])
+
+    node = onnx.helper.make_node('HardSigmoid', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
+@onnx_test
+def hardswish_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 5])
+
+    node = onnx.helper.make_node('HardSwish', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
 @onnx_test
 def if_else_test():
     x = onnx.helper.make_tensor_value_info('x', onnx.TensorProto.FLOAT, [2, 3])

@@ -2692,6 +2762,80 @@ def maxpool_same_upper_test():
     return ([node], [x], [y])
 
 
+@onnx_test
+def mean_broadcast_test():
+    data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 3, 4])
+    data_1 = helper.make_tensor_value_info('1', TensorProto.FLOAT, [1, 2, 3, 4])
+    data_2 = helper.make_tensor_value_info('2', TensorProto.FLOAT, [4])
+    data_3 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [1])
+    data_4 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2, 3, 1])
+    mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT, [1, 2, 3, 4])
+
+    node = onnx.helper.make_node("Mean",
+                                 inputs=["0", "1", "2", "3", "4"],
+                                 outputs=["mean"])
+
+    return ([node], [data_0, data_1, data_2, data_3, data_4], [mean])
+
+
+@onnx_test
+def mean_fp16_test():
+    data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT16, [1, 2, 3])
+    data_1 = helper.make_tensor_value_info('1', TensorProto.FLOAT16, [1, 2, 3])
+    data_2 = helper.make_tensor_value_info('2', TensorProto.FLOAT16, [1, 2, 3])
+    mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT16, [1, 2, 3])
+
+    node = onnx.helper.make_node("Mean", inputs=["0", "1", "2"], outputs=["mean"])
+
+    return ([node], [data_0, data_1, data_2], [mean])
+
+
+@onnx_test
+def mean_invalid_broadcast_test():
+    data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 2, 3])
+    data_1 = helper.make_tensor_value_info('1', TensorProto.FLOAT, [1, 2, 3])
+    data_2 = helper.make_tensor_value_info('2', TensorProto.FLOAT, [1, 2, 4])
+    mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT, [1, 2, 3])
+
+    node = onnx.helper.make_node("Mean", inputs=["0", "1", "2"], outputs=["mean"])
+
+    return ([node], [data_0, data_1, data_2], [mean])
+
+
+@onnx_test
+def mean_single_input_test():
+    data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 2, 3])
+    mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT, [1, 2, 3])
+
+    node = onnx.helper.make_node("Mean", inputs=["0"], outputs=["mean"])
+
+    return ([node], [data_0], [mean])
+
+
+@onnx_test
+def mean_test():
+    data = [
+        helper.make_tensor_value_info(str(i), TensorProto.DOUBLE, [2, 2, 2])
+        for i in range(10)
+    ]
+    data_names = [str(i) for i in range(10)]
+    mean = helper.make_tensor_value_info('mean', TensorProto.DOUBLE, [2, 2, 2])
+
+    node = onnx.helper.make_node("Mean", inputs=data_names, outputs=["mean"])
+
+    return ([node], data, [mean])
+
+
 @onnx_test
 def min_test():
     a = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3])

@@ -2725,6 +2869,21 @@ def multinomial_test():
     return ([node], [input], [output])
 
 
+@onnx_test
+def multinomial_generated_seed_test():
+    sample_size = 10
+    input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10])
+    output = helper.make_tensor_value_info("output", TensorProto.INT32, [1, 10])
+
+    node = onnx.helper.make_node('Multinomial',
+                                 inputs=['input'],
+                                 sample_size=sample_size,
+                                 outputs=['output'])
+
+    return ([node], [input], [output])
+
+
 @onnx_test
 def multinomial_dtype_error_test():
     sample_size = 10

@@ -3176,6 +3335,21 @@ def randomnormal_dtype_error_test():
     return ([node], [], [output])
 
 
+@onnx_test
+def randomnormal_generated_seed_test():
+    sample_size = 10
+    input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10])
+    output = helper.make_tensor_value_info("output", TensorProto.INT32, [1, 10])
+
+    node = onnx.helper.make_node('RandomNormal',
+                                 inputs=['input'],
+                                 sample_size=sample_size,
+                                 outputs=['output'])
+
+    return ([node], [input], [output])
+
+
 @onnx_test
 def randomnormal_shape_error_test():
     dtype = 1

@@ -3266,6 +3440,21 @@ def randomuniform_dtype_error_test():
     return ([node], [], [output])
 
 
+@onnx_test
+def randomuniform_generated_seed_test():
+    sample_size = 10
+    input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10])
+    output = helper.make_tensor_value_info("output", TensorProto.INT32, [1, 10])
+
+    node = onnx.helper.make_node('RandomUniform',
+                                 inputs=['input'],
+                                 sample_size=sample_size,
+                                 outputs=['output'])
+
+    return ([node], [input], [output])
+
+
 @onnx_test
 def randomuniform_shape_error_test():
     dtype = 1

@@ -4290,6 +4479,44 @@ def softmax_nonstd_input_test():
     return ([node0, node1], [x], [y])
 
 
+@onnx_test
+def softsign_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [5])
+
+    node = onnx.helper.make_node('Softsign', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
+def softplus_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [5])
+
+    node = onnx.helper.make_node('Softplus', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
+@onnx_test
+def softsign_nd_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [3, 4, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [3, 4, 5])
+
+    node = onnx.helper.make_node('Softsign', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
+def softplus_nd_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [3, 4, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [3, 4, 5])
+
+    node = onnx.helper.make_node('Softplus', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
 @onnx_test
 def split_minus_axis_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 15])
test/onnx/gen_onnx.pyc: binary file (compiled Python bytecode); no preview for this file type.
test/onnx/greaterorequal_test.onnx (new file, 100644): binary ONNX protobuf encoding a GreaterOrEqual node (inputs x1, x2; output y); no readable preview.
test/onnx/hardsigmoid_default_test.onnx (new file, 100644): binary ONNX protobuf encoding a HardSigmoid node with default attributes; no readable preview.
test/onnx/hardsigmoid_double_test.onnx (new file, 100644): binary ONNX protobuf encoding a HardSigmoid node with alpha and beta attributes on double data; no readable preview.
test/onnx/hardsigmoid_half_test.onnx (new file, 100644): binary ONNX protobuf encoding a HardSigmoid node on half data; no readable preview.
test/onnx/hardsigmoid_verify_test.onnx (new file, 100644): binary ONNX protobuf encoding a HardSigmoid node; no readable preview.
test/onnx/hardswish_test.onnx (new file, 100644): binary ONNX protobuf encoding a HardSwish node; no readable preview.