Commit 3df20646 authored by Khalique Ahmed's avatar Khalique Ahmed
Browse files

manual merge

parents 1005a693 d0543c96
...@@ -5,28 +5,31 @@ ...@@ -5,28 +5,31 @@
namespace migraphx { namespace migraphx {
template <class T, T v> template <class T, T V>
struct integral_constant struct integral_constant
{ {
static constexpr T value = v; static constexpr T value = V;
using value_type = T; using value_type = T;
using type = integral_constant; using type = integral_constant;
constexpr operator value_type() const noexcept { return value; } constexpr operator value_type() const noexcept { return value; }
constexpr value_type operator()() const noexcept { return value; } constexpr value_type operator()() const noexcept { return value; }
static constexpr type to() { return {}; }
}; };
// NOLINTNEXTLINE
#define MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(op) \ #define MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(op) \
template <class T, T v, class U, U w> \ template <class T, T V, class U, U w> \
constexpr inline integral_constant<decltype(v op w), (v op w)> operator op( \ constexpr inline integral_constant<decltype(V op w), (V op w)> operator op( \
integral_constant<T, v>, integral_constant<U, w>) noexcept \ integral_constant<T, V>, integral_constant<U, w>) noexcept \
{ \ { \
return {}; \ return {}; \
} }
// NOLINTNEXTLINE
#define MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(op) \ #define MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(op) \
template <class T, T v> \ template <class T, T V> \
constexpr inline integral_constant<decltype(op v), (op v)> operator op( \ constexpr inline integral_constant<decltype(op V), (op V)> operator op( \
integral_constant<T, v>) noexcept \ integral_constant<T, V>) noexcept \
{ \ { \
return {}; \ return {}; \
} }
...@@ -64,8 +67,8 @@ using false_type = bool_constant<false>; ...@@ -64,8 +67,8 @@ using false_type = bool_constant<false>;
template <index_int N> template <index_int N>
using index_constant = integral_constant<index_int, N>; using index_constant = integral_constant<index_int, N>;
template <auto v> template <auto V>
static constexpr auto _c = integral_constant<decltype(v), v>{}; static constexpr auto _c = integral_constant<decltype(V), V>{}; // NOLINT
} // namespace migraphx } // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP #endif // MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP
#ifndef MIGRAPHX_GUARD_KERNELS_MATH_HPP
#define MIGRAPHX_GUARD_KERNELS_MATH_HPP
#include <migraphx/kernels/types.hpp>
#include <migraphx/kernels/vec.hpp>
#include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/type_traits.hpp>
#include <hip/hip_fp16.h>
#include <hip/math_functions.h>
namespace migraphx {
namespace math {

// Half-precision input is widened to float through its implicit conversion.
constexpr float as_float(migraphx::half x)
{
    return x;
}

// Every other type is handed back unchanged.
template <class T>
constexpr T as_float(T x)
{
    return x;
}

} // namespace math
// Defines a device function `name`, enabled only when none of the argument
// types is a vector, whose body is the expression `fname(xs...)`.
// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH(name, fname) \
template <class... Ts, MIGRAPHX_REQUIRES(not is_any_vec<Ts...>())> \
auto __device__ name(Ts... xs) MIGRAPHX_RETURNS(fname(xs...))
// Defines a device overload of `name`, enabled only when at least one argument
// type is a vector. The arguments are handed to vec_transform together with a
// lambda that calls `name` again on the values vec_transform passes to it.
// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH_VEC(name) \
template <class... Ts, MIGRAPHX_REQUIRES(is_any_vec<Ts...>())> \
auto __device__ name(Ts... xs) \
{ \
return vec_transform(xs...)([](auto... ys) { return name(ys...); }); \
}
// Defines a device overload of `name` whose first parameter is exactly `type`
// (remaining parameters must not be vectors). It calls `fname` with all
// arguments and returns the result as `type`.
// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH_FOR(type, name, fname) \
template <class... Ts, MIGRAPHX_REQUIRES(not is_any_vec<Ts...>())> \
auto __device__ name(type x, Ts... xs)->type \
{ \
return fname(x, xs...); \
}
// Defines a device overload of `name` whose first parameter is migraphx::half
// (remaining parameters must not be vectors). Every argument is passed through
// math::as_float before `fname` is called.
// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH_HALF(name, fname) \
template <class... Ts, MIGRAPHX_REQUIRES(not is_any_vec<Ts...>())> \
auto __device__ name(migraphx::half x, Ts... xs) \
MIGRAPHX_RETURNS(fname(math::as_float(x), math::as_float(xs)...))
// Generic overloads for non-vector arguments; each forwards its arguments to
// the global-namespace function of the same name.
MIGRAPHX_DEVICE_MATH(abs, ::abs)
MIGRAPHX_DEVICE_MATH(acos, ::acos)
MIGRAPHX_DEVICE_MATH(acosh, ::acosh)
MIGRAPHX_DEVICE_MATH(asin, ::asin)
MIGRAPHX_DEVICE_MATH(asinh, ::asinh)
MIGRAPHX_DEVICE_MATH(atan, ::atan)
MIGRAPHX_DEVICE_MATH(atanh, ::atanh)
MIGRAPHX_DEVICE_MATH(ceil, ::ceil)
MIGRAPHX_DEVICE_MATH(cos, ::cos)
MIGRAPHX_DEVICE_MATH(cosh, ::cosh)
MIGRAPHX_DEVICE_MATH(erf, ::erf)
MIGRAPHX_DEVICE_MATH(exp, ::exp)
MIGRAPHX_DEVICE_MATH(floor, ::floor)
MIGRAPHX_DEVICE_MATH(log, ::log)
MIGRAPHX_DEVICE_MATH(pow, ::pow)
MIGRAPHX_DEVICE_MATH(round, ::round)
MIGRAPHX_DEVICE_MATH(rsqrt, ::rsqrt)
MIGRAPHX_DEVICE_MATH(sin, ::sin)
MIGRAPHX_DEVICE_MATH(sinh, ::sinh)
MIGRAPHX_DEVICE_MATH(sqrt, ::sqrt)
MIGRAPHX_DEVICE_MATH(tan, ::tan)
MIGRAPHX_DEVICE_MATH(tanh, ::tanh)
// Float overloads: a float first argument calls the f-suffixed function and
// the result is returned as float.
MIGRAPHX_DEVICE_MATH_FOR(float, acos, ::acosf)
MIGRAPHX_DEVICE_MATH_FOR(float, acosh, ::acoshf)
MIGRAPHX_DEVICE_MATH_FOR(float, asin, ::asinf)
MIGRAPHX_DEVICE_MATH_FOR(float, asinh, ::asinhf)
MIGRAPHX_DEVICE_MATH_FOR(float, atan, ::atanf)
MIGRAPHX_DEVICE_MATH_FOR(float, atanh, ::atanhf)
MIGRAPHX_DEVICE_MATH_FOR(float, cos, ::cosf)
MIGRAPHX_DEVICE_MATH_FOR(float, cosh, ::coshf)
MIGRAPHX_DEVICE_MATH_FOR(float, rsqrt, ::rsqrtf)
MIGRAPHX_DEVICE_MATH_FOR(float, sin, ::sinf)
MIGRAPHX_DEVICE_MATH_FOR(float, sinh, ::sinhf)
MIGRAPHX_DEVICE_MATH_FOR(float, tan, ::tanf)
MIGRAPHX_DEVICE_MATH_FOR(float, tanh, ::tanhf)
// Half overloads that call the builtin half-precision functions directly
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, abs, ::__habs)
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, exp, ::hexp)
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, log, ::hlog)
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, rsqrt, ::hrsqrt)
MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, sqrt, ::hsqrt)
// Remaining half overloads: arguments go through math::as_float and the
// computation is done by the non-half function
MIGRAPHX_DEVICE_MATH_HALF(acos, ::acos)
MIGRAPHX_DEVICE_MATH_HALF(acosh, ::acosh)
MIGRAPHX_DEVICE_MATH_HALF(asin, ::asin)
MIGRAPHX_DEVICE_MATH_HALF(asinh, ::asinh)
MIGRAPHX_DEVICE_MATH_HALF(atan, ::atan)
MIGRAPHX_DEVICE_MATH_HALF(atanh, ::atanh)
MIGRAPHX_DEVICE_MATH_HALF(ceil, ::ceil)
MIGRAPHX_DEVICE_MATH_HALF(cos, ::cos)
MIGRAPHX_DEVICE_MATH_HALF(cosh, ::cosh)
MIGRAPHX_DEVICE_MATH_HALF(erf, ::erf)
MIGRAPHX_DEVICE_MATH_HALF(floor, ::floor)
MIGRAPHX_DEVICE_MATH_HALF(pow, ::pow)
MIGRAPHX_DEVICE_MATH_HALF(round, ::round)
MIGRAPHX_DEVICE_MATH_HALF(sin, ::sin)
MIGRAPHX_DEVICE_MATH_HALF(sinh, ::sinh)
MIGRAPHX_DEVICE_MATH_HALF(tan, ::tan)
MIGRAPHX_DEVICE_MATH_HALF(tanh, ::tanh)
// Scalar select: yields `a` when `cond` is true and `b` otherwise. The result
// type is the one the conditional operator produces for T and U.
template <class T, class U>
constexpr auto where(bool cond, const T& a, const U& b)
{
    return (cond) ? a : b;
}
// Vector overloads: one per function name, selected when any argument is a
// vector type. `where` is included so that it also accepts vector arguments.
MIGRAPHX_DEVICE_MATH_VEC(abs)
MIGRAPHX_DEVICE_MATH_VEC(acos)
MIGRAPHX_DEVICE_MATH_VEC(acosh)
MIGRAPHX_DEVICE_MATH_VEC(asin)
MIGRAPHX_DEVICE_MATH_VEC(asinh)
MIGRAPHX_DEVICE_MATH_VEC(atan)
MIGRAPHX_DEVICE_MATH_VEC(atanh)
MIGRAPHX_DEVICE_MATH_VEC(ceil)
MIGRAPHX_DEVICE_MATH_VEC(cos)
MIGRAPHX_DEVICE_MATH_VEC(cosh)
MIGRAPHX_DEVICE_MATH_VEC(erf)
MIGRAPHX_DEVICE_MATH_VEC(exp)
MIGRAPHX_DEVICE_MATH_VEC(floor)
MIGRAPHX_DEVICE_MATH_VEC(log)
MIGRAPHX_DEVICE_MATH_VEC(pow)
MIGRAPHX_DEVICE_MATH_VEC(round)
MIGRAPHX_DEVICE_MATH_VEC(rsqrt)
MIGRAPHX_DEVICE_MATH_VEC(sin)
MIGRAPHX_DEVICE_MATH_VEC(sinh)
MIGRAPHX_DEVICE_MATH_VEC(sqrt)
MIGRAPHX_DEVICE_MATH_VEC(tan)
MIGRAPHX_DEVICE_MATH_VEC(tanh)
MIGRAPHX_DEVICE_MATH_VEC(where)
// Maximum of two values expressed through where(): `b` is selected wherever
// `a < b` holds, `a` everywhere else.
template <class T, class U>
constexpr auto max(const T& a, const U& b)
{
    const auto take_b = a < b;
    return where(take_b, b, a);
}
// Minimum of two values expressed through where(): `b` is selected wherever
// `a > b` holds, `a` everywhere else.
template <class T, class U>
constexpr auto min(const T& a, const U& b)
{
    const auto take_b = a > b;
    return where(take_b, b, a);
}
// Converts `v` to element type T. The conversion lambda is applied through
// vec_transform, so a vector argument is converted one element at a time and a
// non-vector argument is converted directly.
template <class T, class U>
constexpr auto convert(U v)
{
    auto to_target = [](auto x) -> T { return x; };
    return vec_transform(v)(to_target);
}
} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_MATH_HPP
...@@ -3,19 +3,45 @@ ...@@ -3,19 +3,45 @@
#include <migraphx/kernels/index.hpp> #include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/functional.hpp> #include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/math.hpp>
#include <migraphx/kernels/preload.hpp> #include <migraphx/kernels/preload.hpp>
#include <migraphx/kernels/vectorize.hpp> #include <migraphx/kernels/vectorize.hpp>
#include <migraphx/kernels/args.hpp> #include <migraphx/kernels/args.hpp>
namespace migraphx { namespace migraphx {
// Holds a value and converts it on demand to whatever type it is assigned to.
template <class T>
struct implicit_conversion_op
{
    T x;

    // Non-vector destination: rely on the ordinary implicit conversion of x.
    template <class U>
    constexpr operator U() const
    {
        return x;
    }

    // Vector destination: convert with __builtin_convertvector. The source
    // must have the same number of elements as the destination.
    template <index_int N, class U>
    constexpr operator vec<U, N>() const
    {
        static_assert(vec_size<T>() == N, "Vector mismatch size");
        return __builtin_convertvector(x, vec<U, N>);
    }
};
// Factory that wraps `x` in an implicit_conversion_op so the wrapper's type
// does not have to be spelled out at the call site.
template <class T>
constexpr implicit_conversion_op<T> implicit_conversion(T x)
{
    return implicit_conversion_op<T>{x};
}
template <class F, class T, class... Ts> template <class F, class T, class... Ts>
__device__ void pointwise_tensor(index idx, F f, T out, Ts... xs) __device__ void pointwise_tensor(index idx, F f, T out, Ts... xs)
{ {
preload<typename T::type>(idx, xs...)([&](auto... ps) { preload<typename T::type>(idx, xs...)([&](auto... ps) {
idx.global_stride(out.get_shape().elements(), [&](auto i) { idx.global_stride(out.get_shape().elements(), [&](auto i) {
auto multi_idx = out.get_shape().multi(i); auto multi_idx = out.get_shape().multi(i);
out[multi_idx] = f(ps[multi_idx]...); out[multi_idx] = implicit_conversion(f(ps[multi_idx]...));
}); });
}); });
} }
...@@ -23,7 +49,7 @@ __device__ void pointwise_tensor(index idx, F f, T out, Ts... xs) ...@@ -23,7 +49,7 @@ __device__ void pointwise_tensor(index idx, F f, T out, Ts... xs)
template <class F, class... Ts> template <class F, class... Ts>
__device__ void pointwise(F f, Ts*... ps) __device__ void pointwise(F f, Ts*... ps)
{ {
auto t = transform_args(make_tensors(), rotate_last()); auto t = transform_args(make_tensors(), rotate_last(), auto_vectorize());
t(ps...)([&](auto... xs) { t(ps...)([&](auto... xs) {
auto idx = make_index(); auto idx = make_index();
pointwise_tensor(idx, f, xs...); pointwise_tensor(idx, f, xs...);
......
...@@ -14,9 +14,7 @@ constexpr auto traverse_preload(Shapes... ss) ...@@ -14,9 +14,7 @@ constexpr auto traverse_preload(Shapes... ss)
auto each = [&](auto x) { auto each = [&](auto x) {
constexpr auto s = decltype(x.get_shape()){}; constexpr auto s = decltype(x.get_shape()){};
constexpr auto size = _c<s.element_space()>; constexpr auto size = _c<s.element_space()>;
if constexpr(not s.broadcasted()) if constexpr(not s.broadcasted() or (s.elements() - size) < 64)
return f(x, offset, false_type{});
else if constexpr((s.elements() - size) < 64)
return f(x, offset, false_type{}); return f(x, offset, false_type{});
else else
{ {
...@@ -31,7 +29,7 @@ constexpr auto traverse_preload(Shapes... ss) ...@@ -31,7 +29,7 @@ constexpr auto traverse_preload(Shapes... ss)
} }
template <class T, class... Shapes> template <class T, class... Shapes>
constexpr index_int compute_preload_size(Shapes...) constexpr index_int compute_preload_size_c(Shapes...)
{ {
index_int size = 0; index_int size = 0;
traverse_preload<T>(Shapes{}...)( traverse_preload<T>(Shapes{}...)(
...@@ -39,6 +37,12 @@ constexpr index_int compute_preload_size(Shapes...) ...@@ -39,6 +37,12 @@ constexpr index_int compute_preload_size(Shapes...)
return size; return size;
} }
template <class T, class... Shapes>
constexpr auto compute_preload_size(Shapes...)
{
return _c<compute_preload_size_c<T>(Shapes{}...)>;
}
template <class F, class T, class... Ts> template <class F, class T, class... Ts>
__device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs) __device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs)
{ {
...@@ -50,11 +54,21 @@ __device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs) ...@@ -50,11 +54,21 @@ __device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs)
[&](auto x, auto offset, auto copy) { [&](auto x, auto offset, auto copy) {
if constexpr(copy) if constexpr(copy)
{ {
auto v = vectorize(x); if constexpr(decltype(tensor_vec_size(x)){} == 0)
auto b = as_vec(tensor_vec_size(v), buffer + offset); {
idx.local_stride(v.get_shape().element_space(), auto v = vectorize(x);
[&](auto i) { b[i] = v.data()[i]; }); auto b = as_vec(tensor_vec_size(v), buffer + offset);
return x.with(buffer + offset); idx.local_stride(v.get_shape().element_space(),
[&](auto i) { b[i] = v.data()[i]; });
return x.with(buffer + offset);
}
else
{
auto b = as_vec(tensor_vec_size(x), buffer + offset);
idx.local_stride(x.get_shape().element_space(),
[&](auto i) { b[i] = x.data()[i]; });
return x.with(b);
}
} }
else else
{ {
...@@ -80,7 +94,7 @@ template <class T, class... Ts> ...@@ -80,7 +94,7 @@ template <class T, class... Ts>
__device__ auto preload(index idx, Ts... xs) __device__ auto preload(index idx, Ts... xs)
{ {
using type = typename remove_vec<T>::type; using type = typename remove_vec<T>::type;
constexpr auto size = compute_preload_size<type>(xs.get_shape()...); constexpr auto size = decltype(compute_preload_size<type>(xs.get_shape()...)){};
const index_int max_size = 512 * sizeof(type); const index_int max_size = 512 * sizeof(type);
return [=](auto f) { return [=](auto f) {
if constexpr(size > 0 and size < max_size) if constexpr(size > 0 and size < max_size)
......
#ifndef MIGRAPHX_GUARD_KERNELS_PRINT_HPP #ifndef MIGRAPHX_GUARD_KERNELS_PRINT_HPP
#define MIGRAPHX_GUARD_KERNELS_PRINT_HPP #define MIGRAPHX_GUARD_KERNELS_PRINT_HPP
#include <hip/hip_runtime.h> #include <migraphx/kernels/hip.hpp>
#include <migraphx/kernels/index.hpp> #include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/functional.hpp> #include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/algorithm.hpp> #include <migraphx/kernels/algorithm.hpp>
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include <migraphx/kernels/index.hpp> #include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/dfor.hpp> #include <migraphx/kernels/dfor.hpp>
#include <migraphx/kernels/basic_ops.hpp> #include <migraphx/kernels/basic_ops.hpp>
#include <args.hpp> #include <migraphx/kernels/array.hpp>
namespace migraphx { namespace migraphx {
...@@ -104,14 +104,24 @@ MIGRAPHX_DEVICE_CONSTEXPR T calc_pooling(const T*& data, ...@@ -104,14 +104,24 @@ MIGRAPHX_DEVICE_CONSTEXPR T calc_pooling(const T*& data,
return op.final(output_val, count); return op.final(output_val, count);
} }
template <class T, class U, class V, class W> template <class T1, class T2, class T3, class T4>
__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t) struct roalign_settings
{ {
const float roi_offset = ROIS_OFFSET; T1 roi_offset{};
const bool is_avg_pooling = IS_AVG_POOLING; T2 is_avg_pooling{};
const int64_t sampling_ratio = SAMPLING_RATIO; T3 sampling_ratio{};
const float spatial_scale = SPATIAL_SCALE; T4 spatial_scale{};
};
template <class... Ts>
constexpr roalign_settings<Ts...> make_roalign_settings(Ts... xs)
{
return {xs...};
}
template <class T, class U, class V, class W, class Settings>
__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t, Settings s)
{
auto index = make_index(); auto index = make_index();
const auto* x = x_t.data(); const auto* x = x_t.data();
const auto* rois = rois_t.data(); const auto* rois = rois_t.data();
...@@ -146,9 +156,10 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -146,9 +156,10 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
const auto* offset_rois = rois + (n * roi_column_num); const auto* offset_rois = rois + (n * roi_column_num);
const int batch_ind = ind[n]; const int batch_ind = ind[n];
array<float, 2> roi_starts = {offset_rois[1] * spatial_scale, array<float, 2> roi_starts = {offset_rois[1] * s.spatial_scale,
offset_rois[0] * spatial_scale}; offset_rois[0] * s.spatial_scale};
array<float, 2> roi_ends = {offset_rois[3] * spatial_scale, offset_rois[2] * spatial_scale}; array<float, 2> roi_ends = {offset_rois[3] * s.spatial_scale,
offset_rois[2] * s.spatial_scale};
array<float, 2> roi_size{}; array<float, 2> roi_size{};
array<float, 2> bin_size{}; array<float, 2> bin_size{};
...@@ -161,11 +172,11 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -161,11 +172,11 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
bin_size[ii] = roi_size[ii] / out_dims[ii]; bin_size[ii] = roi_size[ii] / out_dims[ii];
bin_grid_size[ii] = bin_grid_size[ii] =
(sampling_ratio > 0) ? sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]); (s.sampling_ratio > 0) ? s.sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]);
} }
const auto* offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); const auto* offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]);
if constexpr(is_avg_pooling) if constexpr(s.is_avg_pooling)
{ {
out_ptr[i] = calc_pooling(offset_x, out_ptr[i] = calc_pooling(offset_x,
roi_starts, roi_starts,
...@@ -173,7 +184,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -173,7 +184,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
{ph, pw}, {ph, pw},
bin_grid_size, bin_grid_size,
in_dims, in_dims,
roi_offset, s.roi_offset,
avg_pool{}); avg_pool{});
} }
else else
...@@ -184,7 +195,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -184,7 +195,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
{ph, pw}, {ph, pw},
bin_grid_size, bin_grid_size,
in_dims, in_dims,
roi_offset, s.roi_offset,
max_pool{}); max_pool{});
} }
} }
......
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP #ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP #define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP
#include <hip/hip_runtime.h> #include <migraphx/kernels/hip.hpp>
namespace migraphx { namespace migraphx {
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <migraphx/kernels/types.hpp> #include <migraphx/kernels/types.hpp>
#include <migraphx/kernels/integral_constant.hpp> #include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/functional.hpp>
namespace migraphx { namespace migraphx {
...@@ -13,7 +14,7 @@ constexpr auto vec_size(vec<T, N>) ...@@ -13,7 +14,7 @@ constexpr auto vec_size(vec<T, N>)
} }
template <class T> template <class T>
constexpr auto vec_size(T, ...) constexpr auto vec_size(T, ...) // NOLINT
{ {
return index_constant<0>{}; return index_constant<0>{};
} }
...@@ -24,6 +25,38 @@ constexpr auto vec_size() ...@@ -24,6 +25,38 @@ constexpr auto vec_size()
return decltype(vec_size(T{})){}; return decltype(vec_size(T{})){};
} }
// Compile-time predicate: true when at least one of Ts has a non-zero vector
// size. An empty type list yields false.
template <class... Ts>
constexpr auto is_any_vec()
{
    if constexpr(sizeof...(Ts) > 0)
        return bool_constant<((vec_size<Ts>() + ...) > 0)>{};
    else
        return false_type{};
}
// Element access that also accepts non-vector values: a vector yields its
// i-th element, anything else is returned as-is regardless of `i`.
template <class T, class I>
constexpr auto vec_at(T x, I i)
{
    if constexpr(vec_size<T>() != 0)
    {
        MIGRAPHX_ASSERT(i < vec_size<T>());
        return x[i];
    }
    else
    {
        return x;
    }
}
// Combines the vector sizes of all Ts with fold(), keeping the larger value at
// every step.
template <class... Ts>
constexpr auto common_vec_size()
{
    // Binary step handed to fold(): pick the larger of two compile-time sizes.
    auto larger = [](auto x, auto y) {
        if constexpr(x > y)
            return x;
        else
            return y;
    };
    return fold(larger)(vec_size<Ts>()...);
}
template <index_int N, class T> template <index_int N, class T>
__device__ __host__ auto as_vec(T* x) __device__ __host__ auto as_vec(T* x)
{ {
...@@ -33,5 +66,25 @@ __device__ __host__ auto as_vec(T* x) ...@@ -33,5 +66,25 @@ __device__ __host__ auto as_vec(T* x)
return reinterpret_cast<vec<T, N>*>(x); return reinterpret_cast<vec<T, N>*>(x);
} }
template <class... Ts>
constexpr auto vec_transform(Ts... xs)
{
return [=](auto f) {
if constexpr(is_any_vec<Ts...>())
{
using type = decltype(f(vec_at(xs, 0)...));
constexpr auto size = common_vec_size<Ts...>();
vec<type, size> result = {0};
for(int i = 0; i < size; i++)
result[i] = f(vec_at(xs, i)...);
return result;
}
else
{
return f(xs...);
}
};
}
} // namespace migraphx } // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP #endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP
...@@ -7,40 +7,70 @@ ...@@ -7,40 +7,70 @@
namespace migraphx { namespace migraphx {
template <class T> template <class T>
constexpr auto tensor_vec_size(T) constexpr auto tensor_vec_size()
{ {
return vec_size<typename T::type>(); return vec_size<typename T::type>();
} }
template <index_int N, class Shape> template <class T>
constexpr auto as_vec_shape(Shape s) constexpr auto tensor_vec_size(T)
{ {
auto lens = transform(s.lens, s.strides, [](auto len, auto stride) { return tensor_vec_size<T>();
if(stride == 1) }
return len / N;
else template <index_int N, class Shape, class Axis>
return len; constexpr auto shape_step(Shape s, Axis)
}); {
auto strides = transform(s.strides, [](auto stride) { static_assert(N > 0, "Vector size must be non-zero");
if(stride == 1) return sequence(s.lens.size(), [&](auto... is) {
return stride; auto lens = transform(s.lens, index_ints<is...>{}, [&](auto i, auto j) {
return stride / N; constexpr auto axis = Axis::to();
MIGRAPHX_ASSERT(i != 0);
MIGRAPHX_ASSERT(j != axis or i % N == 0);
if(j == axis)
return i / N;
else
return i;
});
auto strides = transform(s.strides, index_ints<is...>{}, [&](auto i, auto j) {
constexpr auto axis = Axis::to();
// If stride of the axis is zero then we dont need to adjust the other strides
if(Shape{}.strides[axis] == 0)
return i;
MIGRAPHX_ASSERT(j == axis or i % N == 0);
if(j == axis)
return i;
else
return i / N;
});
MIGRAPHX_ASSERT(make_shape(lens, strides).elements() * N == s.elements());
MIGRAPHX_ASSERT(strides[Axis{}] == 0 or
make_shape(lens, strides).element_space() * N == s.element_space());
return make_shape(lens, strides);
}); });
MIGRAPHX_ASSERT(make_shape(lens, strides).element_space() * N == s.element_space());
return make_shape(lens, strides);
} }
template <index_int N, class T> // Bools can not be used as a vector type so convert it to int8
__device__ __host__ auto as_vec(T x) template <class T>
__device__ __host__ T* remove_bool(T* x)
{
return x;
}
inline __device__ __host__ int8_t* remove_bool(bool* x) { return reinterpret_cast<int8_t*>(x); }
template <index_int N, class T, class Axis>
__device__ __host__ auto as_vec(T x, Axis axis)
{ {
if constexpr(N == 0) if constexpr(N == 0)
return x; return x;
else else
return make_tensor_view(as_vec<N>(x.data()), as_vec_shape<N>(x.get_shape())); return make_tensor_view(as_vec<N>(remove_bool(x.data())),
shape_step<N>(x.get_shape(), axis));
} }
template <index_int N, class T, class Axis> template <index_int N, class T, class Axis>
constexpr auto tensor_step(T x, Axis) constexpr auto tensor_step(T x, Axis axis)
{ {
if constexpr(N == 0) if constexpr(N == 0)
{ {
...@@ -49,17 +79,8 @@ constexpr auto tensor_step(T x, Axis) ...@@ -49,17 +79,8 @@ constexpr auto tensor_step(T x, Axis)
else else
{ {
constexpr auto s = decltype(x.get_shape()){}; constexpr auto s = decltype(x.get_shape()){};
MIGRAPHX_ASSERT(s.strides[Axis{}] == 0); MIGRAPHX_ASSERT(s.strides[axis] == 0);
return sequence(x.get_shape().lens.size(), [&](auto... is) { return make_tensor_view(x.data(), shape_step<N>(s, axis));
auto lens = transform(s.lens, index_ints<is...>{}, [&](auto i, auto j) {
constexpr auto axis = Axis{};
if(j == axis)
return i / N;
else
return i;
});
return make_tensor_view(x.data(), make_shape(lens, s.strides));
});
} }
} }
...@@ -69,42 +90,71 @@ __device__ __host__ auto as_vec(IntegralConstant ic, T&& x) ...@@ -69,42 +90,71 @@ __device__ __host__ auto as_vec(IntegralConstant ic, T&& x)
return as_vec<ic>(x); return as_vec<ic>(x);
} }
template <class... Shapes> template <class Shape>
constexpr index_int find_vector_axis(Shapes... ss) constexpr index_int find_vector_axis_c(Shape s)
{ {
// Find the fastest axis that is not broadcasted
index_int axis = 0; index_int axis = 0;
bool b = false; for(index_int i = 1; i < s.lens.size(); i++)
{
if(s.strides[i] == 0)
continue;
if(s.strides[axis] == 0 or
pack_compare(less{}, pack(s.strides[i], s.lens[i]), pack(s.strides[axis], s.lens[axis])))
axis = i;
}
return axis;
}
template <class... Shapes>
constexpr index_int find_vector_axis_c(Shapes... ss)
{
const bool all_broadcasted = (ss.broadcasted() and ...);
index_int axis = 0;
bool b = false;
by([&](auto s) { by([&](auto s) {
if(s.broadcasted() or b) if(b)
return; return;
auto it = find(s.strides.begin(), s.strides.end(), 1); // Skip broadcasted shapes if there are shapes not broadcasted
if(it == s.strides.end()) if(not all_broadcasted and s.broadcasted())
return; return;
axis = it - s.strides.begin(); axis = find_vector_axis_c(s);
b = true; if(s.strides[axis] == 1)
b = true;
})(ss...); })(ss...);
if(not b)
return -1;
return axis; return axis;
} }
template <class... Shapes>
constexpr auto find_vector_axis(Shapes...)
{
return _c<find_vector_axis_c(Shapes{}...)>;
}
template <index_int N, class Axis, class... Shapes> template <index_int N, class Axis, class... Shapes>
constexpr auto is_vectorizable(Axis axis, Shapes... ss) constexpr auto is_vectorizable_c(Axis axis, Shapes... ss)
{ {
return (((ss.lens[axis] % N) == 0 and (ss.strides[axis] == 1 or ss.strides[axis] == 0)) and return ((axis < ss.lens.size() and ss.lens[axis] % N == 0 and
// Only vectorize broadcasted types with stride 0, since this causes issues in the
// preloader
((not ss.broadcasted() and ss.strides[axis] == 1) or ss.strides[axis] == 0)) and
...); ...);
} }
template <index_int N, class... Shapes> template <index_int N, class Axis, class... Shapes>
constexpr bool is_vectorizable(Shapes... ss) constexpr auto is_vectorizable(Axis, Shapes...)
{ {
return (is_vectorizable<N>(ss, find_vector_axis(ss)) and ...); return _c<is_vectorizable_c<N>(Axis::to(), Shapes{}...)>;
} }
template <class P> template <class P>
constexpr auto find_vectorize_size(P pred) constexpr auto find_vectorize_size(P pred)
{ {
if constexpr(pred(_c<4>)) if constexpr(decltype(pred(_c<4>)){})
return _c<4>; return _c<4>;
else if constexpr(pred(_c<2>)) else if constexpr(decltype(pred(_c<2>)){})
return _c<2>; return _c<2>;
else else
return _c<0>; return _c<0>;
...@@ -113,11 +163,12 @@ constexpr auto find_vectorize_size(P pred) ...@@ -113,11 +163,12 @@ constexpr auto find_vectorize_size(P pred)
template <class T> template <class T>
__host__ __device__ auto vectorize(T x) __host__ __device__ auto vectorize(T x)
{ {
if constexpr(vec_size<T>() == 0) if constexpr(tensor_vec_size<T>() == 0)
{ {
constexpr auto axis = find_vector_axis(x.get_shape());
constexpr auto n = constexpr auto n =
find_vectorize_size([&](auto i) { return _c<is_vectorizable<i>(x.get_shape())>; }); find_vectorize_size([&](auto i) { return is_vectorizable<i>(axis, x.get_shape()); });
return as_vec<n>(x); return as_vec<n>(x, axis);
} }
else else
{ {
...@@ -125,34 +176,46 @@ __host__ __device__ auto vectorize(T x) ...@@ -125,34 +176,46 @@ __host__ __device__ auto vectorize(T x)
} }
} }
// Calls `f` with the tensors `xs`, vectorized along a common axis when possible.
// If any shape is neither packed nor broadcasted, `f` receives the arguments
// unchanged.
template <class F, class... Ts>
inline __device__ __host__ auto auto_vectorize_impl(F f, Ts... xs)
{
// TODO: Just check there is a single axis of 1
constexpr bool packed_or_broadcasted =
((xs.get_shape().packed() or xs.get_shape().broadcasted()) and ...);
if constexpr(packed_or_broadcasted)
{
// The axis and vector size are chosen once, from all shapes together.
constexpr auto axis = decltype(find_vector_axis(xs.get_shape()...)){};
constexpr auto n = find_vectorize_size(
[&](auto i) { return is_vectorizable<i>(axis, xs.get_shape()...); });
// NOTE(review): by(g, f) presumably applies g to every argument before
// calling f -- confirm against functional.hpp.
by(
[&](auto x) {
constexpr auto s = decltype(x.get_shape()){};
// Tensors that do not have the chosen axis are passed through untouched.
if constexpr(axis < s.strides.size())
{
MIGRAPHX_ASSERT(s.strides[axis] == 0 or s.strides[axis] == 1);
MIGRAPHX_ASSERT(s.lens[axis] > 0);
MIGRAPHX_ASSERT(n == 0 or s.lens[axis] % n == 0);
// Stride 0 on the axis: only the shape is stepped (tensor_step);
// otherwise the tensor is viewed as vectors (as_vec).
if constexpr(s.strides[axis] == 0)
return tensor_step<n>(x, axis);
else
return as_vec<n>(x, axis);
}
else
{
return x;
}
},
f)(xs...);
}
else
{
f(xs...);
}
}
inline __device__ __host__ auto auto_vectorize() inline __device__ __host__ auto auto_vectorize()
{ {
return [](auto... xs) { return [](auto... xs) { return [=](auto f) { auto_vectorize_impl(f, xs...); }; };
return [=](auto f) {
// TODO: Just check there a single axis of 1
constexpr bool packed_or_broadcasted =
((xs.get_shape().packed() or xs.get_shape().broadcasted()) and ...);
if constexpr(packed_or_broadcasted)
{
constexpr auto axis = find_vector_axis(xs.get_shape()...);
constexpr auto n = find_vectorize_size(
[&](auto i) { return _c<is_vectorizable<i>(axis, xs.get_shape()...)>; });
by(
[&](auto x) {
constexpr auto s = x.get_shape();
if constexpr(s.strides[axis] == 0)
return tensor_step<n>(x, axis);
else
return as_vec<n>(x);
},
f)(xs...);
}
else
{
f(xs...);
}
};
};
} }
} // namespace migraphx } // namespace migraphx
......
...@@ -60,6 +60,7 @@ struct miopen_apply ...@@ -60,6 +60,7 @@ struct miopen_apply
std::unordered_map<instruction_ref, std::string> prog_output_names{}; std::unordered_map<instruction_ref, std::string> prog_output_names{};
bool offload_copy = false; bool offload_copy = false;
bool int8_x4_format = true; bool int8_x4_format = true;
bool compute_fp32 = false;
context& get_context() const context& get_context() const
{ {
...@@ -103,6 +104,8 @@ struct miopen_apply ...@@ -103,6 +104,8 @@ struct miopen_apply
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38 #if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
auto& ctx = get_context(); auto& ctx = get_context();
if(ctx.get_stream().get_device_name() == "gfx908")
compute_fp32 = true;
rocblas_gemm_flags flag; rocblas_gemm_flags flag;
rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag); rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
int8_x4_format = (flag == rocblas_gemm_flags_pack_int8x4); int8_x4_format = (flag == rocblas_gemm_flags_pack_int8x4);
...@@ -337,7 +340,7 @@ struct miopen_apply ...@@ -337,7 +340,7 @@ struct miopen_apply
} }
} }
return mod->replace_instruction( return mod->replace_instruction(
ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format}, refs); ins, rocblas_gemm<Op>{Op{}, 1, 0, int8_x4_format, compute_fp32}, refs);
}); });
} }
......
...@@ -101,4 +101,38 @@ TEST_CASE(after_param_broadcast) ...@@ -101,4 +101,38 @@ TEST_CASE(after_param_broadcast)
EXPECT(not m.get_output_shapes().back().broadcasted()); EXPECT(not m.get_output_shapes().back().broadcasted());
} }
// transpose -> softmax -> transpose -> gather: after the pass under test runs,
// a contiguous instruction is expected after each of the two transposes.
TEST_CASE(two_transpose_gather)
{
    migraphx::module m1;
    {
        auto input   = m1.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
        auto indices = m1.add_parameter("ind", {migraphx::shape::float_type, {2, 3}});
        auto to_last = m1.add_instruction(
            migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), input);
        auto soft = m1.add_instruction(migraphx::make_op("softmax", {{"axis", 2}}), to_last);
        auto to_first = m1.add_instruction(
            migraphx::make_op("transpose", {{"permutation", {0, 3, 1, 2}}}), soft);
        auto gathered =
            m1.add_instruction(migraphx::make_op("gather", {{"axis", 2}}), to_first, indices);
        m1.add_return({gathered});
    }
    run_pass(m1);

    // Expected module: identical graph with a contiguous after every transpose.
    migraphx::module m2;
    {
        auto input   = m2.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
        auto indices = m2.add_parameter("ind", {migraphx::shape::float_type, {2, 3}});
        auto to_last = m2.add_instruction(
            migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), input);
        auto to_last_packed = m2.add_instruction(migraphx::make_op("contiguous"), to_last);
        auto soft =
            m2.add_instruction(migraphx::make_op("softmax", {{"axis", 2}}), to_last_packed);
        auto to_first = m2.add_instruction(
            migraphx::make_op("transpose", {{"permutation", {0, 3, 1, 2}}}), soft);
        auto to_first_packed = m2.add_instruction(migraphx::make_op("contiguous"), to_first);
        auto gathered        = m2.add_instruction(
            migraphx::make_op("gather", {{"axis", 2}}), to_first_packed, indices);
        m2.add_return({gathered});
    }

    EXPECT(m1 == m2);
}
int main(int argc, const char* argv[]) { test::run(argc, argv); } int main(int argc, const char* argv[]) { test::run(argc, argv); }
...@@ -73,6 +73,35 @@ TEST_CASE(double_add) ...@@ -73,6 +73,35 @@ TEST_CASE(double_add)
EXPECT(p1.sort() == p2.sort()); EXPECT(p1.sort() == p2.sort());
} }
// Two chained adds with no explicit return: the expected program has a single
// pointwise instruction computing both adds, followed by an identity.
TEST_CASE(double_add_without_return)
{
    migraphx::shape s{migraphx::shape::float_type, {2, 3}};

    migraphx::program p1;
    {
        auto* mm    = p1.get_main_module();
        auto x      = mm->add_parameter("x", s);
        auto y      = mm->add_parameter("y", s);
        auto z      = mm->add_parameter("z", s);
        auto sum_xy = mm->add_instruction(migraphx::make_op("add"), x, y);
        mm->add_instruction(migraphx::make_op("add"), sum_xy, z);
    }
    run_pass(p1);

    migraphx::program p2;
    {
        auto* mm = p2.get_main_module();
        auto x   = mm->add_parameter("x", s);
        auto y   = mm->add_parameter("y", s);
        auto z   = mm->add_parameter("z", s);
        auto fused =
            add_pointwise(p2, "main:pointwise0", {x, y, z}, [=](auto* pm, const auto& inputs) {
                auto sum_xy =
                    pm->add_instruction(migraphx::make_op("add"), inputs[0], inputs[1]);
                return pm->add_instruction(migraphx::make_op("add"), sum_xy, inputs[2]);
            });
        mm->add_instruction(migraphx::make_op("identity"), fused);
    }

    EXPECT(p1.sort() == p2.sort());
}
TEST_CASE(used_twice_not_fused) TEST_CASE(used_twice_not_fused)
{ {
migraphx::shape s{migraphx::shape::float_type, {2, 3}}; migraphx::shape s{migraphx::shape::float_type, {2, 3}};
......
...@@ -1618,6 +1618,22 @@ def greater_bool_test(): ...@@ -1618,6 +1618,22 @@ def greater_bool_test():
return ([node1, node2], [x1, x2], [y]) return ([node1, node2], [x1, x2], [y])
@onnx_test
def greaterorequal_test():
    """Describe a graph with one GreaterOrEqual node over inputs 'x1' and 'x2'.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    lhs = helper.make_tensor_value_info('x1', TensorProto.FLOAT, [3])
    rhs = helper.make_tensor_value_info('x2', TensorProto.FLOAT, [3])
    # NOTE(review): the output is declared FLOAT here; confirm that is the
    # intended element type for a comparison result.
    result = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3])

    cmp_node = onnx.helper.make_node('GreaterOrEqual',
                                     inputs=['x1', 'x2'],
                                     outputs=['y'])

    return ([cmp_node], [lhs, rhs], [result])
@onnx_test @onnx_test
def group_conv_test(): def group_conv_test():
x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 4, 16, 16]) x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 4, 16, 16])
...@@ -1634,6 +1650,60 @@ def group_conv_test(): ...@@ -1634,6 +1650,60 @@ def group_conv_test():
return ([node], [x, y], [z]) return ([node], [x, y], [z])
@onnx_test
def hardsigmoid_default_test():
    """Describe a HardSigmoid node with no explicit attributes on a FLOAT tensor.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    src = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 3, 4, 5])
    dst = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 3, 4, 5])

    hs_node = onnx.helper.make_node('HardSigmoid',
                                    inputs=['x'],
                                    outputs=['y'])

    return ([hs_node], [src], [dst])
@onnx_test
def hardsigmoid_double_test():
    """Describe a HardSigmoid node on a DOUBLE tensor with alpha=0.3, beta=0.7.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    src = helper.make_tensor_value_info('x', TensorProto.DOUBLE, [1, 3, 4, 5])
    dst = helper.make_tensor_value_info('y', TensorProto.DOUBLE, [1, 3, 4, 5])

    # Both attributes are set explicitly rather than left unset.
    hs_node = onnx.helper.make_node(
        'HardSigmoid',
        inputs=['x'],
        outputs=['y'],
        alpha=0.3,
        beta=0.7,
    )

    return ([hs_node], [src], [dst])
@onnx_test
def hardsigmoid_half_test():
    """Describe a HardSigmoid node with no explicit attributes on a FLOAT16 tensor.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    src = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [1, 3, 4, 5])
    dst = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [1, 3, 4, 5])

    hs_node = onnx.helper.make_node('HardSigmoid',
                                    inputs=['x'],
                                    outputs=['y'])

    return ([hs_node], [src], [dst])
@onnx_test
def hardsigmoid_verify_test():
    """Describe a HardSigmoid node with no explicit attributes on a [2, 5] FLOAT tensor.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    src = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 5])
    dst = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 5])

    hs_node = onnx.helper.make_node('HardSigmoid',
                                    inputs=['x'],
                                    outputs=['y'])

    return ([hs_node], [src], [dst])
@onnx_test
def hardswish_test():
    """Describe a single HardSwish node on a [2, 5] FLOAT tensor.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    src = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 5])
    dst = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 5])

    swish_node = onnx.helper.make_node('HardSwish',
                                       inputs=['x'],
                                       outputs=['y'])

    return ([swish_node], [src], [dst])
@onnx_test @onnx_test
def if_else_test(): def if_else_test():
x = onnx.helper.make_tensor_value_info('x', onnx.TensorProto.FLOAT, [2, 3]) x = onnx.helper.make_tensor_value_info('x', onnx.TensorProto.FLOAT, [2, 3])
...@@ -2692,6 +2762,80 @@ def maxpool_same_upper_test(): ...@@ -2692,6 +2762,80 @@ def maxpool_same_upper_test():
return ([node], [x], [y]) return ([node], [x], [y])
@onnx_test
def mean_broadcast_test():
    """Describe a Mean node over five FLOAT inputs with differing shapes.

    Inputs are named '0'..'4'; the output 'mean' is declared as [1, 2, 3, 4].
    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    # One entry per operand, in input order.
    operand_dims = [
        [1, 3, 4],
        [1, 2, 3, 4],
        [4],
        [1],
        [2, 3, 1],
    ]
    operand_names = [str(idx) for idx in range(len(operand_dims))]
    operands = [
        helper.make_tensor_value_info(name, TensorProto.FLOAT, dims)
        for name, dims in zip(operand_names, operand_dims)
    ]
    averaged = helper.make_tensor_value_info('mean', TensorProto.FLOAT,
                                             [1, 2, 3, 4])

    mean_node = onnx.helper.make_node("Mean",
                                      inputs=operand_names,
                                      outputs=["mean"])

    return ([mean_node], operands, [averaged])
@onnx_test
def mean_fp16_test():
    """Describe a Mean node over three FLOAT16 inputs of shape [1, 2, 3].

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    operand_names = ["0", "1", "2"]
    operands = [
        helper.make_tensor_value_info(name, TensorProto.FLOAT16, [1, 2, 3])
        for name in operand_names
    ]
    averaged = helper.make_tensor_value_info('mean', TensorProto.FLOAT16,
                                             [1, 2, 3])

    mean_node = onnx.helper.make_node("Mean",
                                      inputs=operand_names,
                                      outputs=["mean"])

    return ([mean_node], operands, [averaged])
@onnx_test
def mean_invalid_broadcast_test():
    """Describe a Mean node whose third input differs in its last dimension.

    Inputs '0' and '1' are [1, 2, 3]; input '2' is [1, 2, 4].
    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    operand_dims = [[1, 2, 3], [1, 2, 3], [1, 2, 4]]
    operand_names = [str(idx) for idx in range(len(operand_dims))]
    operands = [
        helper.make_tensor_value_info(name, TensorProto.FLOAT, dims)
        for name, dims in zip(operand_names, operand_dims)
    ]
    averaged = helper.make_tensor_value_info('mean', TensorProto.FLOAT,
                                             [1, 2, 3])

    mean_node = onnx.helper.make_node("Mean",
                                      inputs=operand_names,
                                      outputs=["mean"])

    return ([mean_node], operands, [averaged])
@onnx_test
def mean_single_input_test():
    """Describe a Mean node that receives exactly one FLOAT input.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    only_operand = helper.make_tensor_value_info('0', TensorProto.FLOAT,
                                                 [1, 2, 3])
    averaged = helper.make_tensor_value_info('mean', TensorProto.FLOAT,
                                             [1, 2, 3])

    mean_node = onnx.helper.make_node("Mean",
                                      inputs=["0"],
                                      outputs=["mean"])

    return ([mean_node], [only_operand], [averaged])
@onnx_test
def mean_test():
    """Describe a Mean node over ten DOUBLE inputs of shape [2, 2, 2].

    Inputs are named '0'..'9'. Returns a (nodes, inputs, outputs) tuple.
    """
    operand_names = [str(idx) for idx in range(10)]
    operands = [
        helper.make_tensor_value_info(name, TensorProto.DOUBLE, [2, 2, 2])
        for name in operand_names
    ]
    averaged = helper.make_tensor_value_info('mean', TensorProto.DOUBLE,
                                             [2, 2, 2])

    mean_node = onnx.helper.make_node("Mean",
                                      inputs=operand_names,
                                      outputs=["mean"])

    return ([mean_node], operands, [averaged])
@onnx_test @onnx_test
def min_test(): def min_test():
a = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3]) a = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3])
...@@ -2725,6 +2869,21 @@ def multinomial_test(): ...@@ -2725,6 +2869,21 @@ def multinomial_test():
return ([node], [input], [output]) return ([node], [input], [output])
@onnx_test
def multinomial_generated_seed_test():
    """Describe a Multinomial node that sets sample_size but no seed attribute.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    num_samples = 10
    in_info = helper.make_tensor_value_info("input", TensorProto.FLOAT,
                                            [1, 10])
    out_info = helper.make_tensor_value_info("output", TensorProto.INT32,
                                             [1, 10])

    sampler = onnx.helper.make_node(
        'Multinomial',
        inputs=['input'],
        sample_size=num_samples,
        outputs=['output'],
    )

    return ([sampler], [in_info], [out_info])
@onnx_test @onnx_test
def multinomial_dtype_error_test(): def multinomial_dtype_error_test():
sample_size = 10 sample_size = 10
...@@ -3176,6 +3335,21 @@ def randomnormal_dtype_error_test(): ...@@ -3176,6 +3335,21 @@ def randomnormal_dtype_error_test():
return ([node], [], [output]) return ([node], [], [output])
@onnx_test
def randomnormal_generated_seed_test():
    """Describe a RandomNormal node that sets no seed attribute.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    # NOTE(review): this graph has the same form as the Multinomial generator
    # (one input plus a sample_size attribute); confirm that is intended for
    # RandomNormal.
    num_samples = 10
    in_info = helper.make_tensor_value_info("input", TensorProto.FLOAT,
                                            [1, 10])
    out_info = helper.make_tensor_value_info("output", TensorProto.INT32,
                                             [1, 10])

    sampler = onnx.helper.make_node(
        'RandomNormal',
        inputs=['input'],
        sample_size=num_samples,
        outputs=['output'],
    )

    return ([sampler], [in_info], [out_info])
@onnx_test @onnx_test
def randomnormal_shape_error_test(): def randomnormal_shape_error_test():
dtype = 1 dtype = 1
...@@ -3266,6 +3440,21 @@ def randomuniform_dtype_error_test(): ...@@ -3266,6 +3440,21 @@ def randomuniform_dtype_error_test():
return ([node], [], [output]) return ([node], [], [output])
@onnx_test
def randomuniform_generated_seed_test():
    """Describe a RandomUniform node that sets no seed attribute.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    # NOTE(review): this graph has the same form as the Multinomial generator
    # (one input plus a sample_size attribute); confirm that is intended for
    # RandomUniform.
    num_samples = 10
    in_info = helper.make_tensor_value_info("input", TensorProto.FLOAT,
                                            [1, 10])
    out_info = helper.make_tensor_value_info("output", TensorProto.INT32,
                                             [1, 10])

    sampler = onnx.helper.make_node(
        'RandomUniform',
        inputs=['input'],
        sample_size=num_samples,
        outputs=['output'],
    )

    return ([sampler], [in_info], [out_info])
@onnx_test @onnx_test
def randomuniform_shape_error_test(): def randomuniform_shape_error_test():
dtype = 1 dtype = 1
...@@ -4290,6 +4479,44 @@ def softmax_nonstd_input_test(): ...@@ -4290,6 +4479,44 @@ def softmax_nonstd_input_test():
return ([node0, node1], [x], [y]) return ([node0, node1], [x], [y])
@onnx_test
def softsign_test():
    """Describe a single Softsign node on a [5] FLOAT tensor.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    src = helper.make_tensor_value_info('x', TensorProto.FLOAT, [5])
    dst = helper.make_tensor_value_info('y', TensorProto.FLOAT, [5])

    softsign_node = onnx.helper.make_node('Softsign',
                                          inputs=['x'],
                                          outputs=['y'])

    return ([softsign_node], [src], [dst])
# Decorated with @onnx_test to match the other generators in this file; the
# decorator was missing on this definition.
@onnx_test
def softplus_test():
    """Describe a single Softplus node on a [5] FLOAT tensor.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [5])
    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [5])

    node = onnx.helper.make_node('Softplus', inputs=['x'], outputs=['y'])

    return ([node], [x], [y])
@onnx_test
def softsign_nd_test():
    """Describe a single Softsign node on a [3, 4, 5] FLOAT16 tensor.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    src = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [3, 4, 5])
    dst = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [3, 4, 5])

    softsign_node = onnx.helper.make_node('Softsign',
                                          inputs=['x'],
                                          outputs=['y'])

    return ([softsign_node], [src], [dst])
# Decorated with @onnx_test to match the other generators in this file; the
# decorator was missing on this definition.
@onnx_test
def softplus_nd_test():
    """Describe a single Softplus node on a [3, 4, 5] FLOAT16 tensor.

    Returns a (nodes, inputs, outputs) tuple of lists.
    """
    x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [3, 4, 5])
    y = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [3, 4, 5])

    node = onnx.helper.make_node('Softplus', inputs=['x'], outputs=['y'])

    return ([node], [x], [y])
@onnx_test @onnx_test
def split_minus_axis_test(): def split_minus_axis_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 15]) x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 15])
......
No preview for this file type
greaterorequal_test:g

x1
x2y"GreaterOrEqualgreaterorequal_testZ
x1

Z
x2

b
y

B
\ No newline at end of file
hardsigmoid_default_test:i

xy" HardSigmoidhardsigmoid_default_testZ
x




b
y




B
\ No newline at end of file
hardsigmoid_double_test:
4
xy" HardSigmoid*
alpha>*
beta333?hardsigmoid_double_testZ
x
 



b
y
 



B
\ No newline at end of file
hardsigmoid_half_test:f

xy" HardSigmoidhardsigmoid_half_testZ
x





b
y





B
\ No newline at end of file
hardsigmoid_verify_test:X

xy" HardSigmoidhardsigmoid_verify_testZ
x


b
y


B
\ No newline at end of file
hardswish_test:M

xy" HardSwishhardswish_testZ
x


b
y


B
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment