Commit 7f65a88e authored by Paul

Merge branch 'develop' into mlir-c

parents 79bfe69f b20e3d4d
@@ -180,6 +180,63 @@ void program::finalize()
    mm->finalize(this->impl->ctx);
}
template <class T>
std::string classify(T x)
{
switch(std::fpclassify(x))
{
case FP_INFINITE: return "inf";
case FP_NAN: return "nan";
case FP_NORMAL: return "normal";
case FP_SUBNORMAL: return "subnormal";
case FP_ZERO: return "zero";
default: return "unknown";
}
}
std::unordered_set<std::string> classify_argument(const argument& a)
{
std::unordered_set<std::string> result;
a.visit(
[&](auto t) {
for(const auto& x : t)
result.insert(classify(x));
},
[&](const auto& xs) {
for(const auto& x : xs)
{
auto r = classify_argument(x);
result.insert(r.begin(), r.end());
}
});
return result;
}
void preview_argument(std::ostream& os, const argument& a)
{
a.visit(
[&](auto t) {
if(t.size() <= 10)
{
os << t;
}
else
{
os << to_string_range(t.begin(), t.begin() + 5);
os << ", ..., ";
os << to_string_range(t.end() - 5, t.end());
}
},
[&](const auto& xs) {
for(const auto& x : xs)
{
os << '{';
preview_argument(os, x);
os << '}';
}
});
}
template <class F>
std::vector<argument> generic_eval(const module* mod,
                                   context& ctx,
@@ -313,7 +370,20 @@ std::vector<argument> program::eval(parameter_map params) const
               ins->name() != "load" and not result.empty())
            {
                target tgt = make_target(this->impl->target_name);
-               std::cout << "Output: " << tgt.copy_from(result) << std::endl;
+               auto buffer = tgt.copy_from(result);
if(trace_level == 2)
{
std::cout << "Output has "
<< to_string_range(classify_argument(buffer))
<< std::endl;
std::cout << "Output: ";
preview_argument(std::cout, buffer);
std::cout << std::endl;
}
else
{
std::cout << "Output: " << buffer << std::endl;
}
                }
                return result;
            }));
...
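For context, here is a minimal standalone sketch (not part of this commit, plain std::vector input assumed) of how std::fpclassify drives a classify() helper like the one added above, collecting the distinct categories seen in a buffer:

#include <cmath>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

// Hypothetical standalone version of the classify() helper above.
std::string classify_value(float x)
{
    switch(std::fpclassify(x))
    {
    case FP_INFINITE: return "inf";
    case FP_NAN: return "nan";
    case FP_NORMAL: return "normal";
    case FP_SUBNORMAL: return "subnormal";
    case FP_ZERO: return "zero";
    default: return "unknown";
    }
}

int main()
{
    std::vector<float> data = {0.0f, 1.5f, 1e-39f, NAN, INFINITY};
    std::unordered_set<std::string> categories;
    for(float x : data)
        categories.insert(classify_value(x));
    for(const auto& c : categories)
        std::cout << c << "\n"; // prints each distinct category once
}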
@@ -327,8 +327,12 @@ target_flags(HIP_COMPILER_FLAGS hip::device)
# Remove cuda arch flags
string(REGEX REPLACE --cuda-gpu-arch=[a-z0-9]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
string(REGEX REPLACE --offload-arch=[a-z0-9:+-]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
-string(REPLACE "$<LINK_LANGUAGE:CXX>" "1" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
-string(REPLACE "SHELL:" "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
+# Skip library paths since hip will incorrectly treat it as a source file
+string(APPEND HIP_COMPILER_FLAGS " ")
foreach(_unused RANGE 2)
string(REGEX REPLACE " /[^ ]+\\.(a|so) " " " HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
endforeach()
message(STATUS "Hip compiler flags: ${HIP_COMPILER_FLAGS}")
target_compile_definitions(migraphx_gpu PRIVATE
    "-DMIGRAPHX_HIP_COMPILER=${CMAKE_CXX_COMPILER}"
...
@@ -9,7 +9,7 @@ namespace gpu {
shape hip_argmax::compute_shape(const std::vector<shape>& inputs) const
{
-    check_shapes{inputs, *this}.has(2).standard();
+    check_shapes{inputs, *this}.has(2);
    return op.normalize_compute_shape({inputs.at(0)});
}
...
@@ -9,7 +9,7 @@ namespace gpu {
shape hip_argmin::compute_shape(const std::vector<shape>& inputs) const
{
-    check_shapes{inputs, *this}.has(2).standard();
+    check_shapes{inputs, *this}.has(2);
    return op.normalize_compute_shape({inputs.at(0)});
}
...
@@ -114,6 +114,7 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
    options.params += " -DMIGRAPHX_NGLOBAL=" + std::to_string(options.global);
    options.params += " -DMIGRAPHX_NLOCAL=" + std::to_string(options.local);
    options.params += " " + join_strings(compiler_warnings(), " ");
options.params += " -ftemplate-backtrace-limit=0";
options.params += " -Werror"; options.params += " -Werror";
auto cos = compile_hip_src(srcs, std::move(options.params), get_device_name()); auto cos = compile_hip_src(srcs, std::move(options.params), get_device_name());
if(cos.size() != 1) if(cos.size() != 1)
......
@@ -63,8 +63,16 @@ operation compile_pointwise(context& ctx, const std::vector<shape>& inputs, modu
    run_passes(m, {eliminate_common_subexpression{}, dead_code_elimination{}});
    cpp_generator g;
    g.fmap([](const std::string& fname) { return "migraphx::" + fname; });
-   auto name = g.create_function(g.generate_module(m).set_attributes({"__device__"}));
-   return compile_pointwise((ctx), inputs, "&" + name, g.str());
+   g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");
+   g.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})");
g.add_point_op("sign", "${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))");
g.add_point_op("equal", "migraphx::abs(${0} == ${1})");
g.add_point_op("less", "migraphx::abs(${0} < ${1})");
g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
g.add_point_op("not", "migraphx::abs(not ${0})");
auto name =
g.create_function(g.generate_module(m).set_attributes({"__device__"}).set_generic_types(m));
return compile_pointwise((ctx), inputs, "MIGRAPHX_LIFT(" + name + ")", g.str());
}
} // namespace gpu
...
@@ -62,6 +62,8 @@ struct fusion
        keep_alive(std::move(t));
    }
bool empty() const { return fp == nullptr; }
    op_t operator[](std::size_t i) const
    {
        assert(fp);
@@ -125,12 +127,11 @@ struct fusion
        return shape{shape::int8_type, {ws_size}};
    }
-   void compile(context& ctx)
+   bool compile(context& ctx)
    {
        assert(fp);
-       auto status = miopenCompileFusionPlan(ctx.get_stream().get_miopen(), fp.get());
-       if(status != miopenStatusSuccess)
-           MIGRAPHX_THROW("Compiling fusion plan failed");
+       return miopenCompileFusionPlan(ctx.get_stream().get_miopen(), fp.get()) ==
+              miopenStatusSuccess;
    }
    argument execute(context& ctx,
@@ -561,6 +562,117 @@ struct find_mul_add_relu
    }
};
struct miopen_fusion
{
struct fuse_op_data
{
operation op;
float alpha = 1;
float beta = 0;
};
struct fuse_op : fuse_op_data, reflect_equality<fuse_op>, reflect_stream<fuse_op>
{
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.op, "op"), f(self.alpha, "alpha"), f(self.beta, "beta"));
}
};
std::vector<fuse_op> ops = {};
fusion f = {};
std::function<void(context&, const fusion&, const std::vector<argument>&)> execute;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.ops, "ops"));
}
value compile(context& ctx, const shape&, std::vector<shape> inputs)
{
// Compensate for allocation
inputs.pop_back();
std::size_t i = 0;
f = fusion(inputs[i]);
i++;
std::vector<std::function<void(const fused_operator_args&, const std::vector<argument>&)>>
invokers;
for(auto&& fop : ops)
{
if(i > inputs.size())
{
f = {};
return {};
}
if(fop.op.name() == "convolution")
{
auto* mop = f.create_conv(any_cast<op::convolution>(fop.op), inputs[i]);
invokers.push_back(
[=](const fused_operator_args& fargs, const std::vector<argument>& args) {
miopenSetOpArgsConvForward(
fargs.get(), mop, &fop.alpha, &fop.beta, args[i].implicit());
});
i++;
}
else if(fop.op.name() == "add")
{
auto* mop = f.create_bias(inputs[i]);
invokers.push_back(
[=](const fused_operator_args& fargs, const std::vector<argument>& args) {
miopenSetOpArgsBiasForward(
fargs.get(), mop, &fop.alpha, &fop.beta, args[i].implicit());
});
i++;
}
else if(fop.op.name() == "relu")
{
auto* mop = f.create_relu();
invokers.push_back([=](const fused_operator_args& fargs,
const std::vector<argument>&) {
miopenSetOpArgsActivForward(fargs.get(), mop, &fop.alpha, &fop.beta, 0, 0, 0);
});
}
else
{
f = {};
return {};
}
}
if(not f.compile(ctx))
{
f = {};
return {};
}
execute = [invokers](context& c, const fusion& ff, const std::vector<argument>& args) {
auto fargs = make_fused_args();
for(auto&& invoker : invokers)
invoker(fargs, args);
ff.execute(c, fargs, args.front(), args.back());
};
return {{"workspace", f.get_workspace(ctx).bytes()}};
}
void finalize(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
{
if(not f.empty())
return;
auto v = compile(ctx, output_shape, inputs);
if(not v.is_object())
MIGRAPHX_THROW("Failed to compile fusion plan");
}
std::string name() const { return "gpu::miopen_fusion"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
if(ops.empty())
return {};
// TODO: Check number of arguments
return ops.front().op.compute_shape({inputs[0], inputs[1]});
}
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
execute(ctx, f, args);
return args.back();
}
};
struct miopen_conv_bias
{
    op::convolution op;
@@ -596,7 +708,8 @@ struct miopen_conv_bias
        f    = fusion(inputs[0]);
        conv = f.create_conv(op, inputs[1]);
        bias = f.create_bias(inputs[3]);
-       f.compile(ctx);
+       if(not f.compile(ctx))
+           MIGRAPHX_THROW("Failed to compile fusion plan");
    }
    shape get_workspace(context& ctx) { return f.get_workspace(ctx); }
@@ -683,6 +796,25 @@ void apply_conv_bias(context& ctx, module& p, match::matcher_result r)
    p.replace_instruction(ins, cb, input_ins, weights_ins, old_ws_ins, bias_ins, alloc_ins);
}
inline auto precompile_name(std::string s) // NOLINT
{
return match::make_basic_pred_matcher([=](instruction_ref ins) {
if(ins->name() != "gpu::precompile_op")
return false;
auto op = from_value<operation>(ins->get_operator().to_value().at("op"));
return (op.name() == s);
});
}
template <class... Ms>
auto conv_bias_pointwise(Ms... ms)
{
return precompile_name("pointwise")(
match::either_arg(0, 1)(bias_shape(match::used_once()).bind("bias"),
fusable_conv(match::used_once()).bind("conv")),
ms...);
}
struct find_conv_bias
{
    context* ctx = nullptr;
@@ -709,6 +841,46 @@ struct find_conv_bias_relu
    }
};
struct find_conv_pointwise
{
context* ctx = nullptr;
auto matcher() const
{
return precompile_name("pointwise")(
match::nargs(3),
match::either_arg(0, 1)(bias_shape(match::used_once()).bind("bias"),
fusable_conv(match::used_once()).bind("conv")));
}
void apply(module& m, match::matcher_result r) const
{
auto conv_ins = r.instructions["conv"];
auto bias_ins = r.instructions["bias"];
auto ins = r.result;
auto input_ins = conv_ins->inputs().at(0);
auto weights_ins = conv_ins->inputs().at(1);
auto conv_op = any_cast<miopen_convolution>(conv_ins->get_operator()).op;
auto alloc_ins = ins->inputs().back();
module_ref pm = ins->module_inputs().front();
miopen_fusion op{};
op.ops.push_back({{conv_op}});
for(auto&& i : *pm)
{
if(i.name()[0] == '@')
continue;
auto inputs = to_shapes(i.inputs());
op.ops.push_back({{i.get_operator()}});
}
std::vector<instruction_ref> inputs = {input_ins, weights_ins, bias_ins, alloc_ins};
auto v = op.compile(*ctx, ins->get_shape(), to_shapes(inputs));
if(not v.is_object())
return;
m.replace_instruction(ins, op, inputs);
}
};
struct find_gemm_add
{
    auto matcher() const
@@ -778,6 +950,7 @@ void fuse_ops::apply(module& p) const
    match::find_matches(p, find_triadd{});
    match::find_matches(p,
                        find_layernorm{},
                        find_conv_pointwise{ctx},
                        find_conv_bias_relu{ctx},
                        find_conv_bias{ctx},
                        find_add_gelu{},
...
@@ -76,8 +76,9 @@ void arg_op(Op op, hipStream_t stream, const argument& result, const argument& a
    size_t batch_item_num = batch_lens[axis];
    batch_lens[axis]      = 1;
    migraphx::shape batch_shape{arg_shape.type(), batch_lens};
    migraphx::shape std_arg_shape{arg_shape.type(), arg_shape.lens()};

-   hip_visit_all(arg, arg_shape, batch_shape)([&](auto input, auto arg_s, auto batch_s) {
+   hip_visit_all(arg, std_arg_shape, batch_shape)([&](auto input, auto arg_s, auto batch_s) {
        auto* output = device_cast(result.get<int64_t>().data());
        using type   = device_type<std::remove_cv_t<typename decltype(input)::value_type>>;
        // use one block for items in one batch.
...
@@ -5,6 +5,9 @@
namespace migraphx {
#define MIGRAPHX_STRINGIZE_1(...) #__VA_ARGS__
#define MIGRAPHX_STRINGIZE(...) MIGRAPHX_STRINGIZE_1(__VA_ARGS__)
// Workaround hip's broken abort on device code
#ifdef __HIP_DEVICE_COMPILE__
// NOLINTNEXTLINE
@@ -14,19 +17,67 @@ namespace migraphx {
#define MIGRAPHX_HIP_NORETURN [[noreturn]]
#endif
namespace debug {
struct swallow
{
template <class... Ts>
constexpr swallow(Ts&&...)
{
}
};
template <size_t N>
struct print_buffer
{
char buffer[N + 1] = {0};
char* pos = buffer;
constexpr void append(char c)
{
if(c == 0)
return;
if(pos < buffer + N)
{
*pos = c;
pos++;
}
}
template <size_t M>
constexpr void append(const char (&array)[M])
{
for(int i = 0; i < M; i++)
append(array[i]);
}
};
template <class... Ts>
__host__ __device__ void print(const Ts&... xs)
{
const auto size = (sizeof(xs) + ...);
print_buffer<size> buffer;
swallow{(buffer.append(xs), 0)...};
printf("%s", buffer.buffer);
}
} // namespace debug
// noreturn cannot be used on this function because abort in hip is broken
template <class T1, class T2, class T3, class T4>
MIGRAPHX_HIP_NORETURN inline __host__ __device__ void
-assert_fail(const char* assertion, const char* file, unsigned int line, const char* function)
+assert_fail(const T1& assertion, const T2& file, const T3& line, const T4& function)
{
-    printf("%s:%u: %s: assertion '%s' failed.\n", file, line, function, assertion);
+    // printf is broken on hip with more than one argument, so use a simple print function instead
+    debug::print(file, ":", line, ": ", function, ": assertion '", assertion, "' failed.\n");
+    // printf("%s:%s: %s: assertion '%s' failed.\n", file, line, function, assertion);
    abort();
}

#ifdef MIGRAPHX_DEBUG
#define MIGRAPHX_ASSERT(cond)                                        \
-    ((cond) ? void(0) : [](auto... xs) {                            \
-        assert_fail(xs...);                                         \
-    }(#cond, __FILE__, __LINE__, __PRETTY_FUNCTION__))
+    ((cond) ? void(0) : [](auto&&... private_migraphx_xs) {         \
+        assert_fail(private_migraphx_xs...);                        \
+    }(#cond, __FILE__, MIGRAPHX_STRINGIZE(__LINE__), __PRETTY_FUNCTION__))
#else
#define MIGRAPHX_ASSERT(cond)
#endif
...
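As a side note, here is a host-only sketch (assumptions only, not the device code above) of the idea behind debug::print: size a character buffer at compile time from the sizeof of each argument and append the pieces, so the device never needs dynamic allocation or a multi-argument printf:

#include <cstdio>

// Buffer sized at compile time; stays nul-terminated because it is one byte larger than N.
template <unsigned N>
struct print_buffer
{
    char buffer[N + 1] = {0};
    char* pos          = buffer;
    void append(const char* s)
    {
        while(*s != 0 and pos < buffer + N)
            *pos++ = *s++;
    }
};

template <class... Ts>
void print_all(const Ts&... xs)
{
    // sizeof of each char-array argument bounds the total text length.
    print_buffer<(sizeof(xs) + ...)> buf;
    (buf.append(xs), ...); // C++17 fold expression instead of the swallow trick
    std::printf("%s", buf.buffer);
}

int main() { print_all("file.cpp", ":", "42", ": assertion failed\n"); }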
@@ -137,12 +137,48 @@ constexpr void each_args(F)
{
}
template <class F, class T>
constexpr auto fold_impl(F&&, T&& x)
{
return static_cast<T&&>(x);
}
template <class F, class T, class U, class... Ts>
constexpr auto fold_impl(F&& f, T&& x, U&& y, Ts&&... xs)
{
return fold_impl(f, f(static_cast<T&&>(x), static_cast<U&&>(y)), static_cast<Ts&&>(xs)...);
}
template <class F>
constexpr auto fold(F f)
{
return [=](auto&&... xs) { return fold_impl(f, static_cast<decltype(xs)&&>(xs)...); };
}
template <class... Ts>
-auto pack(Ts... xs)
+constexpr auto pack(Ts... xs)
{
    return [=](auto f) { return f(xs...); };
}
template <class Compare, class P1, class P2>
constexpr auto pack_compare(Compare compare, P1 p1, P2 p2)
{
return p1([&](auto... xs) {
return p2([&](auto... ys) {
auto c = [&](auto x, auto y) -> int {
if(compare(x, y))
return 1;
else if(compare(y, x))
return -1;
else
return 0;
};
return fold([](auto x, auto y) { return x ? x : y; })(c(xs, ys)..., 0);
});
});
}
template <index_int N>
constexpr auto arg_c()
{
@@ -187,7 +223,7 @@ constexpr auto transform_args(F f, Fs... fs)
// NOLINTNEXTLINE
#define MIGRAPHX_LIFT(...) \
-   ([](auto&&... xs) MIGRAPHX_RETURNS((__VA_ARGS__)(static_cast<decltype(xs)>(xs)...))
+   [](auto&&... xs) MIGRAPHX_RETURNS((__VA_ARGS__)(static_cast<decltype(xs)>(xs)...))
} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_FUNCTIONAL_HPP
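For readers unfamiliar with the fold/pack helpers added in this hunk, here is a standalone sketch (assumed semantics, not the kernel header itself) showing the left fold and the "first non-zero comparison wins" pattern that pack_compare builds on:

#include <iostream>

// Simplified stand-ins for the fold/fold_impl helpers above.
template <class F, class T>
auto fold_impl(F&&, T&& x) { return x; }

template <class F, class T, class U, class... Ts>
auto fold_impl(F&& f, T&& x, U&& y, Ts&&... xs)
{
    return fold_impl(f, f(x, y), xs...);
}

template <class F>
auto fold(F f)
{
    return [=](auto... xs) { return fold_impl(f, xs...); };
}

int main()
{
    // Left fold: ((1 + 2) + 3) + 4 = 10
    auto sum = fold([](auto a, auto b) { return a + b; })(1, 2, 3, 4);
    std::cout << sum << "\n";

    // pack_compare-style lexicographic result: the first non-zero element-wise
    // comparison decides the ordering, trailing zeros mean "equal so far".
    auto first_nonzero = fold([](auto a, auto b) { return a != 0 ? a : b; });
    std::cout << first_nonzero(0, 0, -1, 1, 0) << "\n"; // prints -1
}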
@@ -13,6 +13,7 @@ struct integral_constant
    using type = integral_constant;
    constexpr operator value_type() const noexcept { return value; }
    constexpr value_type operator()() const noexcept { return value; }
    static constexpr type to() { return {}; }
};

// NOLINTNEXTLINE
...
@@ -4,6 +4,7 @@
#include <migraphx/kernels/types.hpp>
#include <migraphx/kernels/vec.hpp>
#include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/type_traits.hpp>
#include <hip/hip_fp16.h>
#include <hip/math_functions.h>
@@ -20,17 +21,28 @@ constexpr T as_float(T x)
// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH(name, fname)                                  \
-    template <class... Ts>                                                \
+    template <class... Ts, MIGRAPHX_REQUIRES(not is_any_vec<Ts...>())>    \
    auto __device__ name(Ts... xs) MIGRAPHX_RETURNS(fname(xs...))
// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH_VEC(name) \
template <class... Ts, MIGRAPHX_REQUIRES(is_any_vec<Ts...>())> \
auto __device__ name(Ts... xs) \
{ \
return vec_transform(xs...)([](auto... ys) { return name(ys...); }); \
}
// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH_FOR(type, name, fname)                        \
-    template <class... Ts>                                                \
-    auto __device__ name(type x, Ts... xs) MIGRAPHX_RETURNS(fname(x, xs...))
+    template <class... Ts, MIGRAPHX_REQUIRES(not is_any_vec<Ts...>())>    \
+    auto __device__ name(type x, Ts... xs)->type                          \
+    {                                                                     \
+        return fname(x, xs...);                                           \
+    }

// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_MATH_HALF(name, fname)                             \
-    template <class... Ts>                                                \
+    template <class... Ts, MIGRAPHX_REQUIRES(not is_any_vec<Ts...>())>    \
    auto __device__ name(migraphx::half x, Ts... xs)                       \
        MIGRAPHX_RETURNS(fname(math::as_float(x), math::as_float(xs)...))
@@ -99,21 +111,51 @@ MIGRAPHX_DEVICE_MATH_HALF(tan, ::tan)
MIGRAPHX_DEVICE_MATH_HALF(tanh, ::tanh)

template <class T, class U>
-constexpr auto& max(const T& a, const U& b)
+constexpr auto where(bool cond, const T& a, const U& b)
{
-    return (a < b) ? b : a;
+    return cond ? a : b;
}
MIGRAPHX_DEVICE_MATH_VEC(abs)
MIGRAPHX_DEVICE_MATH_VEC(acos)
MIGRAPHX_DEVICE_MATH_VEC(acosh)
MIGRAPHX_DEVICE_MATH_VEC(asin)
MIGRAPHX_DEVICE_MATH_VEC(asinh)
MIGRAPHX_DEVICE_MATH_VEC(atan)
MIGRAPHX_DEVICE_MATH_VEC(atanh)
MIGRAPHX_DEVICE_MATH_VEC(ceil)
MIGRAPHX_DEVICE_MATH_VEC(cos)
MIGRAPHX_DEVICE_MATH_VEC(cosh)
MIGRAPHX_DEVICE_MATH_VEC(erf)
MIGRAPHX_DEVICE_MATH_VEC(exp)
MIGRAPHX_DEVICE_MATH_VEC(floor)
MIGRAPHX_DEVICE_MATH_VEC(log)
MIGRAPHX_DEVICE_MATH_VEC(pow)
MIGRAPHX_DEVICE_MATH_VEC(round)
MIGRAPHX_DEVICE_MATH_VEC(rsqrt)
MIGRAPHX_DEVICE_MATH_VEC(sin)
MIGRAPHX_DEVICE_MATH_VEC(sinh)
MIGRAPHX_DEVICE_MATH_VEC(sqrt)
MIGRAPHX_DEVICE_MATH_VEC(tan)
MIGRAPHX_DEVICE_MATH_VEC(tanh)
MIGRAPHX_DEVICE_MATH_VEC(where)
template <class T, class U>
-constexpr auto& min(const T& a, const U& b)
+constexpr auto max(const T& a, const U& b)
{
-    return (a > b) ? b : a;
+    return where(a < b, b, a);
}

template <class T, class U>
-constexpr T convert(U x)
+constexpr auto min(const T& a, const U& b)
{
-    return x;
+    return where(a > b, b, a);
}

+template <class T, class U>
+constexpr auto convert(U v)
+{
+    return vec_transform(v)([](auto x) -> T { return x; });
+}
} // namespace migraphx
...
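A brief host-side sketch (illustrative only, not the device header) of why max and min are rewritten in terms of the new where() select: one primitive covers scalars and, via MIGRAPHX_DEVICE_MATH_VEC, extends element by element to vector operands:

#include <cassert>

// Hypothetical scalar versions of the where/max/min pattern above.
template <class T>
constexpr T where(bool cond, T a, T b) { return cond ? a : b; }

template <class T>
constexpr T max2(T a, T b) { return where(a < b, b, a); }

template <class T>
constexpr T min2(T a, T b) { return where(a > b, b, a); }

int main()
{
    static_assert(max2(3, 7) == 7, "select-based max");
    static_assert(min2(3, 7) == 3, "select-based min");
    assert(max2(2.5, 1.0) == 2.5);
}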
@@ -10,13 +10,38 @@
namespace migraphx {
template <class T>
struct implicit_conversion_op
{
T x;
template <index_int N, class U>
constexpr operator vec<U, N>() const
{
static_assert(vec_size<T>() == N, "Vector mismatch size");
return __builtin_convertvector(x, vec<U, N>);
}
template <class U>
constexpr operator U() const
{
return x;
}
};
template <class T>
constexpr implicit_conversion_op<T> implicit_conversion(T x)
{
return {x};
}
template <class F, class T, class... Ts>
__device__ void pointwise_tensor(index idx, F f, T out, Ts... xs)
{
    preload<typename T::type>(idx, xs...)([&](auto... ps) {
        idx.global_stride(out.get_shape().elements(), [&](auto i) {
            auto multi_idx = out.get_shape().multi(i);
-           out[multi_idx] = f(ps[multi_idx]...);
+           out[multi_idx] = implicit_conversion(f(ps[multi_idx]...));
        });
    });
}
@@ -24,7 +49,7 @@ __device__ void pointwise_tensor(index idx, F f, T out, Ts... xs)
template <class F, class... Ts>
__device__ void pointwise(F f, Ts*... ps)
{
-   auto t = transform_args(make_tensors(), rotate_last());
+   auto t = transform_args(make_tensors(), rotate_last(), auto_vectorize());
    t(ps...)([&](auto... xs) {
        auto idx = make_index();
        pointwise_tensor(idx, f, xs...);
...
@@ -29,7 +29,7 @@ constexpr auto traverse_preload(Shapes... ss)
}

template <class T, class... Shapes>
-constexpr index_int compute_preload_size(Shapes...)
+constexpr index_int compute_preload_size_c(Shapes...)
{
    index_int size = 0;
    traverse_preload<T>(Shapes{}...)(
@@ -37,6 +37,12 @@ constexpr index_int compute_preload_size(Shapes...)
    return size;
}
template <class T, class... Shapes>
constexpr auto compute_preload_size(Shapes...)
{
return _c<compute_preload_size_c<T>(Shapes{}...)>;
}
template <class F, class T, class... Ts>
__device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs)
{
@@ -47,6 +53,8 @@ __device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs)
    traverse_preload<T>(xs...)(
        [&](auto x, auto offset, auto copy) {
            if constexpr(copy)
{
if constexpr(decltype(tensor_vec_size(x)){} == 0)
                {
                    auto v = vectorize(x);
                    auto b = as_vec(tensor_vec_size(v), buffer + offset);
@@ -55,6 +63,14 @@ __device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs)
                    return x.with(buffer + offset);
                }
                else
{
auto b = as_vec(tensor_vec_size(x), buffer + offset);
idx.local_stride(x.get_shape().element_space(),
[&](auto i) { b[i] = x.data()[i]; });
return x.with(b);
}
}
else
            {
                return x;
            }
@@ -78,7 +94,7 @@ template <class T, class... Ts>
__device__ auto preload(index idx, Ts... xs)
{
    using type = typename remove_vec<T>::type;
-   constexpr auto size = compute_preload_size<type>(xs.get_shape()...);
+   constexpr auto size = decltype(compute_preload_size<type>(xs.get_shape()...)){};
    const index_int max_size = 512 * sizeof(type);
    return [=](auto f) {
        if constexpr(size > 0 and size < max_size)
...
@@ -3,6 +3,7 @@
#include <migraphx/kernels/types.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/functional.hpp>

namespace migraphx {
@@ -24,6 +25,38 @@ constexpr auto vec_size()
    return decltype(vec_size(T{})){};
}
template <class... Ts>
constexpr auto is_any_vec()
{
if constexpr(sizeof...(Ts) == 0)
return false_type{};
else
return bool_constant<((vec_size<Ts>() + ...) > 0)>{};
}
template <class T, class I>
constexpr auto vec_at(T x, I i)
{
if constexpr(vec_size<T>() == 0)
return x;
else
{
MIGRAPHX_ASSERT(i < vec_size<T>());
return x[i];
}
}
template <class... Ts>
constexpr auto common_vec_size()
{
return fold([](auto x, auto y) {
if constexpr(x > y)
return x;
else
return y;
})(vec_size<Ts>()...);
}
template <index_int N, class T>
__device__ __host__ auto as_vec(T* x)
{
@@ -33,5 +66,25 @@ __device__ __host__ auto as_vec(T* x)
    return reinterpret_cast<vec<T, N>*>(x);
}
template <class... Ts>
constexpr auto vec_transform(Ts... xs)
{
return [=](auto f) {
if constexpr(is_any_vec<Ts...>())
{
using type = decltype(f(vec_at(xs, 0)...));
constexpr auto size = common_vec_size<Ts...>();
vec<type, size> result = {0};
for(int i = 0; i < size; i++)
result[i] = f(vec_at(xs, i)...);
return result;
}
else
{
return f(xs...);
}
};
}
} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP
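The vec_transform helper added above applies a scalar callable lane by lane whenever any argument is a vector type. A hypothetical host-only sketch of the same idea, using the Clang/GCC extended-vector extension that the migraphx vec alias is built on:

#include <cstdio>

typedef float float4 __attribute__((vector_size(16)));

// Apply a scalar callable to each lane of two 4-wide vectors.
template <class F>
float4 transform_lanes(float4 x, float4 y, F f)
{
    float4 result = {0, 0, 0, 0};
    for(int i = 0; i < 4; i++)
        result[i] = f(x[i], y[i]); // scalar op broadcast over the lanes
    return result;
}

int main()
{
    float4 a = {1, 2, 3, 4};
    float4 b = {4, 3, 2, 1};
    float4 m = transform_lanes(a, b, [](float x, float y) { return x < y ? y : x; });
    std::printf("%g %g %g %g\n", m[0], m[1], m[2], m[3]); // prints: 4 3 3 4
}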
@@ -7,40 +7,70 @@
namespace migraphx {

-template <class T>
-constexpr auto tensor_vec_size(T)
-{
-    return vec_size<typename T::type>();
-}
+template <class T>
+constexpr auto tensor_vec_size()
+{
+    return vec_size<typename T::type>();
+}
+
+template <class T>
+constexpr auto tensor_vec_size(T)
+{
+    return tensor_vec_size<T>();
+}

-template <index_int N, class Shape>
-constexpr auto as_vec_shape(Shape s)
-{
-    auto lens = transform(s.lens, s.strides, [](auto len, auto stride) {
-        if(stride == 1)
-            return len / N;
-        else
-            return len;
-    });
-    auto strides = transform(s.strides, [](auto stride) {
-        if(stride == 1)
-            return stride;
-        return stride / N;
-    });
-    MIGRAPHX_ASSERT(make_shape(lens, strides).element_space() * N == s.element_space());
-    return make_shape(lens, strides);
-}
+template <index_int N, class Shape, class Axis>
+constexpr auto shape_step(Shape s, Axis)
+{
+    static_assert(N > 0, "Vector size must be non-zero");
+    return sequence(s.lens.size(), [&](auto... is) {
+        auto lens = transform(s.lens, index_ints<is...>{}, [&](auto i, auto j) {
+            constexpr auto axis = Axis::to();
+            MIGRAPHX_ASSERT(i != 0);
+            MIGRAPHX_ASSERT(j != axis or i % N == 0);
+            if(j == axis)
+                return i / N;
+            else
+                return i;
+        });
+        auto strides = transform(s.strides, index_ints<is...>{}, [&](auto i, auto j) {
+            constexpr auto axis = Axis::to();
+            // If stride of the axis is zero then we dont need to adjust the other strides
+            if(Shape{}.strides[axis] == 0)
+                return i;
+            MIGRAPHX_ASSERT(j == axis or i % N == 0);
+            if(j == axis)
+                return i;
+            else
+                return i / N;
+        });
+        MIGRAPHX_ASSERT(make_shape(lens, strides).elements() * N == s.elements());
+        MIGRAPHX_ASSERT(strides[Axis{}] == 0 or
+                        make_shape(lens, strides).element_space() * N == s.element_space());
+        return make_shape(lens, strides);
+    });
+}
+
+// Bools can not be used as a vector type so convert it to int8
+template <class T>
+__device__ __host__ T* remove_bool(T* x)
+{
+    return x;
+}
+
+inline __device__ __host__ int8_t* remove_bool(bool* x) { return reinterpret_cast<int8_t*>(x); }

-template <index_int N, class T>
-__device__ __host__ auto as_vec(T x)
-{
-    if constexpr(N == 0)
-        return x;
-    else
-        return make_tensor_view(as_vec<N>(x.data()), as_vec_shape<N>(x.get_shape()));
-}
+template <index_int N, class T, class Axis>
+__device__ __host__ auto as_vec(T x, Axis axis)
+{
+    if constexpr(N == 0)
+        return x;
+    else
+        return make_tensor_view(as_vec<N>(remove_bool(x.data())),
+                                shape_step<N>(x.get_shape(), axis));
+}
template <index_int N, class T, class Axis>
-constexpr auto tensor_step(T x, Axis)
+constexpr auto tensor_step(T x, Axis axis)
{
    if constexpr(N == 0)
    {
@@ -49,17 +79,8 @@ constexpr auto tensor_step(T x, Axis)
    else
    {
        constexpr auto s = decltype(x.get_shape()){};
-       MIGRAPHX_ASSERT(s.strides[Axis{}] == 0);
-       return sequence(x.get_shape().lens.size(), [&](auto... is) {
-           auto lens = transform(s.lens, index_ints<is...>{}, [&](auto i, auto j) {
-               constexpr auto axis = Axis{};
-               if(j == axis)
-                   return i / N;
-               else
-                   return i;
-           });
-           return make_tensor_view(x.data(), make_shape(lens, s.strides));
-       });
+       MIGRAPHX_ASSERT(s.strides[axis] == 0);
+       return make_tensor_view(x.data(), shape_step<N>(s, axis));
    }
}
@@ -69,45 +90,71 @@ __device__ __host__ auto as_vec(IntegralConstant ic, T&& x)
    return as_vec<ic>(x);
}
template <class Shape>
constexpr index_int find_vector_axis_c(Shape s)
{
// Find the fastest axis that is not broadcasted
index_int axis = 0;
for(index_int i = 1; i < s.lens.size(); i++)
{
if(s.strides[i] == 0)
continue;
if(s.strides[axis] == 0 or
pack_compare(less{}, pack(s.strides[i], s.lens[i]), pack(s.strides[axis], s.lens[axis])))
axis = i;
}
return axis;
}
template <class... Shapes>
-constexpr index_int find_vector_axis(Shapes... ss)
+constexpr index_int find_vector_axis_c(Shapes... ss)
{
+   const bool all_broadcasted = (ss.broadcasted() and ...);
    index_int axis = 0;
    bool b         = false;
    by([&](auto s) {
        if(b)
            return;
-       auto it = find(s.strides.begin(), s.strides.end(), 1);
-       if(it == s.strides.end())
+       // Skip broadcasted shapes if there are shapes not broadcasted
+       if(not all_broadcasted and s.broadcasted())
            return;
-       axis = it - s.strides.begin();
-       if(s.strides[axis] == 1)
-           b = true;
+       axis = find_vector_axis_c(s);
+       b    = true;
    })(ss...);
-   if(not b)
-       return -1;
    return axis;
}
template <class... Shapes>
constexpr auto find_vector_axis(Shapes...)
{
return _c<find_vector_axis_c(Shapes{}...)>;
}
template <index_int N, class Axis, class... Shapes>
-constexpr auto is_vectorizable(Axis axis, Shapes... ss)
+constexpr auto is_vectorizable_c(Axis axis, Shapes... ss)
{
-   return (((ss.lens[axis] % N) == 0 and ss.strides[axis] == 1) and ...);
+   return ((axis < ss.lens.size() and ss.lens[axis] % N == 0 and
+            // Only vectorize broadcasted types with stride 0, since this causes issues in the
+            // preloader
+            ((not ss.broadcasted() and ss.strides[axis] == 1) or ss.strides[axis] == 0)) and
+           ...);
}

-template <index_int N, class Shape>
-constexpr bool is_vectorizable(Shape s)
-{
-    auto it = find(s.strides.begin(), s.strides.end(), 1);
-    if(it == s.strides.end())
-        return false;
-    auto axis = it - s.strides.begin();
-    return (s.lens[axis] % N) == 0 and s.strides[axis] == 1;
-}
+template <index_int N, class Axis, class... Shapes>
+constexpr auto is_vectorizable(Axis, Shapes...)
+{
+    return _c<is_vectorizable_c<N>(Axis::to(), Shapes{}...)>;
+}
template <class P>
constexpr auto find_vectorize_size(P pred)
{
-   if constexpr(pred(_c<4>))
+   if constexpr(decltype(pred(_c<4>)){})
        return _c<4>;
-   else if constexpr(pred(_c<2>))
+   else if constexpr(decltype(pred(_c<2>)){})
        return _c<2>;
    else
        return _c<0>;
@@ -116,11 +163,12 @@ constexpr auto find_vectorize_size(P pred)
template <class T>
__host__ __device__ auto vectorize(T x)
{
-   if constexpr(vec_size<T>() == 0)
+   if constexpr(tensor_vec_size<T>() == 0)
    {
+       constexpr auto axis = find_vector_axis(x.get_shape());
        constexpr auto n =
-           find_vectorize_size([&](auto i) { return _c<is_vectorizable<i>(x.get_shape())>; });
-       return as_vec<n>(x);
+           find_vectorize_size([&](auto i) { return is_vectorizable<i>(axis, x.get_shape()); });
+       return as_vec<n>(x, axis);
    }
    else
    {
@@ -128,25 +176,34 @@ __host__ __device__ auto vectorize(T x)
    }
}
-inline __device__ __host__ auto auto_vectorize()
+template <class F, class... Ts>
+inline __device__ __host__ auto auto_vectorize_impl(F f, Ts... xs)
{
-   return [](auto... xs) {
-       return [=](auto f) {
    // TODO: Just check there a single axis of 1
    constexpr bool packed_or_broadcasted =
        ((xs.get_shape().packed() or xs.get_shape().broadcasted()) and ...);
    if constexpr(packed_or_broadcasted)
    {
-       constexpr auto axis = find_vector_axis(xs.get_shape()...);
+       constexpr auto axis = decltype(find_vector_axis(xs.get_shape()...)){};
        constexpr auto n    = find_vectorize_size(
-           [&](auto i) { return _c<is_vectorizable<i>(axis, xs.get_shape()...)>; });
+           [&](auto i) { return is_vectorizable<i>(axis, xs.get_shape()...); });
        by(
            [&](auto x) {
-               constexpr auto s = x.get_shape();
+               constexpr auto s = decltype(x.get_shape()){};
+               if constexpr(axis < s.strides.size())
+               {
+                   MIGRAPHX_ASSERT(s.strides[axis] == 0 or s.strides[axis] == 1);
+                   MIGRAPHX_ASSERT(s.lens[axis] > 0);
+                   MIGRAPHX_ASSERT(n == 0 or s.lens[axis] % n == 0);
                    if constexpr(s.strides[axis] == 0)
                        return tensor_step<n>(x, axis);
                    else
-                       return as_vec<n>(x);
+                       return as_vec<n>(x, axis);
+               }
+               else
+               {
+                   return x;
+               }
            },
            f)(xs...);
    }
@@ -154,8 +211,11 @@ inline __device__ __host__ auto auto_vectorize()
    {
        f(xs...);
    }
-   };
-   };
}

+inline __device__ __host__ auto auto_vectorize()
+{
+    return [](auto... xs) { return [=](auto f) { auto_vectorize_impl(f, xs...); }; };
+}
} // namespace migraphx
...
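For orientation, a simplified host-side sketch of the axis selection performed by find_vector_axis_c and find_vectorize_size above: pick the axis with the smallest non-zero stride (the fastest-varying, non-broadcast dimension), then take the largest of {4, 2} that divides its length. The helper names and the fixed candidate sizes are assumptions for illustration; the kernel header also breaks ties using the lengths via pack_compare.

#include <cstdio>
#include <vector>

static int find_vector_axis(const std::vector<int>& lens, const std::vector<int>& strides)
{
    int axis = 0;
    for(int i = 1; i < static_cast<int>(lens.size()); i++)
    {
        if(strides[i] == 0)
            continue; // broadcast axis, skip
        if(strides[axis] == 0 or strides[i] < strides[axis])
            axis = i;
    }
    return axis;
}

static int find_vector_size(int len)
{
    if(len % 4 == 0)
        return 4;
    if(len % 2 == 0)
        return 2;
    return 0;
}

int main()
{
    // A packed shape {2, 3, 4, 8}: the last axis has stride 1 and length 8.
    std::vector<int> lens    = {2, 3, 4, 8};
    std::vector<int> strides = {96, 32, 8, 1};
    int axis = find_vector_axis(lens, strides);
    std::printf("axis=%d vec=%d\n", axis, find_vector_size(lens[axis])); // axis=3 vec=4
}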
@@ -1618,6 +1618,22 @@ def greater_bool_test():
    return ([node1, node2], [x1, x2], [y])
@onnx_test
def greaterorequal_test():
x1 = helper.make_tensor_value_info('x1', TensorProto.FLOAT, [3])
x2 = helper.make_tensor_value_info('x2', TensorProto.FLOAT, [3])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3])
node = onnx.helper.make_node(
'GreaterOrEqual',
inputs=['x1', 'x2'],
outputs=['y'],
)
return ([node], [x1, x2], [y])
@onnx_test
def group_conv_test():
    x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 4, 16, 16])
@@ -1634,6 +1650,60 @@ def group_conv_test():
    return ([node], [x, y], [z])
@onnx_test
def hardsigmoid_default_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 3, 4, 5])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 3, 4, 5])
node = onnx.helper.make_node('HardSigmoid', inputs=['x'], outputs=['y'])
return ([node], [x], [y])
@onnx_test
def hardsigmoid_double_test():
x = helper.make_tensor_value_info('x', TensorProto.DOUBLE, [1, 3, 4, 5])
y = helper.make_tensor_value_info('y', TensorProto.DOUBLE, [1, 3, 4, 5])
node = onnx.helper.make_node('HardSigmoid',
inputs=['x'],
outputs=['y'],
alpha=0.3,
beta=0.7)
return ([node], [x], [y])
@onnx_test
def hardsigmoid_half_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [1, 3, 4, 5])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [1, 3, 4, 5])
node = onnx.helper.make_node('HardSigmoid', inputs=['x'], outputs=['y'])
return ([node], [x], [y])
@onnx_test
def hardsigmoid_verify_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 5])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 5])
node = onnx.helper.make_node('HardSigmoid', inputs=['x'], outputs=['y'])
return ([node], [x], [y])
@onnx_test
def hardswish_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 5])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 5])
node = onnx.helper.make_node('HardSwish', inputs=['x'], outputs=['y'])
return ([node], [x], [y])
@onnx_test
def if_else_test():
    x = onnx.helper.make_tensor_value_info('x', onnx.TensorProto.FLOAT, [2, 3])
@@ -2692,6 +2762,80 @@ def maxpool_same_upper_test():
    return ([node], [x], [y])
@onnx_test
def mean_broadcast_test():
data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 3, 4])
data_1 = helper.make_tensor_value_info('1', TensorProto.FLOAT,
[1, 2, 3, 4])
data_2 = helper.make_tensor_value_info('2', TensorProto.FLOAT, [4])
data_3 = helper.make_tensor_value_info('3', TensorProto.FLOAT, [1])
data_4 = helper.make_tensor_value_info('4', TensorProto.FLOAT, [2, 3, 1])
mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT,
[1, 2, 3, 4])
node = onnx.helper.make_node("Mean",
inputs=["0", "1", "2", "3", "4"],
outputs=["mean"])
return ([node], [data_0, data_1, data_2, data_3, data_4], [mean])
@onnx_test
def mean_fp16_test():
data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT16, [1, 2, 3])
data_1 = helper.make_tensor_value_info('1', TensorProto.FLOAT16, [1, 2, 3])
data_2 = helper.make_tensor_value_info('2', TensorProto.FLOAT16, [1, 2, 3])
mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT16,
[1, 2, 3])
node = onnx.helper.make_node("Mean",
inputs=["0", "1", "2"],
outputs=["mean"])
return ([node], [data_0, data_1, data_2], [mean])
@onnx_test
def mean_invalid_broadcast_test():
data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 2, 3])
data_1 = helper.make_tensor_value_info('1', TensorProto.FLOAT, [1, 2, 3])
data_2 = helper.make_tensor_value_info('2', TensorProto.FLOAT, [1, 2, 4])
mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT, [1, 2, 3])
node = onnx.helper.make_node("Mean",
inputs=["0", "1", "2"],
outputs=["mean"])
return ([node], [data_0, data_1, data_2], [mean])
@onnx_test
def mean_single_input_test():
data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 2, 3])
mean = helper.make_tensor_value_info('mean', TensorProto.FLOAT, [1, 2, 3])
node = onnx.helper.make_node("Mean", inputs=["0"], outputs=["mean"])
return ([node], [data_0], [mean])
@onnx_test
def mean_test():
data = [
helper.make_tensor_value_info(str(i), TensorProto.DOUBLE, [2, 2, 2])
for i in range(10)
]
data_names = [str(i) for i in range(10)]
mean = helper.make_tensor_value_info('mean', TensorProto.DOUBLE, [2, 2, 2])
node = onnx.helper.make_node("Mean", inputs=data_names, outputs=["mean"])
return ([node], data, [mean])
@onnx_test
def min_test():
    a = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3])
@@ -2725,6 +2869,21 @@ def multinomial_test():
    return ([node], [input], [output])
@onnx_test
def multinomial_generated_seed_test():
sample_size = 10
input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10])
output = helper.make_tensor_value_info("output", TensorProto.INT32,
[1, 10])
node = onnx.helper.make_node('Multinomial',
inputs=['input'],
sample_size=sample_size,
outputs=['output'])
return ([node], [input], [output])
@onnx_test
def multinomial_dtype_error_test():
    sample_size = 10
@@ -3176,6 +3335,21 @@ def randomnormal_dtype_error_test():
    return ([node], [], [output])
@onnx_test
def randomnormal_generated_seed_test():
sample_size = 10
input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10])
output = helper.make_tensor_value_info("output", TensorProto.INT32,
[1, 10])
node = onnx.helper.make_node('RandomNormal',
inputs=['input'],
sample_size=sample_size,
outputs=['output'])
return ([node], [input], [output])
@onnx_test
def randomnormal_shape_error_test():
    dtype = 1
@@ -3266,6 +3440,21 @@ def randomuniform_dtype_error_test():
    return ([node], [], [output])
@onnx_test
def randomuniform_generated_seed_test():
sample_size = 10
input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10])
output = helper.make_tensor_value_info("output", TensorProto.INT32,
[1, 10])
node = onnx.helper.make_node('RandomUniform',
inputs=['input'],
sample_size=sample_size,
outputs=['output'])
return ([node], [input], [output])
@onnx_test
def randomuniform_shape_error_test():
    dtype = 1
@@ -4290,6 +4479,44 @@ def softmax_nonstd_input_test():
    return ([node0, node1], [x], [y])
@onnx_test
def softsign_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [5])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [5])
node = onnx.helper.make_node('Softsign', inputs=['x'], outputs=['y'])
return ([node], [x], [y])
def softplus_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [5])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [5])
node = onnx.helper.make_node('Softplus', inputs=['x'], outputs=['y'])
return ([node], [x], [y])
@onnx_test
def softsign_nd_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [3, 4, 5])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [3, 4, 5])
node = onnx.helper.make_node('Softsign', inputs=['x'], outputs=['y'])
return ([node], [x], [y])
def softplus_nd_test():
x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [3, 4, 5])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT16, [3, 4, 5])
node = onnx.helper.make_node('Softplus', inputs=['x'], outputs=['y'])
return ([node], [x], [y])
@onnx_test
def split_minus_axis_test():
    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 15])
@@ -4847,6 +5074,25 @@ def unknown_aten_test():
    return ([node], [x, y], [a])
@onnx_test
def upsample_linear_test():
scales = np.array([1.0, 1.0, 2.0, 2.0], dtype=np.float32)
scales_tensor = helper.make_tensor(name='scales',
data_type=TensorProto.FLOAT,
dims=scales.shape,
vals=scales.flatten().astype(
np.float32))
X = helper.make_tensor_value_info('X', TensorProto.FLOAT, [1, 1, 2, 2])
Y = helper.make_tensor_value_info('Y', TensorProto.FLOAT, [])
node = onnx.helper.make_node('Upsample',
inputs=['X', '', 'scales'],
outputs=['Y'],
mode='linear')
return ([node], [X], [Y], [scales_tensor])
@onnx_test
def upsample_test():
    scales = np.array([1.0, 1.0, 2.0, 3.0], dtype=np.float32)
...
(Binary ONNX test data files are also added by this commit and have no text preview; the recoverable graph names include greaterorequal_test and hardsigmoid_default_test.)