Commit 0369e974 authored by Khalique Ahmed's avatar Khalique Ahmed
Browse files

Merge branch 'batch_report' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into mi100_opts

parents 3a474fca d70fd0df
......@@ -32,6 +32,7 @@ struct parse_generic_op : op_parser<parse_generic_op>
{"Log", "log"},
{"LRN", "lrn"},
{"Neg", "neg"},
{"NonMaxSuppression", "nonmaxsuppression"},
{"Reciprocal", "recip"},
{"Relu", "relu"},
{"Round", "round"},
......@@ -49,7 +50,7 @@ struct parse_generic_op : op_parser<parse_generic_op>
bool needs_contiguous(const std::string& op_name) const
{
return contains({"flatten", "gather", "scatter"}, op_name);
return contains({"flatten", "gather", "nonmaxsuppression", "scatter"}, op_name);
}
instruction_ref parse(const op_desc& opd,
......
......@@ -66,10 +66,8 @@ struct parse_matmul : op_parser<parse_matmul>
make_op("multibroadcast", {{"out_lens", l1_broadcasted_lens}}), l1);
}
}
auto dot_res =
info.add_instruction(make_op(opd.op_name, {{"alpha", 1}, {"beta", 0}}), bl0, bl1);
int64_t num_axis = static_cast<int64_t>(dot_res->get_shape().lens().size());
instruction_ref dot_res = info.add_instruction(make_op(opd.op_name), bl0, bl1);
int64_t num_axis = static_cast<int64_t>(dot_res->get_shape().lens().size());
if(is_a_prepended)
{
dot_res = info.add_instruction(make_op("squeeze", {{"axes", {num_axis - 2}}}), dot_res);
......
......@@ -9,7 +9,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
template <class T>
std::vector<std::size_t> nonzero_indices(const std::vector<T>& data)
static std::vector<std::size_t> nonzero_indices(const std::vector<T>& data)
{
std::vector<std::size_t> indices;
for(std::size_t i = 0; i < data.size(); ++i)
......@@ -31,30 +31,35 @@ struct parse_nonzero : op_parser<parse_nonzero>
std::vector<instruction_ref> args) const
{
migraphx::argument data_arg = args.back()->eval();
check_arg_empty(data_arg, "PARSE_NONZERO: cannot support non-constant input!");
std::vector<std::size_t> indices;
data_arg.visit([&](auto val) {
using val_type = std::remove_cv_t<typename decltype(val)::value_type>;
std::vector<val_type> vec_data;
vec_data.assign(val.begin(), val.end());
indices = nonzero_indices(vec_data);
});
if(data_arg.empty())
{
return info.add_instruction(make_op("nonzero"), args);
}
else
{
std::vector<std::size_t> indices;
data_arg.visit([&](auto val) {
using val_type = std::remove_cv_t<typename decltype(val)::value_type>;
std::vector<val_type> vec_data;
vec_data.assign(val.begin(), val.end());
indices = nonzero_indices(vec_data);
});
shape in_s = args[0]->get_shape();
shape out_s{shape::int64_type, {in_s.lens().size(), indices.size()}};
shape in_s = args[0]->get_shape();
shape out_s{shape::int64_type, {in_s.lens().size(), indices.size()}};
std::vector<int64_t> out_data(out_s.elements());
for(std::size_t i = 0; i < indices.size(); ++i)
{
auto idx = in_s.multi(indices[i]);
for(std::size_t j = 0; j < in_s.lens().size(); ++j)
std::vector<int64_t> out_data(out_s.elements());
for(std::size_t i = 0; i < indices.size(); ++i)
{
out_data[out_s.index({j, i})] = idx[j];
auto idx = in_s.multi(indices[i]);
for(std::size_t j = 0; j < in_s.lens().size(); ++j)
{
out_data[out_s.index({j, i})] = idx[j];
}
}
}
return info.add_literal(literal(out_s, out_data));
return info.add_literal(literal(out_s, out_data));
}
}
};
......
......@@ -9,21 +9,20 @@ namespace onnx {
auto compute_type(shape::type_t t1, shape::type_t t2)
{
const static std::unordered_map<int, int> op_order = {
{static_cast<int>(shape::int8_type), 1},
{static_cast<int>(shape::uint8_type), 2},
{static_cast<int>(shape::int16_type), 3},
{static_cast<int>(shape::uint16_type), 4},
{static_cast<int>(shape::int32_type), 5},
{static_cast<int>(shape::uint32_type), 6},
{static_cast<int>(shape::int64_type), 7},
{static_cast<int>(shape::uint64_type), 8},
{static_cast<int>(shape::half_type), 9},
{static_cast<int>(shape::float_type), 10},
{static_cast<int>(shape::double_type), 11}};
const static std::unordered_map<int, int> op_order = {{shape::int8_type, 1},
{shape::uint8_type, 2},
{shape::int16_type, 3},
{shape::uint16_type, 4},
{shape::int32_type, 5},
{shape::uint32_type, 6},
{shape::int64_type, 7},
{shape::uint64_type, 8},
{shape::half_type, 9},
{shape::float_type, 10},
{shape::double_type, 11}};
int it1 = static_cast<int>(t1);
int it2 = static_cast<int>(t2);
int it1 = t1;
int it2 = t2;
if(!contains(op_order, it1) or !contains(op_order, it2))
{
MIGRAPHX_THROW("PARSE_POW: Input data type not supported!");
......
......@@ -334,7 +334,7 @@ struct parse_resize : op_parser<parse_resize>
auto ins_delta = info.add_literal(dim_s, delta_data);
// slice the data
int64_t slc_stride = static_cast<int64_t>(dim_lens[0]);
int64_t slc_stride = dim_lens[0];
auto low = info.add_instruction(
make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {slc_stride}}}),
data);
......
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
struct parse_roialign : op_parser<parse_roialign>
{
    std::vector<op_desc> operators() const { return {{"RoiAlign"}}; }

    /// Parses the ONNX RoiAlign node into a migraphx "roialign" instruction.
    /// Every attribute is optional; unspecified attributes fall back to the
    /// defaults below. Only the "half_pixel" and "output_half_pixel"
    /// coordinate transformation modes are accepted.
    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          onnx_parser::node_info info,
                          const std::vector<instruction_ref>& args) const
    {
        const auto& attrs = info.attributes;

        // attribute defaults
        std::string coord_trans_mode = "half_pixel";
        std::string mode             = "avg";
        int64_t output_height        = 1;
        int64_t output_width         = 1;
        int64_t sampling_ratio       = 0;
        float spatial_scale          = 1.0f;

        if(contains(attrs, "coordinate_transformation_mode"))
            coord_trans_mode = attrs.at("coordinate_transformation_mode").s();
        // reject anything other than the two supported modes
        if(not contains({"half_pixel", "output_half_pixel"}, coord_trans_mode))
        {
            MIGRAPHX_THROW("coordinate_transformation_mode \"" + coord_trans_mode +
                           "\": invalid value!");
        }

        if(contains(attrs, "mode"))
            mode = attrs.at("mode").s();
        if(contains(attrs, "output_height"))
            output_height = attrs.at("output_height").i();
        if(contains(attrs, "output_width"))
            output_width = attrs.at("output_width").i();
        if(contains(attrs, "sampling_ratio"))
            sampling_ratio = attrs.at("sampling_ratio").i();
        if(contains(attrs, "spatial_scale"))
            spatial_scale = attrs.at("spatial_scale").f();

        return info.add_instruction(make_op("roialign",
                                            {{"coordinate_transformation_mode", coord_trans_mode},
                                             {"mode", mode},
                                             {"output_height", output_height},
                                             {"output_width", output_width},
                                             {"sampling_ratio", sampling_ratio},
                                             {"spatial_scale", spatial_scale}}),
                                    args);
    }
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
struct parse_spacetodepth : op_parser<parse_spacetodepth>
{
    std::vector<op_desc> operators() const { return {{"SpaceToDepth"}}; }

    /// Parses the ONNX SpaceToDepth node: rearranges spatial blocks of size
    /// blocksize x blocksize into the channel dimension. Implemented as
    /// reshape -> transpose -> contiguous -> reshape on an NCHW input.
    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
        auto input_shape = args[0]->get_shape();

        // blocksize attribute of SpaceToDepth; a blocksize of 1 makes this a no-op
        int blocksize = 1;
        if(contains(info.attributes, "blocksize"))
            blocksize = info.attributes.at("blocksize").i();
        // a blocksize below 1 would describe DepthToSpace rather than SpaceToDepth
        if(blocksize < 1)
            MIGRAPHX_THROW("SpaceToDepth: blocksize is less than 1");

        // output dims: {N, C * blocksize^2, H / blocksize, W / blocksize};
        // H and W must both be divisible by blocksize
        auto res_lens = input_shape.lens(); // {N, C, H, W}
        if((res_lens[2] % blocksize) != 0 or (res_lens[3] % blocksize) != 0)
            MIGRAPHX_THROW("SpaceToDepth: div by blocksize quotient not int ");
        res_lens[1] = res_lens[1] * blocksize * blocksize; // Co = C * blocksize^2
        res_lens[2] = res_lens[2] / blocksize;             // Ho = H / blocksize
        res_lens[3] = res_lens[3] / blocksize;             // Wo = W / blocksize

        // intermediate 6-d view that splits each spatial dim by blocksize
        auto split_lens = input_shape.lens(); // {N, C, H, W}
        split_lens[2]   = res_lens[2];
        split_lens[3]   = blocksize;
        split_lens.push_back(res_lens[3]);
        split_lens.push_back(blocksize); // {N, C, Ho, blocksize, Wo, blocksize}

        // move the two blocksize axes in front of C: {N, blocksize, blocksize, C, Ho, Wo}
        std::vector<int64_t> perm = {0, 3, 5, 1, 2, 4};

        auto split_view = info.add_instruction(make_op("reshape", {{"dims", split_lens}}), args[0]);
        auto permuted =
            info.add_instruction(make_op("transpose", {{"permutation", perm}}), split_view);
        return info.add_instruction(make_op("reshape", {{"dims", res_lens}}),
                                    info.make_contiguous(permuted));
    }
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -24,7 +24,7 @@ struct parse_split : op_parser<parse_split>
}
auto lens = args[0]->get_shape().lens();
int64_t n_rank = static_cast<int64_t>(lens.size());
int64_t n_rank = lens.size();
int64_t tuned_axis = tune_axis(n_rank, axis, opd.op_name);
std::vector<int64_t> vec_splits;
......
......@@ -13,6 +13,7 @@
#include <migraphx/algorithm.hpp>
#include <migraphx/output_iterator.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/marker.hpp>
#include <iostream>
#include <sstream>
#include <algorithm>
......@@ -309,8 +310,11 @@ std::vector<argument> program::eval(parameter_map params) const
double t2 = t.record<milliseconds>();
std::cout << "Time: " << t1 << "ms, " << t2 << "ms" << std::endl;
if(trace_level > 1 and ins->name().front() != '@' and
ins->name() != "load")
std::cout << "Output: " << result << std::endl;
ins->name() != "load" and not result.empty())
{
target tgt = make_target(this->impl->target_name);
std::cout << "Output: " << tgt.copy_from(result) << std::endl;
}
return result;
}));
}
......@@ -504,7 +508,28 @@ std::string perf_group(const operation& op)
return op.name();
}
void program::perf_report(std::ostream& os, std::size_t n, parameter_map params) const
// Evaluates the program while invoking the marker's start/stop callbacks
// around the whole program and around every instruction, so an external
// marker implementation can annotate the execution timeline.
// NOTE(review): presumably used for profiler instrumentation — confirm
// against marker.hpp.
void program::mark(const parameter_map& params, marker&& m)
{
    auto& ctx = this->impl->ctx;
    // Run once by itself so the marked run below is not skewed by first-run
    // work; wait for that run to drain before marking starts.
    eval(params);
    ctx.finish();
    // Start marking
    m.mark_start(*this);
    generic_eval(*this, ctx, params, always([&](auto ins, auto f) {
                     argument result;
                     m.mark_start(ins); // per-instruction begin
                     result = f();      // execute the instruction
                     m.mark_stop(ins);  // per-instruction end
                     return result;
                 }));
    m.mark_stop(*this);
}
void program::perf_report(std::ostream& os,
std::size_t n,
parameter_map params,
std::size_t batch) const
{
auto& ctx = this->impl->ctx;
// Run once by itself
......@@ -597,7 +622,8 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
os << std::endl;
os << "Rate: " << rate << "/sec" << std::endl;
os << "Batch size: " << batch << std::endl;
os << "Rate: " << rate * batch << "/sec" << std::endl;
os << "Total time: " << total_time << "ms" << std::endl;
os << "Total instructions time: " << total_instruction_time << "ms" << std::endl;
os << "Overhead time: " << overhead_time << "ms"
......
#include <migraphx/remap.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/float_equal.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/add.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace {
// Matches an "add" where one operand is a two-input "dot" (in either operand
// order), binding the dot instruction as "dot" and the other operand as "a".
struct find_dot_add
{
    auto matcher() const
    {
        return match::name("add")(match::any_of(
            // dot is the first operand; "a" may be anything
            match::args(match::name("dot")(match::nargs(2)).bind("dot"), match::any().bind("a")),
            // "a" is the first operand; it must have a single use
            match::args(match::used_once().bind("a"),
                        match::name("dot")(match::nargs(2)).bind("dot"))));
    }

    // Replaces the matched add with a three-input dot whose beta is 1, so the
    // "a" operand is carried as the dot's third input.
    // NOTE(review): assumes op::dot with beta accumulates the third input
    // (gemm-style C = alpha*A*B + beta*C) — confirm in op/dot.hpp.
    void apply(module& p, match::matcher_result r) const
    {
        auto ins     = r.result; // the matched "add"
        auto dot_ins = r.instructions["dot"];
        auto a_ins   = r.instructions["a"];

        auto dot = any_cast<op::dot>(dot_ins->get_operator());
        dot.beta = 1;
        p.replace_instruction(ins, dot, dot_ins->inputs()[0], dot_ins->inputs()[1], a_ins);
    }
};
} // namespace
void remap::apply(module& p) const { match::find_matches(p, find_dot_add{}); }
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -269,7 +269,7 @@ std::vector<instruction_ref> rewrite_rnn::vanilla_rnn_cell(bool is_forward,
instruction_ref hidden_out = prog.end();
instruction_ref last_out{};
last_out = prog.insert_instruction(ins, make_op("unsqueeze", {{"axes", {0, 1}}}), sih);
long seq_len = static_cast<long>(get_seq_len(prog, seq, seq_lens));
long seq_len = get_seq_len(prog, seq, seq_lens);
for(long i = 0; i < seq_len; i++)
{
long seq_index = is_forward ? i : (seq_len - 1 - i);
......@@ -556,7 +556,7 @@ std::vector<instruction_ref> rewrite_rnn::gru_cell(bool is_forward,
instruction_ref last_output{};
migraphx::shape seq_shape = seq->get_shape();
migraphx::shape r_shape = r->get_shape();
long hs = static_cast<long>(r_shape.lens()[2]);
long hs = r_shape.lens()[2];
migraphx::shape ss(seq_shape.type(), {seq_shape.lens()[1], r_shape.lens()[2]});
std::vector<float> data(ss.elements(), 1.0f);
......@@ -613,7 +613,7 @@ std::vector<instruction_ref> rewrite_rnn::gru_cell(bool is_forward,
rb_h);
}
long seq_len = static_cast<long>(get_seq_len(prog, seq, seq_lens));
long seq_len = get_seq_len(prog, seq, seq_lens);
for(long i = 0; i < seq_len; i++)
{
long seq_index = is_forward ? i : (seq_len - 1 - i);
......@@ -1032,7 +1032,7 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
instruction_ref last_cell_output{};
migraphx::shape r_shape = r->get_shape();
long hs = static_cast<long>(r_shape.lens()[2]);
long hs = r_shape.lens()[2];
auto bs = ih->get_shape().lens()[1];
std::vector<int64_t> perm{1, 0};
......@@ -1094,7 +1094,7 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
ins, make_op("broadcast", {{"axis", 1}, {"out_lens", ic_lens}}), pphf);
}
long seq_len = static_cast<long>(get_seq_len(prog, seq, seq_lens));
long seq_len = get_seq_len(prog, seq, seq_lens);
for(long i = 0; i < seq_len; ++i)
{
long seq_index = is_forward ? i : (seq_len - 1 - i);
......
......@@ -84,13 +84,7 @@ struct match_find_quantizable_ops
}
else if(qop->name() == "dot")
{
auto dot_op = any_cast<op::dot>(qop->get_operator());
if(!(float_equal(dot_op.alpha, 1.0f) and float_equal(dot_op.beta, 0.0f)))
return;
if(qop_args.size() == 3)
qop_args.pop_back();
dq = m.insert_instruction(
qop, migraphx::make_op("quant_dot", {{"alpha", 1}, {"beta", 0}}), qop_args);
dq = m.insert_instruction(qop, migraphx::make_op("quant_dot"), qop_args);
}
auto ins_type = qop->get_shape().type();
dq_scale = m.add_literal(literal({ins_type}, {scale}));
......
......@@ -539,6 +539,46 @@ struct find_reshape_cont
}
};
// match sequence of transpose --> contiguous --> reshaper_op
// Binds "trans_ins" (an instruction with a transposed shape), "cont_ins"
// (the contiguous), and "reshaper_ins" (the reshape/squeeze/unsqueeze).
// Both the contiguous and the reshaper must be used exactly once so the
// chain can be rewritten safely.
auto match_transpose_contiguous_reshaper()
{
    return match::name({"reshape", "squeeze", "unsqueeze"})(
               match::used_once(),
               match::args(
                   match::name("contiguous")(
                       match::used_once(), match::args(match::transpose_shape().bind("trans_ins")))
                       .bind("cont_ins")))
        .bind("reshaper_ins");
};
// finds the pattern of transpose --> contiguous --> reshaper_op --> unary
// application of this matcher moves the unary operation before the contiguous so it becomes
// transpose --> unary --> contiguous --> reshaper_op. later pointwise sub-module can be created out
// of unary --> contiguous --> reshaper_op. Such pattern appears in depthToSpace or spaceToDepth
// operator.
struct find_transpose_contiguous_reshaper_unary
{
    auto matcher() const
    {
        // a single-input pointwise op, used once, whose argument is the
        // transpose --> contiguous --> reshaper chain matched above
        return pointwise(match::used_once(),
                         match::nargs(1),
                         match::args(match_transpose_contiguous_reshaper()));
    }

    // Moves the unary op before the contiguous: the unary is re-created
    // directly on the transpose output, a fresh contiguous is inserted after
    // it, and the reshaper op is re-applied on top of that new chain.
    void apply(module& p, match::matcher_result r) const
    {
        auto ins          = r.result; // the matched unary op
        auto reshaper_ins = r.instructions["reshaper_ins"];
        auto trans_ins    = r.instructions["trans_ins"];
        auto cont_ins     = r.instructions["cont_ins"];

        auto unary_op_name = ins->get_operator().name();
        auto unary_ins     = p.insert_instruction(cont_ins, make_op(unary_op_name), trans_ins);
        auto new_cont_ins  = p.insert_instruction(cont_ins, make_op("contiguous"), unary_ins);
        // older cont and reshape are removed by deadcode elimination
        p.replace_instruction(ins, reshaper_ins->get_operator(), new_cont_ins);
    }
};
void simplify_reshapes::apply(module& p) const
{
for(int i = 0; i < 2; i++)
......@@ -553,7 +593,8 @@ void simplify_reshapes::apply(module& p) const
find_concat_transpose{},
find_nested_convert{},
find_nested_slice{},
find_nested_concat{});
find_nested_concat{},
find_transpose_contiguous_reshaper_unary{});
dead_code_elimination{}.apply(p);
}
}
......
......@@ -33,8 +33,6 @@ rocm_set_soversion(migraphx_cpu ${MIGRAPHX_SO_VERSION})
set(MIGRAPHX_ENABLE_ZENDNN Off CACHE BOOL "")
find_package(Threads)
if(MIGRAPHX_ENABLE_ZENDNN)
find_path(ZENDNN_INC_PATH zendnn.hpp)
find_library(ZENDNN_LIB amdZenDNN)
......@@ -53,7 +51,7 @@ if(MIGRAPHX_ENABLE_ZENDNN)
else()
target_link_libraries(migraphx_cpu PRIVATE DNNL::dnnl)
endif()
target_link_libraries(migraphx_cpu PRIVATE migraphx Threads::Threads)
target_link_libraries(migraphx_cpu PRIVATE migraphx)
find_package(OpenMP)
target_link_libraries(migraphx_cpu PUBLIC OpenMP::OpenMP_CXX)
......
......@@ -73,7 +73,7 @@ dnnl::memory::desc to_dnnl_memory_desc(const shape& s)
dnnl::memory to_dnnl_memory(const dnnl::memory::desc& desc, const argument& a)
{
return dnnl::memory(desc, get_dnnl_context().engine, a.data());
return {desc, get_dnnl_context().engine, a.data()};
}
dnnl::memory to_dnnl_memory(const argument& a)
......
......@@ -3,7 +3,6 @@
#include <migraphx/check_context.hpp>
#include <migraphx/adjust_allocation.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/decompose.hpp>
#include <migraphx/eliminate_allocation.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/eliminate_concat.hpp>
......@@ -14,7 +13,6 @@
#include <migraphx/memory_coloring.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/remap.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
......@@ -52,8 +50,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination{},
eliminate_data_type{unsupported_types, shape::type_t::float_type},
dead_code_elimination{},
decompose{},
dead_code_elimination{},
simplify_reshapes{},
eliminate_identity{},
eliminate_pad{},
......
......@@ -60,6 +60,7 @@ add_library(migraphx_device
device/mul_add.cpp
device/mul_add_relu.cpp
device/multinomial.cpp
device/nonzero.cpp
device/pad.cpp
device/pow.cpp
device/prelu.cpp
......@@ -124,6 +125,7 @@ add_library(migraphx_gpu
compile_hip.cpp
compile_hip_code_object.cpp
compile_pointwise.cpp
compile_roialign.cpp
concat.cpp
convert.cpp
convolution.cpp
......@@ -145,6 +147,7 @@ add_library(migraphx_gpu
leaky_relu.cpp
mlir_conv.cpp
multinomial.cpp
nonzero.cpp
pack_args.cpp
pack_int8_args.cpp
pad.cpp
......@@ -202,6 +205,7 @@ register_migraphx_gpu_ops(hip_
min
mul
multinomial
nonzero
pad
pow
prelu
......
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/errors.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/env.hpp>
#include <cassert>
#include <iostream>
......@@ -230,6 +231,20 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
return {compiler.compile(srcs)};
}
/// Builds a comma-separated list of numbered names derived from `param`:
/// enum_params(3, "x") -> "x0,x1,x2"; a count of 0 yields "".
///
/// Improvements over the previous version: `param` is taken by const
/// reference instead of by value (it is only read), and the result is
/// accumulated directly into one string instead of materializing an
/// intermediate std::vector<std::string> just to join it.
std::string enum_params(std::size_t count, const std::string& param)
{
    std::string result;
    for(std::size_t i = 0; i < count; ++i)
    {
        if(i != 0)
            result += ',';
        result += param;
        result += std::to_string(i);
    }
    return result;
}
/// Computes the number of global workitems for a kernel launch: enough
/// workgroups of size `local` to cover `n` elements, capped at 256 groups.
std::size_t compute_global(std::size_t n, std::size_t local)
{
    // round n up to whole workgroups
    std::size_t full_groups = n / local;
    if(n % local != 0)
        ++full_groups;
    // never launch more than 256 workgroups
    if(full_groups > 256)
        full_groups = 256;
    return full_groups * local;
}
#endif // MIGRAPHX_USE_HIPRTC
} // namespace gpu
......
#include <migraphx/gpu/compile_pointwise.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
......@@ -28,20 +29,6 @@ int main() {}
)__migraphx__";
std::string enum_params(std::size_t count, std::string param)
{
std::vector<std::string> items(count);
transform(range(count), items.begin(), [&](auto i) { return param + std::to_string(i); });
return join_strings(items, ",");
}
std::size_t compute_global(std::size_t n, std::size_t local = 1024)
{
std::size_t groups = (n + local - 1) / local;
std::size_t nglobal = std::min<std::size_t>(256, groups) * local;
return nglobal;
}
operation compile_pointwise(context&, const std::vector<shape>& inputs, const std::string& lambda)
{
hip_compile_options options;
......
#include <migraphx/gpu/compile_roialign.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// HIP source for the roialign kernel, compiled at runtime through
// compile_hip_code_object (see compile_roialign below); its tuning
// constants are injected as -D macros in the compile options.
// NOLINTNEXTLINE
static const char* const roialign_kernel = R"__migraphx__(
#include <migraphx/kernels/roialign.hpp>
#include <migraphx/kernels/basic_ops.hpp>
#include <args.hpp>
using namespace migraphx;
extern "C" {
__global__ void roialign_kernel(void* in_x, void* in_rois, void* in_ind, void* y)
{
make_tensors()(in_x, in_rois, in_ind, y)([](auto&&... xs) { roialign(xs...); });
}
}
int main() {}
)__migraphx__";
/// Compiles the roialign HIP kernel for the given input/output shapes.
/// `val` must carry the roialign operator's attributes; each one is passed
/// to the kernel source as a -D preprocessor macro.
operation compile_roialign(context&, const std::vector<shape>& io_shapes, const value& val)
{
    hip_compile_options options;
    const auto& output_shape = io_shapes.back();

    options.local          = 128;
    options.global         = compute_global(output_shape.elements(), options.local);
    options.inputs         = io_shapes;
    options.output         = output_shape;
    options.kernel_name    = "roialign_kernel";
    options.reduced_inputs = io_shapes;

    // sampling_ratio
    assert(val.contains("sampling_ratio"));
    options.params +=
        " -DSAMPLING_RATIO=" + std::to_string(val.at("sampling_ratio").to<int64_t>());

    // pooling_mode
    assert(val.contains("mode"));
    const bool avg_pooling = val.at("mode").to<std::string>() == "avg";
    options.params += " -DIS_AVG_POOLING=" + std::to_string(avg_pooling ? 1 : 0);

    // coord_trans_mode: "output_half_pixel" shifts roi coordinates by -0.5
    assert(val.contains("coordinate_transformation_mode"));
    const auto trans_mode = val.at("coordinate_transformation_mode").to<std::string>();
    options.params +=
        " -DROIS_OFFSET=" + std::to_string(trans_mode == "output_half_pixel" ? -0.5f : 0.0f);

    // spatial_scale
    assert(val.contains("spatial_scale"));
    options.params += " -DSPATIAL_SCALE=" + std::to_string(val.at("spatial_scale").to<float>());

    return compile_hip_code_object(roialign_kernel, options);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.