"src/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "056318a0ae0441d71fa2aa589f18d779940d070b"
Commit df032e06 authored by Paul

Merge branch 'develop' into mlir-c

parents cf4642cd 19f65e7e
......@@ -18,6 +18,7 @@ namespace op {
struct sign : unary<sign>
{
+std::string point_op() const { return "(${0} > 0 ? 1 : ((${0} < 0) ? -1 : 0))"; }
auto apply() const
{
return [](auto x) { return (x > 0 ? 1 : ((x < 0) ? -1 : 0)); };
......
......@@ -45,7 +45,7 @@ struct topk
shape s_val{type, lens};
shape s_ind{shape::int64_type, lens};
-return shape({s_val, s_ind});
+return {{s_val, s_ind}};
}
template <class T, class Compare>
......@@ -131,7 +131,7 @@ struct topk
});
});
-return argument({res_val, res_ind});
+return {{res_val, res_ind}};
}
};
......
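Several hunks in this merge make the same mechanical change: an explicit constructor call in a return statement becomes a braced-init-list, which is copy-list-initialized as the function's declared return type, so the type name is not repeated at the return site. A minimal sketch of the pattern (hypothetical stand-in type, not from this diff):

#include <string>
#include <vector>

struct labeled_list // stand-in for a type constructible from a vector, like shape above
{
    std::vector<std::string> items;
    labeled_list(std::vector<std::string> v) : items(std::move(v)) {}
};

labeled_list make_pair_list(const std::string& a, const std::string& b)
{
    // before-style: return labeled_list({a, b});
    // after-style: the outer braces select the labeled_list constructor,
    // the inner braces form its std::vector<std::string> argument
    return {{a, b}};
}

Note this only compiles because the constructor is non-explicit; copy-list-initialization rejects explicit constructors, which is why the change is safe for these return types.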
......@@ -103,7 +103,7 @@ auto operator==(const T& x, const U& y) -> decltype(x.name() == y.name())
} // namespace operation_operators
template <class T>
-auto normalize_compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs)
+auto normalize_compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.normalize_compute_shape(inputs))
{
dependent_type<operation, T> y = x;
......@@ -111,6 +111,13 @@ auto normalize_compute_shape_op(rank<1>, const T& x, const std::vector<shape>& i
return any_cast<T>(y).normalize_compute_shape(inputs);
}
+template <class T>
+auto normalize_compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs)
+-> decltype(x.compute_shape(inputs, {}))
+{
+return x.compute_shape(inputs, {});
+}
template <class T>
shape normalize_compute_shape_op(rank<0>, const T& x, const std::vector<shape>&)
{
......@@ -121,7 +128,7 @@ shape normalize_compute_shape_op(rank<0>, const T& x, const std::vector<shape>&)
template <class T>
shape normalize_compute_shape_op(const T& x, const std::vector<shape>& inputs)
{
-return normalize_compute_shape_op(rank<1>{}, x, inputs);
+return normalize_compute_shape_op(rank<2>{}, x, inputs);
}
template <class T>
......
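The rank<N> overloads above are an instance of tag dispatch: rank<2> derives from rank<1>, which derives from rank<0>, so a call made with rank<2>{} binds to the highest-ranked overload whose trailing decltype is well-formed and silently falls back down the chain otherwise. Bumping the dispatcher to rank<2> keeps normalize_compute_shape preferred while slotting compute_shape(inputs, {}) in as the new middle fallback. A self-contained sketch of the mechanism, with illustrative names rather than MIGraphX's actual definitions:

#include <cstddef>
#include <iostream>

// my_rank<N> inherits from my_rank<N - 1>, so an argument of my_rank<2>
// prefers the my_rank<2> overload yet still converts to my_rank<1> or my_rank<0>.
template <std::size_t N>
struct my_rank : my_rank<N - 1> {};
template <>
struct my_rank<0> {};

template <class T>
auto describe(my_rank<2>, const T& x) -> decltype(x.normalize_compute_shape())
{ return x.normalize_compute_shape(); }

template <class T>
auto describe(my_rank<1>, const T& x) -> decltype(x.compute_shape())
{ return x.compute_shape(); }

template <class T>
const char* describe(my_rank<0>, const T&) { return "no shape method"; }

struct has_normalize { const char* normalize_compute_shape() const { return "normalized"; } };
struct has_compute { const char* compute_shape() const { return "plain"; } };
struct has_neither {};

int main()
{
    std::cout << describe(my_rank<2>{}, has_normalize{}) << '\n'  // "normalized"
              << describe(my_rank<2>{}, has_compute{}) << '\n'    // "plain"
              << describe(my_rank<2>{}, has_neither{}) << '\n';   // "no shape method"
}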
......@@ -57,6 +57,7 @@
#include <migraphx/op/mul.hpp>
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/op/neg.hpp>
+#include <migraphx/op/nonmaxsuppression.hpp>
#include <migraphx/op/nonzero.hpp>
#include <migraphx/op/outline.hpp>
#include <migraphx/op/pad.hpp>
......@@ -80,6 +81,7 @@
#include <migraphx/op/rnn_last_hs_output.hpp>
#include <migraphx/op/rnn_variable_seq_lens.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp>
+#include <migraphx/op/roialign.hpp>
#include <migraphx/op/round.hpp>
#include <migraphx/op/rsqrt.hpp>
#include <migraphx/op/scalar.hpp>
......
......@@ -67,7 +67,8 @@ struct program
void finalize();
-void perf_report(std::ostream& os, std::size_t n, parameter_map params) const;
+void
+perf_report(std::ostream& os, std::size_t n, parameter_map params, std::size_t batch = 1) const;
void mark(const parameter_map& params, marker&& m);
......
......@@ -106,7 +106,7 @@ argument run_loop(const LoopModel& model,
std::copy(in_args.begin() + 2, in_args.end(), out_args.begin());
model.set_zero(ctx, scan_outputs, iter);
-return argument(out_args);
+return {out_args};
}
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -18,7 +18,7 @@ inline namespace MIGRAPHX_INLINE_NS {
template <class F>
auto with_char(F f)
{
-return [=](unsigned char c) { return f(c); };
+return [=](unsigned char c) -> bool { return f(c); };
}
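The explicit -> bool matters because with_char is typically wrapped around <cctype> classifiers such as std::isspace, which return int; the wrapper already narrows the parameter to unsigned char (avoiding undefined behavior for negative char values), and now it also coerces the result to bool at the lambda boundary, silencing implicit int-to-bool conversion warnings at every call site. A quick illustration, assuming <cctype> and <type_traits> are available:

auto is_space = with_char([](int c) { return std::isspace(c); }); // std::isspace returns int
static_assert(std::is_same<decltype(is_space('x')), bool>{}, "with_char now yields a bool predicate");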
inline std::string
......@@ -71,7 +71,7 @@ std::string trim(const std::string& s, F f)
{
auto start = std::find_if_not(s.begin(), s.end(), f);
auto last = std::find_if_not(s.rbegin(), std::string::const_reverse_iterator(start), f).base();
-return std::string(start, last);
+return {start, last};
}
inline std::string trim(const std::string& s)
......@@ -120,22 +120,27 @@ interpolate_string(const std::string& input, F f, std::string start = "${", std:
result.append(it, next_start);
if(next_start == input.end())
break;
-auto r = f(next_start + start.size(), next_end - end.size() + 1);
+auto r = f(next_start + start.size(), next_end);
result.append(r.begin(), r.end());
-it = next_end + 1;
+it = next_end + end.size();
}
return result;
}
inline std::string interpolate_string(const std::string& input,
-const std::unordered_map<std::string, std::string>& vars)
-{
-return interpolate_string(input, [&](auto start, auto last) {
-auto key = trim({start, last});
-auto it = vars.find(key);
-if(it == vars.end())
-throw std::runtime_error("Unknown key: " + key);
-return it->second;
-});
+const std::unordered_map<std::string, std::string>& vars,
+std::string start = "${",
+std::string end = "}")
+{
+return interpolate_string(input,
+[&](auto start_it, auto last_it) {
+auto key = trim({start_it, last_it});
+auto it = vars.find(key);
+if(it == vars.end())
+throw std::runtime_error("Unknown key: " + key);
+return it->second;
+},
+std::move(start),
+std::move(end));
}
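The offset fix in the template (passing next_end directly and advancing the cursor by end.size() instead of a hard-coded 1) is what makes multi-character end delimiters work, and the overload above now forwards custom delimiters through. A usage sketch under the signatures shown here:

std::unordered_map<std::string, std::string> vars = {{"name", "migraphx"}};
interpolate_string("hello ${name}", vars);              // -> "hello migraphx"
interpolate_string("hello [[name]]", vars, "[[", "]]"); // -> "hello migraphx"
interpolate_string("hello ${oops}", vars);              // throws std::runtime_error("Unknown key: oops")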
template <class Iterator>
......
......@@ -32,6 +32,7 @@ struct parse_generic_op : op_parser<parse_generic_op>
{"Log", "log"},
{"LRN", "lrn"},
{"Neg", "neg"},
{"NonMaxSuppression", "nonmaxsuppression"},
{"Reciprocal", "recip"},
{"Relu", "relu"},
{"Round", "round"},
......@@ -49,7 +50,7 @@ struct parse_generic_op : op_parser<parse_generic_op>
bool needs_contiguous(const std::string& op_name) const
{
return contains({"flatten", "gather", "scatter"}, op_name);
return contains({"flatten", "gather", "nonmaxsuppression", "scatter"}, op_name);
}
instruction_ref parse(const op_desc& opd,
......
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
struct parse_roialign : op_parser<parse_roialign>
{
std::vector<op_desc> operators() const { return {{"RoiAlign"}}; }
instruction_ref parse(const op_desc& /*opd*/,
const onnx_parser& /*parser*/,
onnx_parser::node_info info,
const std::vector<instruction_ref>& args) const
{
std::string coord_trans_mode = "half_pixel";
if(contains(info.attributes, "coordinate_transformation_mode"))
{
coord_trans_mode = info.attributes.at("coordinate_transformation_mode").s();
}
if(not contains({"half_pixel", "output_half_pixel"}, coord_trans_mode))
{
MIGRAPHX_THROW("coordinate_transformation_mode \"" + coord_trans_mode +
"\": invalid value!");
}
std::string mode = "avg";
if(contains(info.attributes, "mode"))
{
mode = info.attributes.at("mode").s();
}
int64_t output_height = 1;
if(contains(info.attributes, "output_height"))
{
output_height = info.attributes.at("output_height").i();
}
int64_t output_width = 1;
if(contains(info.attributes, "output_width"))
{
output_width = info.attributes.at("output_width").i();
}
int64_t sampling_ratio = 0;
if(contains(info.attributes, "sampling_ratio"))
{
sampling_ratio = info.attributes.at("sampling_ratio").i();
}
float spatial_scale = 1.0f;
if(contains(info.attributes, "spatial_scale"))
{
spatial_scale = info.attributes.at("spatial_scale").f();
}
return info.add_instruction(make_op("roialign",
{{"coordinate_transformation_mode", coord_trans_mode},
{"mode", mode},
{"output_height", output_height},
{"output_width", output_width},
{"sampling_ratio", sampling_ratio},
{"spatial_scale", spatial_scale}}),
args);
}
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -526,7 +526,10 @@ void program::mark(const parameter_map& params, marker&& m)
m.mark_stop(*this);
}
-void program::perf_report(std::ostream& os, std::size_t n, parameter_map params) const
+void program::perf_report(std::ostream& os,
+std::size_t n,
+parameter_map params,
+std::size_t batch) const
{
auto& ctx = this->impl->ctx;
// Run once by itself
......@@ -619,7 +622,8 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
os << std::endl;
os << "Rate: " << rate << "/sec" << std::endl;
os << "Batch size: " << batch << std::endl;
os << "Rate: " << rate * batch << "/sec" << std::endl;
os << "Total time: " << total_time << "ms" << std::endl;
os << "Total instructions time: " << total_instruction_time << "ms" << std::endl;
os << "Overhead time: " << overhead_time << "ms"
......
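Note on the new batch parameter: rate is measured in runs per second, so rate * batch reports samples per second when each run consumes a full batch. For example, 100 timed runs completing in 500 ms give rate = 200 runs/sec, printed as 12800/sec with batch = 64. (Illustrative arithmetic; the rate computation itself sits outside the shown hunk.)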
File mode changed from 100755 to 100644
......@@ -539,6 +539,46 @@ struct find_reshape_cont
}
};
+// match the sequence transpose --> contiguous --> reshaper_op
+auto match_transpose_contiguous_reshaper()
+{
+return match::name({"reshape", "squeeze", "unsqueeze"})(
+match::used_once(),
+match::args(
+match::name("contiguous")(
+match::used_once(), match::args(match::transpose_shape().bind("trans_ins")))
+.bind("cont_ins")))
+.bind("reshaper_ins");
+};
+// finds the pattern transpose --> contiguous --> reshaper_op --> unary
+// Applying this matcher moves the unary operation before the contiguous, so the sequence becomes
+// transpose --> unary --> contiguous --> reshaper_op. A pointwise sub-module can later be created
+// out of unary --> contiguous --> reshaper_op. This pattern appears in the depthToSpace and
+// spaceToDepth operators.
+struct find_transpose_contiguous_reshaper_unary
+{
+auto matcher() const
+{
+return pointwise(match::used_once(),
+match::nargs(1),
+match::args(match_transpose_contiguous_reshaper()));
+}
+void apply(module& p, match::matcher_result r) const
+{
+auto ins = r.result;
+auto reshaper_ins = r.instructions["reshaper_ins"];
+auto trans_ins = r.instructions["trans_ins"];
+auto cont_ins = r.instructions["cont_ins"];
+auto unary_op_name = ins->get_operator().name();
+auto unary_ins = p.insert_instruction(cont_ins, make_op(unary_op_name), trans_ins);
+auto new_cont_ins = p.insert_instruction(cont_ins, make_op("contiguous"), unary_ins);
+// the old contiguous and reshape are removed by dead-code elimination
+p.replace_instruction(ins, reshaper_ins->get_operator(), new_cont_ins);
+}
+};
void simplify_reshapes::apply(module& p) const
{
for(int i = 0; i < 2; i++)
......@@ -553,7 +593,8 @@ void simplify_reshapes::apply(module& p) const
find_concat_transpose{},
find_nested_convert{},
find_nested_slice{},
-find_nested_concat{});
+find_nested_concat{},
+find_transpose_contiguous_reshaper_unary{});
dead_code_elimination{}.apply(p);
}
}
......
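In IR terms the rewrite looks roughly like this (an illustrative instruction stream, not literal MIGraphX output):

// before: @1 = transpose(@0); @2 = contiguous(@1); @3 = reshape(@2); @4 = relu(@3)
// after:  @1 = transpose(@0); @2 = relu(@1); @3 = contiguous(@2); @4 = reshape(@3)

Because the unary operation is elementwise, it commutes with the layout-only contiguous/reshape steps, so the reorder preserves semantics while exposing the unary --> contiguous --> reshaper tail for pointwise fusion.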
......@@ -73,7 +73,7 @@ dnnl::memory::desc to_dnnl_memory_desc(const shape& s)
dnnl::memory to_dnnl_memory(const dnnl::memory::desc& desc, const argument& a)
{
-return dnnl::memory(desc, get_dnnl_context().engine, a.data());
+return {desc, get_dnnl_context().engine, a.data()};
}
dnnl::memory to_dnnl_memory(const argument& a)
......
......@@ -122,9 +122,11 @@ add_library(migraphx_gpu
batch_norm_inference.cpp
clip.cpp
code_object_op.cpp
+compile_ops.cpp
compile_hip.cpp
compile_hip_code_object.cpp
compile_pointwise.cpp
+compile_roialign.cpp
concat.cpp
convert.cpp
convolution.cpp
......
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/module.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/errors.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/env.hpp>
#include <cassert>
#include <iostream>
......@@ -230,6 +231,20 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
return {compiler.compile(srcs)};
}
+std::string enum_params(std::size_t count, std::string param)
+{
+std::vector<std::string> items(count);
+transform(range(count), items.begin(), [&](auto i) { return param + std::to_string(i); });
+return join_strings(items, ",");
+}
+std::size_t compute_global(std::size_t n, std::size_t local)
+{
+std::size_t groups = (n + local - 1) / local;
+std::size_t nglobal = std::min<std::size_t>(256, groups) * local;
+return nglobal;
+}
#endif // MIGRAPHX_USE_HIPRTC
} // namespace gpu
......
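Two of the relocated helpers are worth a concrete trace. enum_params(3, "void * private_p") yields "void * private_p0,void * private_p1,void * private_p2", which is interpolated into the generated kernel signature. compute_global rounds the element count up to whole workgroups but caps the launch at 256 groups, so each thread may stride over several elements:

// illustrative trace of compute_global, assuming the definition above
// n = 1000000, local = 1024
// groups  = (1000000 + 1023) / 1024 = 977
// nglobal = min(256, 977) * 1024 = 262144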
#include <migraphx/gpu/compile_ops.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/module.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/gpu/compile_pointwise.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct precompile_op
{
operation op = op::identity{};
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.op, "op"));
}
std::string name() const { return "gpu::precompile_op"; }
shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
{
inputs.pop_back();
return op.compute_shape(inputs, mods);
}
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
MIGRAPHX_REGISTER_OP(precompile_op);
struct pointwise_compiler
{
std::string name() const { return "pointwise"; }
operation apply(context& ctx, instruction_ref ins, const operation&) const
{
assert(not ins->module_inputs().empty());
auto* pm = ins->module_inputs().front();
return compile_pointwise(ctx, to_shapes(ins->inputs()), *pm);
}
};
using compiler_function = std::function<operation(context&, instruction_ref, operation)>;
template <class T>
compiler_function make_compiler_function(T x)
{
return {[=](auto&&... xs) { return x.apply(xs...); }};
}
template <class... Ts>
std::unordered_map<std::string, compiler_function> make_compilers(Ts... xs)
{
return {{xs.name(), make_compiler_function(xs)}...};
}
void compile_ops::apply(module& m) const
{
auto compilers = make_compilers(pointwise_compiler{});
for(auto ins : iterator_for(m))
{
if(ins->name() != "gpu::precompile_op")
continue;
operation preop = any_cast<precompile_op>(ins->get_operator()).op;
assert(contains(compilers, preop.name()));
auto op = compilers[preop.name()](*ctx, ins, preop);
m.replace_instruction(ins, op, ins->inputs());
}
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
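precompile_op is a placeholder: it carries the real operation plus a trailing allocation input (hence output_alias pointing at the last argument and compute_shape popping it), and compile_ops later swaps it for the compiled operation looked up by name. The registry accepts any struct with name() and apply(context&, instruction_ref, const operation&); a hypothetical second entry, sketched only to show the shape of an extension (roialign is not actually routed through this map in this commit):

struct roialign_compiler // hypothetical, for illustration only
{
    std::string name() const { return "roialign"; }
    operation apply(context& ctx, instruction_ref ins, const operation& op) const
    {
        // assumes compile_roialign's (context&, shapes, value) signature shown later in this diff
        return compile_roialign(ctx, to_shapes(ins->inputs()), op.to_value());
    }
};
// registration would then read: make_compilers(pointwise_compiler{}, roialign_compiler{});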
#include <migraphx/gpu/compile_pointwise.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/cpp_generator.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/module.hpp>
#include <migraphx/pass_manager.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -16,6 +22,8 @@ static const char* const pointwise_kernel = R"__migraphx__(
using namespace migraphx;
+${preamble}
extern "C" {
__global__ void kernel(${params})
{
......@@ -28,21 +36,10 @@ int main() {}
)__migraphx__";
-std::string enum_params(std::size_t count, std::string param)
-{
-std::vector<std::string> items(count);
-transform(range(count), items.begin(), [&](auto i) { return param + std::to_string(i); });
-return join_strings(items, ",");
-}
-std::size_t compute_global(std::size_t n, std::size_t local = 1024)
-{
-std::size_t groups = (n + local - 1) / local;
-std::size_t nglobal = std::min<std::size_t>(256, groups) * local;
-return nglobal;
-}
-operation compile_pointwise(context&, const std::vector<shape>& inputs, const std::string& lambda)
+operation compile_pointwise(context&,
+const std::vector<shape>& inputs,
+const std::string& lambda,
+const std::string& preamble)
{
hip_compile_options options;
options.global = compute_global(inputs.front().elements());
......@@ -50,13 +47,23 @@ operation compile_pointwise(context&, const std::vector<shape>& inputs, const st
options.inputs = inputs;
options.output = inputs.back();
options.reduced_inputs = reduce_dims(inputs);
options.params = "-Wno-float-equal";
auto src = interpolate_string(pointwise_kernel,
{{"params", enum_params(inputs.size(), "void * private_p")},
{"args", enum_params(inputs.size(), "private_p")},
{"lambda", lambda}});
{"lambda", lambda},
{"preamble", preamble}});
return compile_hip_code_object(src, options);
}
+operation compile_pointwise(context& ctx, const std::vector<shape>& inputs, module m)
+{
+run_passes(m, {eliminate_common_subexpression{}, dead_code_elimination{}});
+cpp_generator g;
+auto name = g.create_function(g.generate_module(m).set_attributes({"__device__"}));
+return compile_pointwise(ctx, inputs, "&" + name, g.str());
+}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
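The new module overload turns a pointwise sub-module into device code: after common-subexpression and dead-code elimination, cpp_generator emits the module as a C++ function tagged __device__, whose source becomes ${preamble} in the kernel template while "&" + name becomes ${lambda}, so the kernel receives a pointer to the freshly generated function. Roughly, the interpolated source ends up shaped like this (a sketch; the elided template body applies the lambda elementwise):

// ${preamble} -> __device__ auto <name>(...) { ...generated from the module... }
// ${params}   -> void * private_p0,void * private_p1
// ${args}     -> private_p0,private_p1
// ${lambda}   -> &<name>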
#include <migraphx/gpu/compile_roialign.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// NOLINTNEXTLINE
static const char* const roialign_kernel = R"__migraphx__(
#include <migraphx/kernels/roialign.hpp>
#include <migraphx/kernels/basic_ops.hpp>
#include <args.hpp>
using namespace migraphx;
extern "C" {
__global__ void roialign_kernel(void* in_x, void* in_rois, void* in_ind, void* y)
{
make_tensors()(in_x, in_rois, in_ind, y)([](auto&&... xs) { roialign(xs...); });
}
}
int main() {}
)__migraphx__";
operation compile_roialign(context&, const std::vector<shape>& io_shapes, const value& val)
{
hip_compile_options options;
auto out_s = io_shapes.back();
options.local = 128;
options.global = compute_global(out_s.elements(), options.local);
options.inputs = io_shapes;
options.output = out_s;
options.kernel_name = "roialign_kernel";
options.reduced_inputs = io_shapes;
// sampling_ratio
assert(val.contains("sampling_ratio"));
auto sampling_ratio = val.at("sampling_ratio").to<int64_t>();
options.params += " -DSAMPLING_RATIO=" + std::to_string(sampling_ratio);
// pooling_mode
assert(val.contains("mode"));
auto mode = val.at("mode").to<std::string>();
bool is_avg_pooling = (mode == "avg");
options.params += " -DIS_AVG_POOLING=" + std::to_string(static_cast<int>(is_avg_pooling));
// coord_trans_mode
assert(val.contains("coordinate_transformation_mode"));
auto ctm = val.at("coordinate_transformation_mode").to<std::string>();
float rois_offset = (ctm == "output_half_pixel") ? -0.5f : 0.0f;
options.params += " -DROIS_OFFSET=" + std::to_string(rois_offset);
// spatial_scale
assert(val.contains("spatial_scale"));
float spatial_scale = val.at("spatial_scale").to<float>();
options.params += " -DSPATIAL_SCALE=" + std::to_string(spatial_scale);
return compile_hip_code_object(roialign_kernel, options);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
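Every attribute here is baked in as a preprocessor define rather than read at runtime, so each distinct attribute combination produces its own specialized code object. For instance, sampling_ratio = 2, mode = "avg", coordinate_transformation_mode = "half_pixel", and spatial_scale = 0.25 would append roughly:

// -DSAMPLING_RATIO=2 -DIS_AVG_POOLING=1 -DROIS_OFFSET=0.000000 -DSPATIAL_SCALE=0.250000
// (std::to_string formats the floats with six decimal places)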
......@@ -5,6 +5,7 @@
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/convolution.hpp>
+#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/add.hpp>
#include <migraphx/gpu/mul.hpp>
......@@ -26,6 +27,7 @@
#include <migraphx/array.hpp>
#include <migraphx/op/clip.hpp>
#include <cmath>
+#include <unordered_set>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -152,6 +154,12 @@ struct fusion
}
};
+const std::unordered_set<std::string>& get_supported_archs()
+{
+static std::unordered_set<std::string> supported_archs{"gfx900", "gfx906", "gfx908", "gfx1030"};
+return supported_archs;
+}
MIGRAPHX_PRED_MATCHER(bias_shape, instruction_ref ins)
{
auto&& s = ins->get_shape();
......@@ -161,6 +169,9 @@ MIGRAPHX_PRED_MATCHER(bias_shape, instruction_ref ins)
MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
{
+const auto device_name = split_string(get_device_name(), ':').front();
+if(not contains(get_supported_archs(), device_name))
+return false;
if(enabled(MIGRAPHX_DISABLE_MIOPEN_FUSION{}))
return false;
if(ins->name() != "gpu::convolution")
......
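The new arch gate relies on get_device_name() returning a full target ID, which on ROCm can carry feature suffixes; splitting on ':' keeps only the bare architecture before consulting the supported set. A quick trace with a hypothetical device string:

// split_string("gfx906:sramecc+:xnack-", ':') -> {"gfx906", "sramecc+", "xnack-"}
// .front() -> "gfx906", contained in get_supported_archs(), so fusion remains eligible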