Commit 712f6134 authored by Shucai Xiao's avatar Shucai Xiao
Browse files

merge changes from develop branch and resolve merge conflicts

parents 4a39a0f7 b20e3d4d
......@@ -42,11 +42,6 @@ struct parse_randomnormal_ops : op_parser<parse_randomnormal_ops>
if(contains(info.attributes, "scale"))
scale = info.attributes.at("scale").f();
float seed = static_cast<float>(
std::chrono::high_resolution_clock::now().time_since_epoch().count());
if(contains(info.attributes, "seed"))
seed = info.attributes.at("seed").f();
shape out_shape;
if(contains(info.attributes, "shape"))
{
......@@ -75,7 +70,10 @@ struct parse_randomnormal_ops : op_parser<parse_randomnormal_ops>
": cannot deduce shape without shape attribute or argument.");
}
std::mt19937 gen(seed);
std::mt19937 gen(std::chrono::high_resolution_clock::now().time_since_epoch().count());
if(contains(info.attributes, "seed"))
gen.seed(info.attributes.at("seed").f());
std::normal_distribution<> d(mean, scale);
std::vector<double> rand_vals(out_shape.elements());
std::generate(rand_vals.begin(), rand_vals.end(), [&]() { return d(gen); });
......
......@@ -42,11 +42,6 @@ struct parse_randomuniform_ops : op_parser<parse_randomuniform_ops>
if(contains(info.attributes, "low"))
low = info.attributes.at("low").f();
float seed = static_cast<float>(
std::chrono::high_resolution_clock::now().time_since_epoch().count());
if(contains(info.attributes, "seed"))
seed = info.attributes.at("seed").f();
shape out_shape;
if(contains(info.attributes, "shape"))
{
......@@ -75,7 +70,10 @@ struct parse_randomuniform_ops : op_parser<parse_randomuniform_ops>
": cannot deduce shape without shape attribute or argument.");
}
std::mt19937 gen(seed);
std::mt19937 gen(std::chrono::high_resolution_clock::now().time_since_epoch().count());
if(contains(info.attributes, "seed"))
gen.seed(info.attributes.at("seed").f());
std::uniform_real_distribution<> d(high, low);
std::vector<double> rand_vals(out_shape.elements());
std::generate(rand_vals.begin(), rand_vals.end(), [&]() { return d(gen); });
......
......@@ -163,9 +163,9 @@ static std::string get_nearest_mode(const onnx_parser::attribute_map& attr)
struct parse_resize : op_parser<parse_resize>
{
std::vector<op_desc> operators() const { return {{"Resize"}}; }
std::vector<op_desc> operators() const { return {{"Resize"}, {"Upsample"}}; }
instruction_ref parse(const op_desc& /*opd*/,
instruction_ref parse(const op_desc& opd,
const onnx_parser& /*parser*/,
onnx_parser::node_info info,
std::vector<instruction_ref> args) const
......@@ -183,7 +183,7 @@ struct parse_resize : op_parser<parse_resize>
if(contains(info.attributes, "exclude_outside") and
info.attributes.at("exclude_outside").i() == 1)
{
MIGRAPHX_THROW("PARSE_RESIZE: exclude_outside 1 is not supported!");
MIGRAPHX_THROW("PARSE_" + opd.op_name + ": exclude_outside 1 is not supported!");
}
// input data shape info
......@@ -215,12 +215,14 @@ struct parse_resize : op_parser<parse_resize>
if(type == shape::int64_type)
{
auto arg_out_s = arg->eval();
check_arg_empty(arg_out_s, "PARSE_RESIZE: dynamic output size is not supported!");
check_arg_empty(arg_out_s,
"PARSE_" + opd.op_name + ": dynamic output size is not supported!");
arg_out_s.visit([&](auto ol) { out_lens.assign(ol.begin(), ol.end()); });
if(out_lens.size() != in_lens.size())
{
MIGRAPHX_THROW("PARSE_RESIZE: specified output size does not match input size");
MIGRAPHX_THROW("PARSE_" + opd.op_name +
": specified output size does not match input size");
}
// compute the scale
......@@ -239,12 +241,14 @@ struct parse_resize : op_parser<parse_resize>
{
auto arg_scale = arg->eval();
check_arg_empty(arg_scale,
"PARSE_RESIZE: dynamic input scale is not supported!");
"PARSE_" + opd.op_name +
": dynamic input scale is not supported!");
arg_scale.visit([&](auto v) { vec_scale.assign(v.begin(), v.end()); });
if(in_lens.size() != vec_scale.size())
{
MIGRAPHX_THROW("PARSE_RESIZE: ranks of input and scale are different!");
MIGRAPHX_THROW("PARSE_" + opd.op_name +
": ranks of input and scale are different!");
}
std::transform(in_lens.begin(),
......@@ -334,7 +338,7 @@ struct parse_resize : op_parser<parse_resize>
auto ins_delta = info.add_literal(dim_s, delta_data);
// slice the data
int64_t slc_stride = static_cast<int64_t>(dim_lens[0]);
int64_t slc_stride = dim_lens[0];
auto low = info.add_instruction(
make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {slc_stride}}}),
data);
......
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
// Parses the ONNX RoiAlign operator into the migraphx "roialign" op,
// forwarding each attribute (or its ONNX default) as an op field.
struct parse_roialign : op_parser<parse_roialign>
{
    std::vector<op_desc> operators() const { return {{"RoiAlign"}}; }

    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          onnx_parser::node_info info,
                          const std::vector<instruction_ref>& args) const
    {
        const auto& attrs = info.attributes;

        // coordinate_transformation_mode: defaults to "half_pixel"
        std::string coord_trans_mode = contains(attrs, "coordinate_transformation_mode")
                                           ? std::string{attrs.at("coordinate_transformation_mode").s()}
                                           : std::string{"half_pixel"};
        // Only these two modes are supported by the roialign op.
        if(not contains({"half_pixel", "output_half_pixel"}, coord_trans_mode))
        {
            MIGRAPHX_THROW("coordinate_transformation_mode \"" + coord_trans_mode +
                           "\": invalid value!");
        }

        // Pooling mode: defaults to average pooling.
        std::string mode =
            contains(attrs, "mode") ? std::string{attrs.at("mode").s()} : std::string{"avg"};

        // Output spatial dims and sampling parameters with their ONNX defaults.
        int64_t output_height =
            contains(attrs, "output_height") ? attrs.at("output_height").i() : 1;
        int64_t output_width = contains(attrs, "output_width") ? attrs.at("output_width").i() : 1;
        int64_t sampling_ratio =
            contains(attrs, "sampling_ratio") ? attrs.at("sampling_ratio").i() : 0;
        float spatial_scale =
            contains(attrs, "spatial_scale") ? attrs.at("spatial_scale").f() : 1.0f;

        return info.add_instruction(make_op("roialign",
                                            {{"coordinate_transformation_mode", coord_trans_mode},
                                             {"mode", mode},
                                             {"output_height", output_height},
                                             {"output_width", output_width},
                                             {"sampling_ratio", sampling_ratio},
                                             {"spatial_scale", spatial_scale}}),
                                    args);
    }
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
// Parses the ONNX Softplus operator: y = log(exp(x) + 1), assembled from
// elementwise primitives since there is no dedicated softplus op.
struct parse_softplus : op_parser<parse_softplus>
{
    std::vector<op_desc> operators() const { return {{"Softplus"}}; }

    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
        const auto& x      = args[0];
        const auto x_shape = x->get_shape();
        // Scalar literal 1 broadcast to the input shape.
        auto one  = info.add_literal(migraphx::literal{migraphx::shape{x_shape.type()}, {1}});
        auto ones = info.add_instruction(
            migraphx::make_op("multibroadcast", {{"out_lens", x_shape.lens()}}), one);
        auto exp_x = info.add_instruction(migraphx::make_op("exp"), x);
        auto sum   = info.add_instruction(migraphx::make_op("add"), exp_x, ones);
        return info.add_instruction(migraphx::make_op("log"), sum);
    }
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
// Parses the ONNX Softsign operator: y = x / (1 + |x|), assembled from
// elementwise primitives since there is no dedicated softsign op.
struct parse_softsign : op_parser<parse_softsign>
{
    std::vector<op_desc> operators() const { return {{"Softsign"}}; }

    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
        const auto& x      = args[0];
        const auto x_shape = x->get_shape();
        // Scalar literal 1 broadcast to the input shape.
        auto one  = info.add_literal(migraphx::literal{migraphx::shape{x_shape.type()}, {1}});
        auto ones = info.add_instruction(
            migraphx::make_op("multibroadcast", {{"out_lens", x_shape.lens()}}), one);
        auto abs_x = info.add_instruction(migraphx::make_op("abs"), x);
        auto denom = info.add_instruction(migraphx::make_op("add"), abs_x, ones);
        return info.add_instruction(migraphx::make_op("div"), x, denom);
    }
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
// Parses the ONNX SpaceToDepth operator: rearranges blocks of spatial data
// into depth, i.e. (N, C, H, W) -> (N, C*b*b, H/b, W/b) for blocksize b,
// implemented as reshape -> transpose -> reshape.
struct parse_spacetodepth : op_parser<parse_spacetodepth>
{
    std::vector<op_desc> operators() const { return {{"SpaceToDepth"}}; }

    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
        auto s = args[0]->get_shape();
        // SpaceToDepth is only defined for 4-D NCHW input; guard before
        // indexing dims 2 and 3 below (previously out-of-range on other ranks).
        if(s.lens().size() != 4)
        {
            MIGRAPHX_THROW("SpaceToDepth: input tensor is not rank 4 (NCHW)");
        }
        // blocksize attribute of SpaceToDepth; int64_t to match the attribute's
        // integer type. A blocksize of 1 makes this a no-op.
        int64_t blocksize = 1;
        if(contains(info.attributes, "blocksize"))
        {
            blocksize = info.attributes.at("blocksize").i();
        }
        if(blocksize < 1)
        {
            // blocksize < 1 would describe DepthToSpace rather than SpaceToDepth
            MIGRAPHX_THROW("SpaceToDepth: blocksize is less than 1");
        }
        // calculate output dimensions; H and W must be divisible by blocksize
        auto res_lens = s.lens(); // {N, C, H, W}
        if(((res_lens[2] % blocksize) == 0) and ((res_lens[3] % blocksize) == 0))
        {
            // Co = C * (blocksize ^ 2)
            res_lens[1] = res_lens[1] * blocksize * blocksize;
            // Ho = (H / blocksize)
            res_lens[2] = res_lens[2] / blocksize;
            // Wo = (W / blocksize)
            res_lens[3] = res_lens[3] / blocksize;
        } // res_shape = (N, Co, Ho, Wo)
        else
            MIGRAPHX_THROW("SpaceToDepth: div by blocksize quotient not int ");
        // Intermediate view splitting H and W into (Ho, b) and (Wo, b).
        auto trans_lens = s.lens(); // {N, C, H, W}
        trans_lens[2]   = res_lens[2];
        trans_lens[3]   = blocksize;
        trans_lens.push_back(res_lens[3]);
        trans_lens.push_back(blocksize); // {N, C, Ho, blocksize, Wo, blocksize}
        // Move the two block axes ahead of channel, spatial axes to the back.
        std::vector<int64_t> perm = {0, 3, 5, 1, 2, 4};
        auto temp1 = info.add_instruction(make_op("reshape", {{"dims", trans_lens}}), args[0]);
        auto temp2 = info.add_instruction(make_op("transpose", {{"permutation", perm}}), temp1);
        return info.add_instruction(make_op("reshape", {{"dims", res_lens}}),
                                    info.make_contiguous(temp2));
    }
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -24,7 +24,7 @@ struct parse_split : op_parser<parse_split>
}
auto lens = args[0]->get_shape().lens();
int64_t n_rank = static_cast<int64_t>(lens.size());
int64_t n_rank = lens.size();
int64_t tuned_axis = tune_axis(n_rank, axis, opd.op_name);
std::vector<int64_t> vec_splits;
......
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
// Parses the ONNX Upsample operator (nearest mode only). Builds a static
// index map from output coordinates to nearest input coordinates and lowers
// the op to a gather over the flattened input.
struct parse_upsample : op_parser<parse_upsample>
{
    std::vector<op_desc> operators() const { return {{"Upsample"}}; }

    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          onnx_parser::node_info info,
                          std::vector<instruction_ref> args) const
    {
        // Only nearest-neighbor upsampling is implemented.
        if(contains(info.attributes, "mode"))
        {
            auto mode = info.attributes.at("mode").s();
            if(mode != "nearest")
            {
                MIGRAPHX_THROW("PARSE_UPSAMPLE: only nearest mode is supported!");
            }
        }

        // Scales (args[1]) must be a compile-time constant.
        auto arg_scale = args[1]->eval();
        check_arg_empty(arg_scale, "PARSE_UPSAMPLE: only constant scale is supported!");
        std::vector<float> vec_scale;
        arg_scale.visit([&](auto v) { vec_scale.assign(v.begin(), v.end()); });

        auto in_s    = args[0]->get_shape();
        auto in_lens = in_s.lens();
        if(in_lens.size() != vec_scale.size())
        {
            MIGRAPHX_THROW("PARSE_UPSAMPLE: ranks of input and scale are different!");
        }

        // Output dims: truncate(in_dim * scale).
        std::vector<std::size_t> out_lens(in_lens.size());
        std::transform(in_lens.begin(),
                       in_lens.end(),
                       vec_scale.begin(),
                       out_lens.begin(),
                       [&](auto idx, auto scale) { return static_cast<std::size_t>(idx * scale); });

        // Per-axis factor mapping an output index back to an input index:
        // (id - 1) / (od - 1), or 1.0 when the axis size is unchanged.
        std::vector<float> idx_scale(in_lens.size());
        std::transform(
            out_lens.begin(),
            out_lens.end(),
            in_lens.begin(),
            idx_scale.begin(),
            [](auto od, auto id) { return (od == id) ? 1.0f : (id - 1.0f) / (od - 1.0f); });

        shape out_s{in_s.type(), out_lens};
        std::vector<int> ind(out_s.elements());

        // For each output element record the flat index of its nearest input
        // element (map out_idx to in_idx).
        shape_for_each(out_s, [&](auto idx) {
            auto in_idx = idx;
            std::transform(idx.begin(),
                           idx.end(),
                           idx_scale.begin(),
                           in_idx.begin(),
                           // nearest mode
                           [](auto index, auto scale) {
                               return static_cast<std::size_t>(std::round(index * scale));
                           });
            // Fix: the index table is int32 (ind_s below is int32_type), so cast
            // to int directly instead of via int64_t to avoid silent narrowing.
            ind[out_s.index(idx)] = static_cast<int>(in_s.index(in_idx));
        });

        // reshape input to one dimension, then gather the mapped elements
        std::vector<int64_t> rsp_lens = {static_cast<int64_t>(in_s.elements())};
        shape ind_s{shape::int32_type, out_lens};
        auto rsp     = info.add_instruction(make_op("reshape", {{"dims", rsp_lens}}), args[0]);
        auto ins_ind = info.add_literal(literal(ind_s, ind));
        return info.add_instruction(make_op("gather", {{"axis", 0}}), rsp, ins_ind);
    }
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -15,6 +15,8 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_PASSES);
void validate_pass(module& mod, const pass& p, tracer trace)
{
(void)mod;
......@@ -82,6 +84,8 @@ module& get_module(module_pass_manager& mpm) { return mpm.get_module(); }
void run_passes(module& mod, const std::vector<pass>& passes, tracer trace)
{
if(enabled(MIGRAPHX_TRACE_PASSES{}))
trace = tracer{std::cout};
for(const auto& p : passes)
{
module_pm{&mod, nullptr, &trace}.run_pass(p);
......@@ -90,6 +94,8 @@ void run_passes(module& mod, const std::vector<pass>& passes, tracer trace)
void run_passes(program& prog, const std::vector<pass>& passes, tracer trace)
{
if(enabled(MIGRAPHX_TRACE_PASSES{}))
trace = tracer{std::cout};
for(const auto& p : passes)
{
auto mods = prog.get_modules();
......
......@@ -13,6 +13,7 @@
#include <migraphx/algorithm.hpp>
#include <migraphx/output_iterator.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/marker.hpp>
#include <iostream>
#include <sstream>
#include <algorithm>
......@@ -179,6 +180,63 @@ void program::finalize()
mm->finalize(this->impl->ctx);
}
// Returns a short textual label for the floating-point class of `x`:
// "inf", "nan", "normal", "subnormal", "zero", or "unknown".
template <class T>
std::string classify(T x)
{
    const int fp_class = std::fpclassify(x);
    if(fp_class == FP_INFINITE)
        return "inf";
    if(fp_class == FP_NAN)
        return "nan";
    if(fp_class == FP_NORMAL)
        return "normal";
    if(fp_class == FP_SUBNORMAL)
        return "subnormal";
    if(fp_class == FP_ZERO)
        return "zero";
    return "unknown";
}
// Collects the set of floating-point class labels (see classify()) present in
// an argument's data. Tuple arguments are handled by recursing into each
// sub-argument and merging the results.
std::unordered_set<std::string> classify_argument(const argument& a)
{
std::unordered_set<std::string> result;
a.visit(
// Tensor case: classify every element.
[&](auto t) {
for(const auto& x : t)
result.insert(classify(x));
},
// Tuple case: recurse into each sub-argument and union the label sets.
[&](const auto& xs) {
for(const auto& x : xs)
{
auto r = classify_argument(x);
result.insert(r.begin(), r.end());
}
});
return result;
}
// Prints an abbreviated view of an argument's contents to `os`: small tensors
// (<= 10 elements) are printed in full, larger ones as the first and last five
// elements separated by an ellipsis. Tuple arguments are printed recursively,
// each sub-argument wrapped in braces.
void preview_argument(std::ostream& os, const argument& a)
{
a.visit(
// Tensor case: full print or head/tail preview depending on size.
[&](auto t) {
if(t.size() <= 10)
{
os << t;
}
else
{
os << to_string_range(t.begin(), t.begin() + 5);
os << ", ..., ";
os << to_string_range(t.end() - 5, t.end());
}
},
// Tuple case: recurse, wrapping each sub-argument in '{' '}'.
[&](const auto& xs) {
for(const auto& x : xs)
{
os << '{';
preview_argument(os, x);
os << '}';
}
});
}
template <class F>
std::vector<argument> generic_eval(const module* mod,
context& ctx,
......@@ -309,8 +367,24 @@ std::vector<argument> program::eval(parameter_map params) const
double t2 = t.record<milliseconds>();
std::cout << "Time: " << t1 << "ms, " << t2 << "ms" << std::endl;
if(trace_level > 1 and ins->name().front() != '@' and
ins->name() != "load")
std::cout << "Output: " << result << std::endl;
ins->name() != "load" and not result.empty())
{
target tgt = make_target(this->impl->target_name);
auto buffer = tgt.copy_from(result);
if(trace_level == 2)
{
std::cout << "Output has "
<< to_string_range(classify_argument(buffer))
<< std::endl;
std::cout << "Output: ";
preview_argument(std::cout, buffer);
std::cout << std::endl;
}
else
{
std::cout << "Output: " << buffer << std::endl;
}
}
return result;
}));
}
......@@ -504,7 +578,28 @@ std::string perf_group(const operation& op)
return op.name();
}
void program::perf_report(std::ostream& os, std::size_t n, parameter_map params) const
void program::mark(const parameter_map& params, marker&& m)
{
auto& ctx = this->impl->ctx;
// Run once by itself
eval(params);
ctx.finish();
// Start marking
m.mark_start(*this);
generic_eval(*this, ctx, params, always([&](auto ins, auto f) {
argument result;
m.mark_start(ins);
result = f();
m.mark_stop(ins);
return result;
}));
m.mark_stop(*this);
}
void program::perf_report(std::ostream& os,
std::size_t n,
parameter_map params,
std::size_t batch) const
{
auto& ctx = this->impl->ctx;
// Run once by itself
......@@ -597,7 +692,8 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
os << std::endl;
os << "Rate: " << rate << "/sec" << std::endl;
os << "Batch size: " << batch << std::endl;
os << "Rate: " << rate * batch << "/sec" << std::endl;
os << "Total time: " << total_time << "ms" << std::endl;
os << "Total instructions time: " << total_instruction_time << "ms" << std::endl;
os << "Overhead time: " << overhead_time << "ms"
......
......@@ -269,7 +269,7 @@ std::vector<instruction_ref> rewrite_rnn::vanilla_rnn_cell(bool is_forward,
instruction_ref hidden_out = prog.end();
instruction_ref last_out{};
last_out = prog.insert_instruction(ins, make_op("unsqueeze", {{"axes", {0, 1}}}), sih);
long seq_len = static_cast<long>(get_seq_len(prog, seq, seq_lens));
long seq_len = get_seq_len(prog, seq, seq_lens);
for(long i = 0; i < seq_len; i++)
{
long seq_index = is_forward ? i : (seq_len - 1 - i);
......@@ -556,7 +556,7 @@ std::vector<instruction_ref> rewrite_rnn::gru_cell(bool is_forward,
instruction_ref last_output{};
migraphx::shape seq_shape = seq->get_shape();
migraphx::shape r_shape = r->get_shape();
long hs = static_cast<long>(r_shape.lens()[2]);
long hs = r_shape.lens()[2];
migraphx::shape ss(seq_shape.type(), {seq_shape.lens()[1], r_shape.lens()[2]});
std::vector<float> data(ss.elements(), 1.0f);
......@@ -613,7 +613,7 @@ std::vector<instruction_ref> rewrite_rnn::gru_cell(bool is_forward,
rb_h);
}
long seq_len = static_cast<long>(get_seq_len(prog, seq, seq_lens));
long seq_len = get_seq_len(prog, seq, seq_lens);
for(long i = 0; i < seq_len; i++)
{
long seq_index = is_forward ? i : (seq_len - 1 - i);
......@@ -1032,7 +1032,7 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
instruction_ref last_cell_output{};
migraphx::shape r_shape = r->get_shape();
long hs = static_cast<long>(r_shape.lens()[2]);
long hs = r_shape.lens()[2];
auto bs = ih->get_shape().lens()[1];
std::vector<int64_t> perm{1, 0};
......@@ -1094,7 +1094,7 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
ins, make_op("broadcast", {{"axis", 1}, {"out_lens", ic_lens}}), pphf);
}
long seq_len = static_cast<long>(get_seq_len(prog, seq, seq_lens));
long seq_len = get_seq_len(prog, seq, seq_lens);
for(long i = 0; i < seq_len; ++i)
{
long seq_index = is_forward ? i : (seq_len - 1 - i);
......
......@@ -539,6 +539,46 @@ struct find_reshape_cont
}
};
// match sequence of transpose --> contiguous --> reshaper_op
// Builds a matcher for a reshape-like instruction (reshape/squeeze/unsqueeze)
// whose single-use input is a contiguous whose single-use input has a
// transposed shape. Binds the three instructions as "reshaper_ins",
// "cont_ins", and "trans_ins" for use in the apply() of the pass below.
auto match_transpose_contiguous_reshaper()
{
return match::name({"reshape", "squeeze", "unsqueeze"})(
match::used_once(),
match::args(
match::name("contiguous")(
match::used_once(), match::args(match::transpose_shape().bind("trans_ins")))
.bind("cont_ins")))
.bind("reshaper_ins");
};
// finds the pattern of transpose --> contiguous --> reshaper_op --> unary
// application of this matcher moves the unary operation before the contiguous so it becomes
// transpose --> unary --> contiguous --> reshaper_op. later pointwise sub-module can be created out
// of unary --> contiguous --> reshaper_op. Such pattern appears in depthToSpace or spaceToDepth
// operator.
struct find_transpose_contiguous_reshaper_unary
{
// Matches a single-input pointwise (unary) instruction, used once, whose
// argument is the transpose->contiguous->reshaper chain bound above.
auto matcher() const
{
return pointwise(match::used_once(),
match::nargs(1),
match::args(match_transpose_contiguous_reshaper()));
}
void apply(module& p, match::matcher_result r) const
{
auto ins = r.result;
auto reshaper_ins = r.instructions["reshaper_ins"];
auto trans_ins = r.instructions["trans_ins"];
auto cont_ins = r.instructions["cont_ins"];
// Rebuild the chain with the unary op applied directly to the transpose
// output, i.e. transpose --> unary --> contiguous --> reshaper_op.
auto unary_op_name = ins->get_operator().name();
auto unary_ins = p.insert_instruction(cont_ins, make_op(unary_op_name), trans_ins);
auto new_cont_ins = p.insert_instruction(cont_ins, make_op("contiguous"), unary_ins);
// older cont and reshape are removed by deadcode elimination
p.replace_instruction(ins, reshaper_ins->get_operator(), new_cont_ins);
}
};
void simplify_reshapes::apply(module& p) const
{
for(int i = 0; i < 2; i++)
......@@ -553,7 +593,8 @@ void simplify_reshapes::apply(module& p) const
find_concat_transpose{},
find_nested_convert{},
find_nested_slice{},
find_nested_concat{});
find_nested_concat{},
find_transpose_contiguous_reshaper_unary{});
dead_code_elimination{}.apply(p);
}
}
......
......@@ -33,8 +33,6 @@ rocm_set_soversion(migraphx_cpu ${MIGRAPHX_SO_VERSION})
set(MIGRAPHX_ENABLE_ZENDNN Off CACHE BOOL "")
find_package(Threads)
if(MIGRAPHX_ENABLE_ZENDNN)
find_path(ZENDNN_INC_PATH zendnn.hpp)
find_library(ZENDNN_LIB amdZenDNN)
......@@ -53,7 +51,7 @@ if(MIGRAPHX_ENABLE_ZENDNN)
else()
target_link_libraries(migraphx_cpu PRIVATE DNNL::dnnl)
endif()
target_link_libraries(migraphx_cpu PRIVATE migraphx Threads::Threads)
target_link_libraries(migraphx_cpu PRIVATE migraphx)
find_package(OpenMP)
target_link_libraries(migraphx_cpu PUBLIC OpenMP::OpenMP_CXX)
......
......@@ -73,7 +73,7 @@ dnnl::memory::desc to_dnnl_memory_desc(const shape& s)
dnnl::memory to_dnnl_memory(const dnnl::memory::desc& desc, const argument& a)
{
return dnnl::memory(desc, get_dnnl_context().engine, a.data());
return {desc, get_dnnl_context().engine, a.data()};
}
dnnl::memory to_dnnl_memory(const argument& a)
......
......@@ -91,28 +91,34 @@ add_library(migraphx_device
device/unary_not.cpp
device/where.cpp
)
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_device)
target_compile_options(migraphx_device PRIVATE -std=c++17 -fno-gpu-rdc -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
target_link_libraries(migraphx_device migraphx hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument)
if(CMAKE_CXX_COMPILER MATCHES ".*hcc")
set(AMDGPU_TARGETS "gfx803;gfx900;gfx906" CACHE STRING "")
foreach(AMDGPU_TARGET ${AMDGPU_TARGETS})
target_compile_options(migraphx_device PRIVATE -amdgpu-target=${AMDGPU_TARGET})
target_link_libraries(migraphx_device -amdgpu-target=${AMDGPU_TARGET})
endforeach()
else()
target_compile_options(migraphx_device PRIVATE -Wno-cuda-compat)
endif()
add_library(compile_for_gpu INTERFACE)
target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument)
check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE)
if(HAS_HIP_LAMBDA_HOST_DEVICE)
message(STATUS "Enable -fhip-lambda-host-device")
target_compile_options(migraphx_device PRIVATE -fhip-lambda-host-device)
target_compile_options(compile_for_gpu INTERFACE -fhip-lambda-host-device)
endif()
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_device)
target_link_libraries(migraphx_device PUBLIC migraphx)
target_link_libraries(migraphx_device PRIVATE compile_for_gpu)
target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
add_library(kernel_file_check EXCLUDE_FROM_ALL)
foreach(KERNEL_FILE ${KERNEL_FILES})
get_filename_component(KERNEL_BASE_FILE ${KERNEL_FILE} NAME_WE)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp "#include <migraphx/kernels/${KERNEL_BASE_FILE}.hpp>\n")
target_sources(kernel_file_check PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp)
endforeach()
target_include_directories(kernel_file_check PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/>)
target_link_libraries(kernel_file_check compile_for_gpu)
rocm_clang_tidy_check(kernel_file_check)
add_library(migraphx_gpu
abs.cpp
analyze_streams.cpp
......@@ -122,9 +128,11 @@ add_library(migraphx_gpu
batch_norm_inference.cpp
clip.cpp
code_object_op.cpp
compile_ops.cpp
compile_hip.cpp
compile_hip_code_object.cpp
compile_pointwise.cpp
compile_roialign.cpp
concat.cpp
convert.cpp
convolution.cpp
......@@ -308,8 +316,12 @@ target_flags(HIP_COMPILER_FLAGS hip::device)
# Remove cuda arch flags
string(REGEX REPLACE --cuda-gpu-arch=[a-z0-9]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
string(REGEX REPLACE --offload-arch=[a-z0-9:+-]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
string(REPLACE "$<LINK_LANGUAGE:CXX>" "1" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
string(REPLACE "SHELL:" "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
# Skip library paths since hip will incorrectly treat it as a source file
string(APPEND HIP_COMPILER_FLAGS " ")
foreach(_unused RANGE 2)
string(REGEX REPLACE " /[^ ]+\\.(a|so) " " " HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
endforeach()
message(STATUS "Hip compiler flags: ${HIP_COMPILER_FLAGS}")
target_compile_definitions(migraphx_gpu PRIVATE
"-DMIGRAPHX_HIP_COMPILER=${CMAKE_CXX_COMPILER}"
......@@ -339,7 +351,7 @@ target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
add_subdirectory(driver)
rocm_install_targets(
TARGETS migraphx_gpu migraphx_device
TARGETS migraphx_gpu migraphx_device compile_for_gpu
INCLUDE
${CMAKE_CURRENT_SOURCE_DIR}/include
)
......
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/module.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......
......@@ -9,7 +9,7 @@ namespace gpu {
shape hip_argmax::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).standard();
check_shapes{inputs, *this}.has(2);
return op.normalize_compute_shape({inputs.at(0)});
}
......
......@@ -9,7 +9,7 @@ namespace gpu {
shape hip_argmin::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).standard();
check_shapes{inputs, *this}.has(2);
return op.normalize_compute_shape({inputs.at(0)});
}
......
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/errors.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/env.hpp>
#include <cassert>
#include <iostream>
......@@ -230,6 +231,20 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
return {compiler.compile(srcs)};
}
// Builds a comma-separated list of numbered parameter names:
// enum_params(3, "x") -> "x0,x1,x2"; an empty string when count is 0.
std::string enum_params(std::size_t count, std::string param)
{
    std::string result;
    for(std::size_t i = 0; i < count; ++i)
    {
        if(i != 0)
            result += ",";
        result += param + std::to_string(i);
    }
    return result;
}
// Computes the global launch size for a kernel: ceil(n / local) workgroups,
// capped at 256 groups, multiplied by the workgroup (local) size.
std::size_t compute_global(std::size_t n, std::size_t local)
{
    const std::size_t group_count = (n + local - 1) / local;
    const std::size_t capped      = group_count < 256 ? group_count : 256;
    return capped * local;
}
#endif // MIGRAPHX_USE_HIPRTC
} // namespace gpu
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment