Unverified Commit 7f97b8ef authored by Ted Themistokleous's avatar Ted Themistokleous Committed by GitHub
Browse files

Merge branch 'simplify_1_mul_div_ops' into divide_by_zero_check

parents 2ba401f0 d1fed367
...@@ -47,12 +47,12 @@ void rewrite_pooling::apply(module& m) const ...@@ -47,12 +47,12 @@ void rewrite_pooling::apply(module& m) const
if(not s.standard()) if(not s.standard())
continue; continue;
auto&& op = any_cast<op::pooling>(ins->get_operator()); auto&& op = any_cast<op::pooling>(ins->get_operator());
if(!std::all_of(op.padding.begin(), op.padding.end(), [](auto i) { return i == 0; })) if(not std::all_of(op.padding.begin(), op.padding.end(), [](auto i) { return i == 0; }))
continue; continue;
if(!std::all_of(op.stride.begin(), op.stride.end(), [](auto i) { return i == 1; })) if(not std::all_of(op.stride.begin(), op.stride.end(), [](auto i) { return i == 1; }))
continue; continue;
auto lens = s.lens(); auto lens = s.lens();
if(!std::equal(lens.begin() + 2, lens.end(), op.lengths.begin(), op.lengths.end())) if(not std::equal(lens.begin() + 2, lens.end(), op.lengths.begin(), op.lengths.end()))
continue; continue;
std::int64_t n = s.lens()[0]; std::int64_t n = s.lens()[0];
std::int64_t c = s.lens()[1]; std::int64_t c = s.lens()[1];
......
...@@ -214,7 +214,7 @@ void rewrite_rnn::apply_vanilla_rnn(module& m, instruction_ref ins) const ...@@ -214,7 +214,7 @@ void rewrite_rnn::apply_vanilla_rnn(module& m, instruction_ref ins) const
ih = m.add_literal(migraphx::literal{ih_shape, data}); ih = m.add_literal(migraphx::literal{ih_shape, data});
} }
if(!is_forward and variable_seq_len) if(not is_forward and variable_seq_len)
{ {
args[0] = args[0] =
m.insert_instruction(ins, make_op("rnn_var_sl_shift_sequence"), args[0], seq_lens); m.insert_instruction(ins, make_op("rnn_var_sl_shift_sequence"), args[0], seq_lens);
...@@ -520,7 +520,7 @@ void rewrite_rnn::apply_gru(module& m, instruction_ref ins) const ...@@ -520,7 +520,7 @@ void rewrite_rnn::apply_gru(module& m, instruction_ref ins) const
ih = m.add_literal(migraphx::literal{ih_shape, data}); ih = m.add_literal(migraphx::literal{ih_shape, data});
} }
if(!is_forward and variable_seq_len) if(not is_forward and variable_seq_len)
{ {
args[0] = args[0] =
m.insert_instruction(ins, make_op("rnn_var_sl_shift_sequence"), args[0], seq_lens); m.insert_instruction(ins, make_op("rnn_var_sl_shift_sequence"), args[0], seq_lens);
...@@ -977,7 +977,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const ...@@ -977,7 +977,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
pph = args[7]; pph = args[7];
} }
if(!is_forward and variable_seq_len) if(not is_forward and variable_seq_len)
{ {
args[0] = args[0] =
m.insert_instruction(ins, make_op("rnn_var_sl_shift_sequence"), args[0], seq_lens); m.insert_instruction(ins, make_op("rnn_var_sl_shift_sequence"), args[0], seq_lens);
...@@ -1294,11 +1294,11 @@ bool rewrite_rnn::is_variable_seq_lens(const module& m, instruction_ref seq_lens ...@@ -1294,11 +1294,11 @@ bool rewrite_rnn::is_variable_seq_lens(const module& m, instruction_ref seq_lens
std::vector<int64_t> vec_lens; std::vector<int64_t> vec_lens;
arg_lens.visit([&](auto l) { vec_lens.assign(l.begin(), l.end()); }); arg_lens.visit([&](auto l) { vec_lens.assign(l.begin(), l.end()); });
int64_t l = 0; int64_t l = 0;
if(!vec_lens.empty()) if(not vec_lens.empty())
{ {
l = vec_lens[0]; l = vec_lens[0];
} }
if(!std::all_of(vec_lens.begin(), vec_lens.end(), [&](auto v) { return v == l; })) if(not std::all_of(vec_lens.begin(), vec_lens.end(), [&](auto v) { return v == l; }))
{ {
is_var_lens = true; is_var_lens = true;
} }
...@@ -1318,7 +1318,7 @@ rewrite_rnn::get_seq_len(const module& m, instruction_ref input, instruction_ref ...@@ -1318,7 +1318,7 @@ rewrite_rnn::get_seq_len(const module& m, instruction_ref input, instruction_ref
bool is_var_lens = is_variable_seq_lens(m, seq_lens); bool is_var_lens = is_variable_seq_lens(m, seq_lens);
auto input_shape = input->get_shape(); auto input_shape = input->get_shape();
auto length = input_shape.lens()[0]; auto length = input_shape.lens()[0];
if(!is_var_lens and seq_lens != m.end()) if(not is_var_lens and seq_lens != m.end())
{ {
auto arg_len = seq_lens->eval(); auto arg_len = seq_lens->eval();
std::vector<std::size_t> vec_lens; std::vector<std::size_t> vec_lens;
...@@ -1387,7 +1387,7 @@ void rewrite_rnn::replace_last_cell_output(module& m, ...@@ -1387,7 +1387,7 @@ void rewrite_rnn::replace_last_cell_output(module& m,
if(variable_seq_len) if(variable_seq_len)
{ {
if(!ins_outputs.empty()) if(not ins_outputs.empty())
{ {
cell_outputs = m.insert_instruction( cell_outputs = m.insert_instruction(
std::next(ins), std::next(ins),
......
...@@ -477,7 +477,7 @@ bool operator==(const shape::dynamic_dimension& x, const shape::dynamic_dimensio ...@@ -477,7 +477,7 @@ bool operator==(const shape::dynamic_dimension& x, const shape::dynamic_dimensio
bool operator!=(const shape::dynamic_dimension& x, const shape::dynamic_dimension& y) bool operator!=(const shape::dynamic_dimension& x, const shape::dynamic_dimension& y)
{ {
return !(x == y); return not(x == y);
} }
std::ostream& operator<<(std::ostream& os, const shape::dynamic_dimension& x) std::ostream& operator<<(std::ostream& os, const shape::dynamic_dimension& x)
{ {
...@@ -497,7 +497,7 @@ bool operator==(const shape& x, const shape& y) ...@@ -497,7 +497,7 @@ bool operator==(const shape& x, const shape& y)
x.strides() == y.strides() and x.sub_shapes() == y.sub_shapes()); x.strides() == y.strides() and x.sub_shapes() == y.sub_shapes());
} }
bool operator!=(const shape& x, const shape& y) { return !(x == y); } bool operator!=(const shape& x, const shape& y) { return not(x == y); }
std::ostream& operator<<(std::ostream& os, const shape& x) std::ostream& operator<<(std::ostream& os, const shape& x)
{ {
......
...@@ -57,12 +57,14 @@ auto conv_const_weights() ...@@ -57,12 +57,14 @@ auto conv_const_weights()
auto reduction() { return match::name_contains("reduce"); } auto reduction() { return match::name_contains("reduce"); }
// conv(x, w) * a => conv(x, a * w)
struct find_mul_conv struct find_mul_conv
{ {
auto matcher() const auto matcher() const
{ {
return match::name("mul")(match::either_arg(0, 1)(conv_const_weights().bind("conv"), return match::name("mul")(
match::name("broadcast").bind("a"))); match::either_arg(0, 1)(conv_const_weights().bind("conv"),
match::name("broadcast", "multibroadcast").bind("a")));
} }
void apply(module& m, const match::matcher_result& r) const void apply(module& m, const match::matcher_result& r) const
...@@ -72,14 +74,35 @@ struct find_mul_conv ...@@ -72,14 +74,35 @@ struct find_mul_conv
auto a_ins = r.instructions["a"]; auto a_ins = r.instructions["a"];
auto w_ins = r.instructions["w"]; auto w_ins = r.instructions["w"];
auto broadcast_op = any_cast<op::broadcast>(a_ins->get_operator()); const auto& a_input_lens = a_ins->inputs().front()->get_shape().lens();
if(broadcast_op.axis != 1)
std::size_t num_not_one_dims = std::count_if(
a_input_lens.cbegin(), a_input_lens.cend(), [](auto dim) { return dim != 1; });
if(num_not_one_dims > 1)
return;
// check broadcasted along channels
const auto& a_lens = a_ins->get_shape().lens();
const auto& a_strides = a_ins->get_shape().strides();
auto is_broadcasted_axis = [](auto len, auto stride) { return len == 1 or stride == 0; };
if(a_strides.at(1) != 1)
return;
if(not is_broadcasted_axis(a_lens.front(), a_strides.front()))
return; return;
if(not std::equal(a_lens.begin() + 2,
a_lens.end(),
a_strides.begin() + 2,
a_strides.end(),
is_broadcasted_axis))
return;
auto sq = m.insert_instruction(ins, make_op("squeeze"), a_ins->inputs().front());
auto new_a = m.insert_instruction( auto new_a = m.insert_instruction(
ins, ins, make_op("broadcast", {{"axis", 0}, {"out_lens", w_ins->get_shape().lens()}}), sq);
make_op("broadcast", {{"axis", 0}, {"out_lens", w_ins->get_shape().lens()}}),
a_ins->inputs().front());
auto new_mul = m.insert_instruction(ins, make_op("mul"), new_a, w_ins); auto new_mul = m.insert_instruction(ins, make_op("mul"), new_a, w_ins);
auto new_conv = m.insert_instruction( auto new_conv = m.insert_instruction(
ins, conv_ins->get_operator(), conv_ins->inputs().front(), new_mul); ins, conv_ins->get_operator(), conv_ins->inputs().front(), new_mul);
...@@ -208,6 +231,42 @@ struct find_mul_add ...@@ -208,6 +231,42 @@ struct find_mul_add
} }
}; };
struct find_dot_add
{
auto matcher() const
{
return match::name("dot")(match::either_arg(0, 1)(
match::name("add")(
match::either_arg(0, 1)(match::any().bind("x"),
match::any_of(match::is_constant()).bind("b")),
match::none_of(match::args(match::is_constant(), match::is_constant())),
match::used_once()),
match::is_constant().bind("a")));
}
void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
auto a_ins = r.instructions["a"];
auto b_ins = r.instructions["b"];
auto x_ins = r.instructions["x"];
assert(x_ins != b_ins);
const bool flipped = a_ins == ins->inputs().back();
auto insert_dot = [&](auto x, auto y) {
if(flipped)
return m.insert_instruction(ins, make_op("dot"), y, x);
else
return m.insert_instruction(ins, make_op("dot"), x, y);
};
auto ax_ins = insert_dot(a_ins, x_ins);
auto ab_ins = insert_dot(a_ins, b_ins);
m.replace_instruction(ins, make_op("add"), ax_ins, ab_ins);
}
};
struct find_add_lit_broadcast struct find_add_lit_broadcast
{ {
auto matcher() const auto matcher() const
...@@ -267,28 +326,26 @@ struct find_double_add_lit_broadcast ...@@ -267,28 +326,26 @@ struct find_double_add_lit_broadcast
struct find_inner_broadcast struct find_inner_broadcast
{ {
auto matcher() const auto matcher() const { return pointwise(match::all_of[match::inputs()](match::broadcast())); }
{
return pointwise(
match::nargs(2),
match::args(match::name("broadcast").bind("x"), match::name("broadcast").bind("y")));
}
void apply(module& m, const match::matcher_result& r) const void apply(module& m, const match::matcher_result& r) const
{ {
auto ins = r.result; auto ins = r.result;
auto x_ins = r.instructions["x"]; auto broadcasts = ins->inputs();
auto y_ins = r.instructions["y"]; if(broadcasts.empty())
return;
auto xbroadcast = any_cast<op::broadcast>(x_ins->get_operator()); std::vector<instruction_ref> inputs;
auto ybroadcast = any_cast<op::broadcast>(y_ins->get_operator()); std::transform(broadcasts.begin(),
broadcasts.end(),
if(xbroadcast.axis != ybroadcast.axis) std::back_inserter(inputs),
[](auto i) { return i->inputs().front(); });
if(std::any_of(inputs.begin(), inputs.end(), [&](auto i) {
return i->get_shape() != inputs.front()->get_shape();
}))
return; return;
auto op = m.insert_instruction( auto op = m.insert_instruction(ins, ins->get_operator(), inputs);
ins, ins->get_operator(), x_ins->inputs().front(), y_ins->inputs().front()); m.replace_instruction(ins, broadcasts.front()->get_operator(), op);
m.replace_instruction(ins, xbroadcast, op);
} }
}; };
...@@ -378,6 +435,24 @@ struct find_concat_op ...@@ -378,6 +435,24 @@ struct find_concat_op
} }
}; };
void move_instructions_back(module& m, instruction_ref pos, std::vector<instruction_ref> inss)
{
auto start = range(m.begin(), pos);
for(auto ins : iterator_for(start))
{
auto it = std::find(inss.begin(), inss.end(), ins);
if(it != inss.end())
inss.erase(it);
}
for(auto ins : inss)
{
if(not m.has_instruction(ins))
continue;
move_instructions_back(m, pos, ins->inputs());
m.move_instruction(ins, pos);
}
}
std::vector<instruction_ref> get_splits(instruction_ref ins) std::vector<instruction_ref> get_splits(instruction_ref ins)
{ {
std::vector<instruction_ref> result; std::vector<instruction_ref> result;
...@@ -416,8 +491,9 @@ struct find_splits ...@@ -416,8 +491,9 @@ struct find_splits
{ {
auto matcher() const auto matcher() const
{ {
return match::any(match::any_of[match::outputs()](match::name("slice")( return match::any(
match::any_of[match::outputs()](match::pointwise(), reduction())))); match::any_of[match::outputs()](match::name("slice")(match::any_of[match::outputs()](
match::pointwise(match::any_of(match::nargs(1), match::nargs(2))), reduction()))));
} }
static bool is_dependent(const module& m, instruction_ref ins1, instruction_ref ins2) static bool is_dependent(const module& m, instruction_ref ins1, instruction_ref ins2)
...@@ -552,8 +628,7 @@ struct find_splits ...@@ -552,8 +628,7 @@ struct find_splits
})) }))
return; return;
for(auto data : data_args) move_instructions_back(m, ins, data_args);
m.move_instructions(data, ins);
auto slice_op = any_cast<op::slice>(splits.front()->get_operator()); auto slice_op = any_cast<op::slice>(splits.front()->get_operator());
assert(not slice_op.axes.empty()); assert(not slice_op.axes.empty());
...@@ -580,10 +655,9 @@ struct find_splits ...@@ -580,10 +655,9 @@ struct find_splits
auto outputs = i->outputs(); auto outputs = i->outputs();
for(auto output : outputs) for(auto output : outputs)
{ {
if(not contains({"reshape", "squeeze", "unsqueeze"}, output->name())) if(output->name() != "reshape")
continue; continue;
auto x = auto x = m.insert_instruction(output, make_op("contiguous"), i);
m.insert_instruction(output, make_op("contiguous"), output->inputs());
m.replace_instruction(output, output->get_operator(), x); m.replace_instruction(output, output->get_operator(), x);
} }
...@@ -753,7 +827,7 @@ MIGRAPHX_PRED_MATCHER(horiz_conv_dot, instruction_ref ins) ...@@ -753,7 +827,7 @@ MIGRAPHX_PRED_MATCHER(horiz_conv_dot, instruction_ref ins)
}; };
auto dots = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("dot")); auto dots = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("dot"));
auto convs = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("convolution")); auto convs = std::count_if(ins->outputs().begin(), ins->outputs().end(), pred("convolution"));
return !(dots < 2 and convs < 2); return not(dots < 2 and convs < 2);
} }
struct find_conv_dot_horiz_fusion struct find_conv_dot_horiz_fusion
...@@ -773,7 +847,7 @@ struct find_conv_dot_horiz_fusion ...@@ -773,7 +847,7 @@ struct find_conv_dot_horiz_fusion
auto y = j->inputs()[1]->get_shape().lens(); auto y = j->inputs()[1]->get_shape().lens();
if(x.size() != y.size()) if(x.size() != y.size())
return false; return false;
// Check that non-axises match // Check that non-axes match
int axis = 1; int axis = 1;
if(i->name() == "dot") if(i->name() == "dot")
{ {
...@@ -807,15 +881,23 @@ struct find_conv_dot_horiz_fusion ...@@ -807,15 +881,23 @@ struct find_conv_dot_horiz_fusion
concat_axis = axis; concat_axis = axis;
} }
for(auto arg : args) move_instructions_back(m, input, args);
m.move_instructions(arg, input); // TODO: Check if axes match
// TODO: Check if axises match
auto concat = auto concat =
m.insert_instruction(input, make_op("concat", {{"axis", concat_axis}}), args); m.insert_instruction(input, make_op("concat", {{"axis", concat_axis}}), args);
auto fused = m.insert_instruction(std::next(input), op, input, concat); auto fused = m.insert_instruction(std::next(input), op, input, concat);
int64_t offset = 0; int64_t offset = 0;
for(auto arg : range(start, last)) for(auto arg : range(start, last))
{ {
auto outputs = arg->outputs();
for(auto output : outputs)
{
if(output->name() != "reshape")
continue;
auto x = m.insert_instruction(output, make_op("contiguous"), arg);
m.replace_instruction(output, output->get_operator(), x);
}
int64_t len = arg->get_shape().lens()[axis]; int64_t len = arg->get_shape().lens()[axis];
m.replace_instruction( m.replace_instruction(
arg, arg,
...@@ -1010,7 +1092,7 @@ struct find_split_reshape ...@@ -1010,7 +1092,7 @@ struct find_split_reshape
// all outputs are reshape and of the same shape // all outputs are reshape and of the same shape
auto dims = any_cast<op::reshape>(rsp->get_operator()).dims; auto dims = any_cast<op::reshape>(rsp->get_operator()).dims;
if(!same_ops(vec_rsp)) if(not same_ops(vec_rsp))
{ {
return; return;
} }
...@@ -1026,23 +1108,42 @@ struct find_split_reshape ...@@ -1026,23 +1108,42 @@ struct find_split_reshape
auto rsp_lens = rsp->get_shape().lens(); auto rsp_lens = rsp->get_shape().lens();
auto rsp_strides = rsp->get_shape().strides(); auto rsp_strides = rsp->get_shape().strides();
rsp_strides.insert(rsp_strides.begin(), rsp_strides[0] * rsp_lens[0]); rsp_strides.insert(rsp_strides.begin(), rsp_strides[0] * rsp_lens[0]);
auto ait = std::find(rsp_strides.begin(), rsp_strides.end(), slc_dim_size);
auto ait = std::find(rsp_strides.begin(), rsp_strides.end(), slc_dim_size);
int rsp_axis = -1;
if(ait == rsp_strides.end()) if(ait == rsp_strides.end())
{ {
return; return;
} }
int rsp_axis = std::distance(rsp_strides.begin(), ait); else if(ait == rsp_strides.end() - 1)
{
// edge case
// slice_dim == 1, in that case it could match with last stride of 1.
// it should accumulate lengths from last dim in that case. discount 1 to avoid going
// out of bounds.
assert(slc_dim_size == 1);
rsp_axis = std::distance(rsp_strides.begin(), ait) - 1;
}
else
{
rsp_axis = std::distance(rsp_strides.begin(), ait);
}
// calculate reshape output shape // calculate reshape output shape
std::vector<int64_t> vec_dims(vec_rsp.size()); std::vector<int64_t> vec_dims(vec_rsp.size());
std::transform(vec_rsp.begin(), vec_rsp.end(), vec_dims.begin(), [&](auto is) { std::transform(vec_rsp.begin(), vec_rsp.end(), vec_dims.begin(), [&](auto is) {
return is->get_shape().lens()[rsp_axis]; return is->get_shape().lens()[rsp_axis];
}); });
std::vector<int64_t> rsp_out_lens(rsp_lens.begin(), rsp_lens.end()); std::vector<int64_t> rsp_out_lens(rsp_lens.begin(), rsp_lens.end());
rsp_out_lens[rsp_axis] = std::accumulate(vec_dims.begin(), vec_dims.end(), std::int64_t{0}); rsp_out_lens[rsp_axis] = std::accumulate(vec_dims.begin(), vec_dims.end(), std::int64_t{0});
// insert the reshape instruction // insert the reshape instruction and add contiguous if needed
if(not input->get_shape().standard())
{
input = m.insert_instruction(std::next(input), make_op("contiguous"), input);
}
auto rsp_ins = m.insert_instruction( auto rsp_ins = m.insert_instruction(
std::next(input), make_op("reshape", {{"dims", rsp_out_lens}}), input); std::next(input), make_op("reshape", {{"dims", rsp_out_lens}}), input);
...@@ -1089,7 +1190,7 @@ struct find_split_transpose ...@@ -1089,7 +1190,7 @@ struct find_split_transpose
// all transpose are the same // all transpose are the same
auto perm = any_cast<op::transpose>(trans->get_operator()).dims; auto perm = any_cast<op::transpose>(trans->get_operator()).dims;
if(!same_ops(vec_trans)) if(not same_ops(vec_trans))
{ {
return; return;
} }
...@@ -1136,6 +1237,7 @@ void simplify_algebra::apply(module& m) const ...@@ -1136,6 +1237,7 @@ void simplify_algebra::apply(module& m) const
find_neg_unit_ops{}, find_neg_unit_ops{},
find_zero_ops{}, find_zero_ops{},
find_zero_div_const{}, find_zero_div_const{},
find_dot_add{},
find_div_const{}, find_div_const{},
find_sub_const{}, find_sub_const{},
find_rsqrt{}, find_rsqrt{},
......
...@@ -99,7 +99,7 @@ struct find_reshaper ...@@ -99,7 +99,7 @@ struct find_reshaper
std::vector<instruction_ref> reshapes{ins}; std::vector<instruction_ref> reshapes{ins};
while(is_reshaper(reshapes.back())) while(is_reshaper(reshapes.back()))
{ {
assert(!reshapes.back()->inputs().empty()); assert(not reshapes.back()->inputs().empty());
assert(m.has_instruction(reshapes.back()->inputs().front())); assert(m.has_instruction(reshapes.back()->inputs().front()));
auto input = reshapes.back()->inputs().front(); auto input = reshapes.back()->inputs().front();
reshapes.push_back(input); reshapes.push_back(input);
...@@ -151,8 +151,11 @@ struct find_transpose ...@@ -151,8 +151,11 @@ struct find_transpose
{ {
auto matcher() const auto matcher() const
{ {
return match::name("transpose")(match::none_of( auto output_not_transpose =
match::skip_output(match::name("contiguous"))(match::name("transpose")))); match::none_of(match::skip_output(match::name("contiguous"))(match::name("transpose")));
auto input_has_transpose =
match::args(match::skip(match::name("contiguous"))(match::name("transpose")));
return match::name("transpose")(output_not_transpose, input_has_transpose);
} }
void apply(module& m, const match::matcher_result& mr) const void apply(module& m, const match::matcher_result& mr) const
...@@ -268,6 +271,44 @@ struct find_nested_slice ...@@ -268,6 +271,44 @@ struct find_nested_slice
} }
}; };
struct find_concat_multibroadcasts
{
auto matcher() const
{
return match::name("concat")(match::all_of[match::inputs()](match::name("multibroadcast")));
}
void apply(module& m, const match::matcher_result& mr) const
{
auto ins = mr.result;
auto op = any_cast<op::concat>(ins->get_operator());
auto out_lens = ins->get_shape().lens();
auto inputs = ins->inputs();
auto in_strides = inputs.front()->get_shape().strides();
// Only apply when concat axis is not a broadcasted dimension
if(std::any_of(inputs.begin(), inputs.end(), [&](auto i) {
return i->get_shape().strides()[op.axis] == 0;
}))
{
return;
}
// Use inputs of multibroadcast ops as inputs to new concat op
std::transform(inputs.begin(), inputs.end(), inputs.begin(), [](auto i) {
return i->inputs().front();
});
// Reduce axis by number of leading broadcasted dimensions
if(inputs.front()->get_shape().lens().size() < out_lens.size())
op.axis -= std::count(in_strides.begin(), in_strides.begin() + op.axis, 0);
auto concat = m.insert_instruction(ins, op, inputs);
m.replace_instruction(
ins, migraphx::make_op("multibroadcast", {{"out_lens", out_lens}}), concat);
}
};
struct find_concat_transpose struct find_concat_transpose
{ {
auto matcher() const auto matcher() const
...@@ -285,7 +326,7 @@ struct find_concat_transpose ...@@ -285,7 +326,7 @@ struct find_concat_transpose
auto permutation = find_permutation(s); auto permutation = find_permutation(s);
// permutation should be the same for all inputs // permutation should be the same for all inputs
if(!std::all_of(trans_inputs.begin(), trans_inputs.end(), [&](auto in) { if(not std::all_of(trans_inputs.begin(), trans_inputs.end(), [&](auto in) {
return (find_permutation(in->get_shape()) == permutation); return (find_permutation(in->get_shape()) == permutation);
})) }))
{ {
...@@ -664,9 +705,94 @@ struct find_slice_transpose ...@@ -664,9 +705,94 @@ struct find_slice_transpose
} }
}; };
struct find_transpose_slice
{
auto matcher() const
{
return match::name("transpose")(match::all_of[match::outputs()](match::name("slice")));
}
static std::vector<int64_t> slice_distance(const op::slice& op)
{
assert(op.starts.size() == op.ends.size());
std::vector<int64_t> result(op.starts.size());
std::transform(
op.ends.begin(), op.ends.end(), op.starts.begin(), result.begin(), std::minus<>{});
return result;
}
void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
auto slices = ins->outputs();
if(slices.empty())
return;
auto slice = any_cast<op::slice>(slices.front()->get_operator());
auto sdistance = slice_distance(slice);
// Check all distances and axes are the same
if(std::any_of(slices.begin(), slices.end(), [&](auto sins) {
auto s = any_cast<op::slice>(sins->get_operator());
return s.axes != slice.axes or slice_distance(s) != sdistance;
}))
return;
// Check distances are divisible by lens of corresponding axes
auto mod_by_distance = [&](const auto& v, auto f) {
return std::inner_product(v.begin(),
v.end(),
sdistance.begin(),
0,
std::plus<>{},
[&](auto x, auto d) -> uint64_t {
if(d == 0)
return 1;
return f(x) % d;
});
};
if(mod_by_distance(slice.axes, [&](auto x) { return ins->get_shape().lens()[x]; }) != 0 or
mod_by_distance(slice.starts, id{}) != 0 or mod_by_distance(slice.ends, id{}) != 0)
return;
// TODO: Handle multiple axes
if(sdistance.size() != 1)
return;
auto axis = slice.axes.front();
// Skip if axis would be packed
if(std::all_of(ins->get_shape().lens().begin(),
ins->get_shape().lens().begin() + axis,
[](auto x) { return x == 1; }))
return;
// Compute axis before transpose to use for unsqueeze
auto perm = ins->get_operator().to_value()["permutation"].to_vector<int64_t>();
auto preaxis = std::find(perm.begin(), perm.end(), axis) - perm.begin();
// Make unsqeeze
auto unsqueeze = m.insert_instruction(
ins, make_op("unsqueeze", {{"axes", {preaxis}}, {"steps", sdistance}}), ins->inputs());
// Make transpose
std::transform(perm.begin(), perm.end(), perm.begin(), [&](auto i) {
if(i > preaxis)
return i + 1;
return i;
});
perm.insert(perm.begin(), preaxis + 1);
auto transpose =
m.insert_instruction(ins, make_op("transpose", {{"permutation", perm}}), unsqueeze);
// Slice and squeeze
for(auto s : slices)
{
auto op = any_cast<op::slice>(s->get_operator());
op.axes = {0};
op.starts = {op.starts.front() / sdistance.front()};
op.ends = {op.ends.front() / sdistance.front()};
auto slice_ins = m.insert_instruction(ins, op, transpose);
auto squeeze =
m.insert_instruction(ins, make_op("squeeze", {{"axes", {0}}}), slice_ins);
m.replace_instruction(s, squeeze);
}
}
};
void simplify_reshapes::apply(module& m) const void simplify_reshapes::apply(module& m) const
{ {
for(int i = 0; i < 2; i++) for(int i = 0; i < 4; i++)
{ {
match::find_matches(m, match::find_matches(m,
find_where_op{}, find_where_op{},
...@@ -676,9 +802,11 @@ void simplify_reshapes::apply(module& m) const ...@@ -676,9 +802,11 @@ void simplify_reshapes::apply(module& m) const
find_reshaper{}, find_reshaper{},
find_transpose{}, find_transpose{},
find_concat_transpose{}, find_concat_transpose{},
find_concat_multibroadcasts{},
find_nested_convert{}, find_nested_convert{},
find_nested_slice{}, find_nested_slice{},
find_nested_concat{}, find_nested_concat{},
find_transpose_slice{},
find_slice_transpose{}, find_slice_transpose{},
find_transpose_contiguous_reshaper_unary{}); find_transpose_contiguous_reshaper_unary{});
dead_code_elimination{}.apply(m); dead_code_elimination{}.apply(m);
......
...@@ -21,42 +21,91 @@ ...@@ -21,42 +21,91 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#include <migraphx/gpu/device/where.hpp>
#include <migraphx/gpu/device/tensor.hpp> #include <migraphx/sqlite.hpp>
#include <migraphx/gpu/device/types.hpp> #include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/device/launch.hpp> #include <migraphx/errors.hpp>
#include <sqlite3.h>
#include <algorithm>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
template <class Shape> using sqlite3_ptr = MIGRAPHX_MANAGE_PTR(sqlite3*, sqlite3_close);
constexpr auto get_rank(const Shape&)
{
return decltype(typename Shape::hip_index{}.size()){};
}
void where(hipStream_t stream, struct sqlite_impl
const argument& result,
const argument& arg0,
const argument& arg1,
const argument& arg2)
{ {
hip_visit_all(result, arg1, arg2)([&](auto output, auto x, auto y) { sqlite3* get() const { return ptr.get(); }
hip_visit_all(arg0)([&](auto cond) { void open(const fs::path& p, int flags)
if constexpr(get_rank(cond.get_shape()) == get_rank(output.get_shape())) {
sqlite3* ptr_tmp = nullptr;
int rc = sqlite3_open_v2(p.string().c_str(), &ptr_tmp, flags, nullptr);
ptr = sqlite3_ptr{ptr_tmp};
if(rc != 0)
MIGRAPHX_THROW("error opening " + p.string() + ": " + error_message());
}
template <class F>
void exec(const char* sql, F f)
{
auto callback = [](void* obj, auto... xs) -> int {
try
{ {
gs_launch(stream, arg1.get_shape().elements())([=](auto idx) __device__ { const auto* g = static_cast<const F*>(obj);
auto i = output.get_shape().multi(idx); (*g)(xs...);
output[i] = cond[i] ? x[i] : y[i]; return 0;
});
} }
}); catch(...)
{
return -1;
}
};
int rc = sqlite3_exec(get(), sql, callback, &f, nullptr);
if(rc != 0)
MIGRAPHX_THROW(error_message());
}
std::string error_message() const
{
std::string msg = "sqlite3: ";
return msg + sqlite3_errmsg(get());
}
sqlite3_ptr ptr;
};
sqlite sqlite::read(const fs::path& p)
{
sqlite r;
r.impl = std::make_shared<sqlite_impl>();
r.impl->open(p, SQLITE_OPEN_READONLY);
return r;
}
sqlite sqlite::write(const fs::path& p)
{
sqlite r;
r.impl = std::make_shared<sqlite_impl>();
// Using '+' instead of bitwise '|' to avoid compilation warning
r.impl->open(p, SQLITE_OPEN_READWRITE + SQLITE_OPEN_CREATE);
return r;
}
std::vector<std::unordered_map<std::string, std::string>> sqlite::execute(const std::string& s)
{
std::vector<std::unordered_map<std::string, std::string>> result;
impl->exec(s.c_str(), [&](int n, char** texts, char** names) {
std::unordered_map<std::string, std::string> row;
row.reserve(n);
std::transform(
names,
names + n,
texts,
std::inserter(row, row.begin()),
[&](const char* name, const char* text) { return std::make_pair(name, text); });
result.push_back(row);
}); });
return result;
} }
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
...@@ -35,6 +35,7 @@ add_library(migraphx_cpu ...@@ -35,6 +35,7 @@ add_library(migraphx_cpu
dnnl.cpp dnnl.cpp
eltwise.cpp eltwise.cpp
erf.cpp erf.cpp
fmod.cpp
fuse_ops.cpp fuse_ops.cpp
gather.cpp gather.cpp
gemm.cpp gemm.cpp
...@@ -42,6 +43,7 @@ add_library(migraphx_cpu ...@@ -42,6 +43,7 @@ add_library(migraphx_cpu
logsoftmax.cpp logsoftmax.cpp
lowering.cpp lowering.cpp
lrn.cpp lrn.cpp
mod.cpp
preallocate.cpp preallocate.cpp
pooling.cpp pooling.cpp
reduction.cpp reduction.cpp
......
...@@ -49,7 +49,7 @@ struct dnnl_binary : dnnl_op<dnnl_binary, dnnl::binary> ...@@ -49,7 +49,7 @@ struct dnnl_binary : dnnl_op<dnnl_binary, dnnl::binary>
auto s0 = inputs.at(0); auto s0 = inputs.at(0);
auto s1 = inputs.at(1); auto s1 = inputs.at(1);
auto r = s0; auto r = s0;
if(s0 != s1 or !s0.packed()) if(s0 != s1 or not s0.packed())
{ {
r = shape{s0.type(), s0.lens()}; r = shape{s0.type(), s0.lens()};
} }
......
...@@ -21,16 +21,16 @@ ...@@ -21,16 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#include <migraphx/config.hpp>
#include <migraphx/target_assignments.hpp> #include <migraphx/cpu/pointwise.hpp>
#include <migraphx/op/fmod.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
void target_assignments::add_assignment(instruction_ref ins, const std::string& target) template struct cpu_binary<op::fmod>;
{
assignments.emplace(ins, target);
}
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
...@@ -43,6 +43,8 @@ ...@@ -43,6 +43,8 @@
#include <migraphx/op/argmax.hpp> #include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmin.hpp> #include <migraphx/op/argmin.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp> #include <migraphx/op/rnn_var_sl_last_output.hpp>
#include <migraphx/op/mod.hpp>
#include <migraphx/op/fmod.hpp>
#include <migraphx/shape_for_each.hpp> #include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp> #include <migraphx/iterator_for.hpp>
#include <migraphx/par_dfor.hpp> #include <migraphx/par_dfor.hpp>
......
...@@ -21,22 +21,16 @@ ...@@ -21,22 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#ifndef MIGRAPHX_GUARD_RTGLIB_COS_HPP #include <migraphx/config.hpp>
#define MIGRAPHX_GUARD_RTGLIB_COS_HPP #include <migraphx/cpu/pointwise.hpp>
#include <migraphx/op/mod.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/cos.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace gpu { namespace cpu {
struct hip_cos : unary_device<hip_cos, device::cos> template struct cpu_binary<op::mod>;
{
};
} // namespace gpu } // namespace cpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
#endif
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <migraphx/compile_options.hpp> #include <migraphx/compile_options.hpp>
#include <migraphx/fpga/context.hpp> #include <migraphx/fpga/context.hpp>
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <migraphx/supported_segments.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -41,7 +42,7 @@ struct target ...@@ -41,7 +42,7 @@ struct target
std::string name() const; std::string name() const;
std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const; std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const;
migraphx::context get_context() const { return context{}; } migraphx::context get_context() const { return context{}; }
float is_supported(instruction_ref ins, support_metric m); supported_segments find_supported(const_module_ref mod, support_metric m) const;
argument copy_to(const argument& arg) const { return arg; } argument copy_to(const argument& arg) const { return arg; }
argument copy_from(const argument& arg) const { return arg; } argument copy_from(const argument& arg) const { return arg; }
......
...@@ -95,7 +95,7 @@ void subgraph::apply(module_pass_manager& mpm) const ...@@ -95,7 +95,7 @@ void subgraph::apply(module_pass_manager& mpm) const
for(auto it : iterator_for(mod)) for(auto it : iterator_for(mod))
{ {
// assuming we want all the params/literals as inputs to the FPGA submodule // assuming we want all the params/literals as inputs to the FPGA submodule
if(migraphx::starts_with(it->name(), "@param") || if(migraphx::starts_with(it->name(), "@param") or
migraphx::starts_with(it->name(), "@literal")) migraphx::starts_with(it->name(), "@literal"))
{ {
literal_inputs.push_back(it); literal_inputs.push_back(it);
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include <migraphx/dead_code_elimination.hpp> #include <migraphx/dead_code_elimination.hpp>
#include <migraphx/generate.hpp> #include <migraphx/generate.hpp>
#include <migraphx/normalize_ops.hpp> #include <migraphx/normalize_ops.hpp>
#include <migraphx/iterator_for.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -62,12 +63,17 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti ...@@ -62,12 +63,17 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
argument target::allocate(const shape& s) const { return fill_argument(s, 0); } argument target::allocate(const shape& s) const { return fill_argument(s, 0); }
float is_supported(instruction_ref ins, support_metric m) supported_segments target::find_supported(const_module_ref mod, support_metric m) const
{ {
// for now, not using the ins and metric to return a value
(void)ins;
(void)m; (void)m;
return 1.0;
supported_segment instrs;
for(const auto ins : iterator_for(*mod))
{
instrs.instructions.insert(ins);
}
instrs.metric = 1; // arbitrary value
return {instrs};
} }
MIGRAPHX_REGISTER_TARGET(target); MIGRAPHX_REGISTER_TARGET(target);
......
...@@ -39,81 +39,9 @@ file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS} ...@@ -39,81 +39,9 @@ file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS}
message(STATUS "KERNEL_FILES: ${KERNEL_FILES}") message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
add_embed_library(migraphx_kernels ${KERNEL_FILES}) add_embed_library(migraphx_kernels ${KERNEL_FILES})
add_library(migraphx_device file(GLOB DEVICE_GPU_SRCS ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp)
device/acos.cpp add_library(migraphx_device ${DEVICE_GPU_SRCS})
device/acosh.cpp
device/add.cpp
device/add_clip.cpp
device/add_relu.cpp
device/add_sigmoid.cpp
device/add_tanh.cpp
device/argmax.cpp
device/argmin.cpp
device/asin.cpp
device/asinh.cpp
device/atan.cpp
device/atanh.cpp
device/ceil.cpp
device/clip.cpp
device/concat.cpp
device/contiguous.cpp
device/convert.cpp
device/cos.cpp
device/cosh.cpp
device/div.cpp
device/equal.cpp
device/erf.cpp
device/exp.cpp
device/fill.cpp
device/floor.cpp
device/gather.cpp
device/gelu.cpp
device/greater.cpp
device/int8_gemm_pack.cpp
device/layernorm.cpp
device/less.cpp
device/log.cpp
device/logical_and.cpp
device/logical_or.cpp
device/logical_xor.cpp
device/logsoftmax.cpp
device/max.cpp
device/min.cpp
device/mul.cpp
device/mul_add.cpp
device/mul_add_relu.cpp
device/multinomial.cpp
device/nonzero.cpp
device/pad.cpp
device/pow.cpp
device/prelu.cpp
device/prefix_scan_sum.cpp
device/recip.cpp
device/reduce_max.cpp
device/reduce_mean.cpp
device/reduce_min.cpp
device/reduce_sum.cpp
device/reduce_prod.cpp
device/relu.cpp
device/reverse.cpp
device/rnn_variable_seq_lens.cpp
device/round.cpp
device/rsqrt.cpp
device/scatter.cpp
device/sigmoid.cpp
device/sign.cpp
device/sin.cpp
device/sinh.cpp
device/softmax.cpp
device/sqdiff.cpp
device/sqrt.cpp
device/sub.cpp
device/tan.cpp
device/tanh.cpp
device/topk.cpp
device/unary_not.cpp
device/where.cpp
)
add_library(compile_for_gpu INTERFACE) add_library(compile_for_gpu INTERFACE)
target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns) target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored) target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored)
...@@ -151,15 +79,12 @@ add_library(migraphx_gpu ...@@ -151,15 +79,12 @@ add_library(migraphx_gpu
argmax.cpp argmax.cpp
argmin.cpp argmin.cpp
batch_norm_inference.cpp batch_norm_inference.cpp
clip.cpp
code_object_op.cpp code_object_op.cpp
compile_ops.cpp compile_ops.cpp
compile_gen.cpp compile_gen.cpp
compile_hip.cpp compile_hip.cpp
compile_hip_code_object.cpp compile_hip_code_object.cpp
compiler.cpp compiler.cpp
concat.cpp
convert.cpp
convolution.cpp convolution.cpp
deconvolution.cpp deconvolution.cpp
device_name.cpp device_name.cpp
...@@ -184,6 +109,7 @@ add_library(migraphx_gpu ...@@ -184,6 +109,7 @@ add_library(migraphx_gpu
pack_int8_args.cpp pack_int8_args.cpp
prefuse_ops.cpp prefuse_ops.cpp
pad.cpp pad.cpp
perfdb.cpp
pooling.cpp pooling.cpp
quant_convolution.cpp quant_convolution.cpp
reverse.cpp reverse.cpp
...@@ -191,7 +117,6 @@ add_library(migraphx_gpu ...@@ -191,7 +117,6 @@ add_library(migraphx_gpu
rocblas.cpp rocblas.cpp
scatter.cpp scatter.cpp
schedule_model.cpp schedule_model.cpp
softmax.cpp
sync_device.cpp sync_device.cpp
target.cpp target.cpp
topk.cpp topk.cpp
...@@ -206,68 +131,18 @@ function(register_migraphx_gpu_ops PREFIX) ...@@ -206,68 +131,18 @@ function(register_migraphx_gpu_ops PREFIX)
endforeach() endforeach()
endfunction() endfunction()
register_migraphx_gpu_ops(hip_ register_migraphx_gpu_ops(hip_
acosh
acos
add
argmax argmax
argmin argmin
asinh
asin
atanh
atan
ceil
clip
concat
convert
cosh
cos
div
equal
erf
exp
floor
gather gather
greater
less
log
logsoftmax logsoftmax
logical_and
logical_or
logical_xor
loop loop
max
min
mul
multinomial multinomial
nonzero nonzero
pad pad
pow
prelu
prefix_scan_sum prefix_scan_sum
recip
reduce_max
reduce_mean
reduce_min
reduce_prod
reduce_sum
relu
reverse reverse
round
rsqrt
scatter scatter
sigmoid
sign
sinh
sin
softmax
sqdiff
sqrt
sub
tanh
tan
topk topk
unary_not
where
) )
register_migraphx_gpu_ops(miopen_ register_migraphx_gpu_ops(miopen_
abs abs
...@@ -321,26 +196,11 @@ message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}") ...@@ -321,26 +196,11 @@ message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}")
set(MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL "") set(MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL "")
if(MIGRAPHX_ENABLE_MLIR) if(MIGRAPHX_ENABLE_MLIR)
find_library(MLIRAPI_LIBRARY MLIRMIOpen # Find package rocMLIR
PATH_SUFFIXES find_package(rocMLIR 1.0.0 CONFIG REQUIRED)
# Workaournd broken mlir install message(STATUS "Build with rocMLIR::rockCompiler ${rocMLIR_VERSION}")
lib/ lib/lib)
# REQUIRED is not supported before cmake 3.18
if(NOT MLIRAPI_LIBRARY)
message(FATAL_ERROR "libMLIRMIOpen not found")
else()
message(STATUS "Build with libMLIRMIOpen: " ${MLIRAPI_LIBRARY})
endif()
find_path(MLIRAPI_HEADERS NAMES mlir-c/Dialect/MIGraphX.h)
# Workaround MLIR broken installation
find_path(MLIRAPI_HEADERS2 NAMES mlir-c/Registration.h
PATH_SUFFIXES
include/external/include external/include)
target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR") target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR")
target_include_directories(migraphx_gpu SYSTEM PRIVATE ${MLIRAPI_HEADERS} ${MLIRAPI_HEADERS2}) target_link_libraries(migraphx_gpu PUBLIC rocMLIR::rockCompiler)
target_link_libraries(migraphx_gpu PUBLIC ${MLIRAPI_LIBRARY})
endif() endif()
set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "") set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "")
...@@ -379,9 +239,18 @@ endif() ...@@ -379,9 +239,18 @@ endif()
include(CheckLibraryExists) include(CheckLibraryExists)
get_target_property(MIOPEN_LOCATION MIOpen LOCATION) get_target_property(MIOPEN_LOCATION MIOpen LOCATION)
check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API) check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API)
check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
if(HAS_FIND_2_API)
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
message(STATUS "MIGraphx is using Find-2.0 API of MIOpen")
else()
message(STATUS "MIOpen does not have Find-2.0 API")
endif()
if(HAS_FIND_MODE_API) if(HAS_FIND_MODE_API)
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API) target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API)
message(STATUS "MIOpen has find mode api") message(STATUS "MIGraphx is using Find Mode API of MIOpen")
else() else()
message(STATUS "MIOpen does not have find mode api") message(STATUS "MIOpen does not have find mode api")
endif() endif()
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/clip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_clip::compute_shape(std::vector<shape> inputs) const
{
inputs.pop_back();
return op.compute_shape(inputs);
}
argument hip_clip::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
device::clip(ctx.get_stream().get(), args.back(), args.front(), args.at(1), args.at(2));
return args.back();
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -51,7 +51,8 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>& ...@@ -51,7 +51,8 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>&
std::vector<void*> kargs(args.size()); std::vector<void*> kargs(args.size());
std::transform( std::transform(
args.begin(), args.end(), kargs.begin(), [](const argument& a) { return a.data(); }); args.begin(), args.end(), kargs.begin(), [](const argument& a) { return a.data(); });
k.launch(ctx.get_stream().get(), global, local, std::move(kargs)); auto [start, stop] = ctx.get_perf_events();
k.launch(ctx.get_stream().get(), global, local, std::move(kargs), start, stop);
return args[get_output_arg(args.size())]; return args[get_output_arg(args.size())];
} }
void code_object_op::finalize(context&, const shape&, const std::vector<shape>&) void code_object_op::finalize(context&, const shape&, const std::vector<shape>&)
......
...@@ -22,9 +22,17 @@ ...@@ -22,9 +22,17 @@
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#include <migraphx/gpu/compile_gen.hpp> #include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/shape.hpp> #include <migraphx/shape.hpp>
#include <migraphx/permutation.hpp> #include <migraphx/permutation.hpp>
#include <migraphx/stringutils.hpp> #include <migraphx/stringutils.hpp>
#include <migraphx/module.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/cpp_generator.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/ranges.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -41,12 +49,13 @@ static std::vector<std::size_t> vector_sizes(const std::vector<shape>& inputs) ...@@ -41,12 +49,13 @@ static std::vector<std::size_t> vector_sizes(const std::vector<shape>& inputs)
return {4, 2}; return {4, 2};
} }
vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs) vectorize vectorize::elements(std::size_t axis,
const std::vector<shape>& inputs,
const std::vector<std::size_t>& sizes)
{ {
if(std::all_of( if(std::all_of(
inputs.begin(), inputs.end(), [&](const auto& s) { return s.lens()[axis] == 1; })) inputs.begin(), inputs.end(), [&](const auto& s) { return s.lens()[axis] == 1; }))
return {1, axis}; return {1, axis};
auto sizes = vector_sizes(inputs);
std::vector<std::size_t> max_vec_size; std::vector<std::size_t> max_vec_size;
std::transform(inputs.begin(), std::transform(inputs.begin(),
inputs.end(), inputs.end(),
...@@ -54,12 +63,19 @@ vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs ...@@ -54,12 +63,19 @@ vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs
[&](const auto& input) -> std::size_t { [&](const auto& input) -> std::size_t {
auto stride = input.strides()[axis]; auto stride = input.strides()[axis];
auto len = input.lens()[axis]; auto len = input.lens()[axis];
if(stride != 0 and stride != 1) if(not contains({0, 1}, stride))
return 1; return 1;
if(len == 1 and input.elements() > sizes.front()) if(len == 1 and input.elements() > sizes.front())
return sizes.front(); return sizes.front();
auto it = std::find_if( auto it = std::find_if(sizes.begin(), sizes.end(), [&](auto vsize) {
sizes.begin(), sizes.end(), [&](auto i) { return (len % i) == 0; }); // The len is divisible by the size and all the strides are divisible by
// the size
return (len % vsize) == 0 and
std::all_of(
input.strides().begin(), input.strides().end(), [&](auto i) {
return contains({0, 1}, i) or i % vsize == 0;
});
});
if(it != sizes.end()) if(it != sizes.end())
return *it; return *it;
return 1; return 1;
...@@ -67,6 +83,33 @@ vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs ...@@ -67,6 +83,33 @@ vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs
return {*std::min_element(max_vec_size.begin(), max_vec_size.end()), axis}; return {*std::min_element(max_vec_size.begin(), max_vec_size.end()), axis};
} }
vectorize vectorize::elements(context& ctx, std::size_t axis, const std::vector<shape>& inputs)
{
if(inputs.empty())
return {1, axis};
std::size_t n = std::max_element(inputs.begin(),
inputs.end(),
by(std::less<>{}, [](const auto& s) { return s.elements(); }))
->elements();
std::size_t max_global = ctx.get_current_device().get_cu_count() *
ctx.get_current_device().get_max_workitems_per_cu();
std::size_t over = n / max_global;
bool broadcasted =
std::any_of(inputs.begin(), inputs.end(), [](const auto& s) { return s.broadcasted(); });
std::vector<std::size_t> sizes;
if(broadcasted and over > 8)
sizes.push_back(8);
if(over > 4)
sizes.push_back(4);
sizes.push_back(2);
return elements(axis, inputs, sizes);
}
vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs)
{
return elements(axis, inputs, vector_sizes(inputs));
}
std::string vectorize::str() const std::string vectorize::str() const
{ {
return "vectorize<" + to_string(size) + ", " + to_string(axis) + ">()"; return "vectorize<" + to_string(size) + ", " + to_string(axis) + ">()";
...@@ -75,25 +118,25 @@ std::string vectorize::str() const ...@@ -75,25 +118,25 @@ std::string vectorize::str() const
preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs) preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs)
{ {
const std::size_t max_lds_bytes = 4096; const std::size_t max_lds_bytes = 4096;
std::vector<bool> result; std::vector<bool> result(inputs.size());
std::transform(inputs.begin(), std::vector<std::size_t> preloaded;
inputs.end(), auto idxs = range(inputs.size());
std::back_inserter(result), std::copy_if(idxs.begin(), idxs.end(), std::back_inserter(preloaded), [&](auto i) {
[&](const shape& input) { return input.strides()[axis] == 0; }); return inputs[i].strides()[axis] == 0;
auto bytes = std::inner_product(inputs.begin(), });
inputs.end(), std::sort(preloaded.begin(), preloaded.end(), by(std::less<>{}, [&](auto i) {
result.begin(), return inputs[i].bytes();
std::size_t{0}, }));
std::plus<>{},
[](const shape& s, bool b) -> std::size_t { std::size_t bytes = 0;
if(b) for(auto i : preloaded)
return s.bytes(); {
return 0; const auto& input = inputs[i];
}); bytes += input.bytes();
if(bytes < max_lds_bytes) if(bytes > max_lds_bytes)
return {result}; break;
// TODO: Try to partially preload items result[i] = true;
std::fill(result.begin(), result.end(), false); }
return {result}; return {result};
} }
...@@ -125,6 +168,45 @@ std::string make_transformer_args(std::vector<std::string> transformers) ...@@ -125,6 +168,45 @@ std::string make_transformer_args(std::vector<std::string> transformers)
return join_strings(std::move(transformers), ", "); return join_strings(std::move(transformers), ", ");
} }
std::string generate_pointwise(const module& pm, const std::string& name)
{
module m = pm;
run_passes(m, {eliminate_common_subexpression{}, dead_code_elimination{}});
cpp_generator g;
g.fmap([](const std::string& fname) { return "migraphx::" + fname; });
g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");
g.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})");
g.add_point_op("sign", "${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))");
g.add_point_op("equal", "migraphx::abs(${0} == ${1})");
g.add_point_op("less", "migraphx::abs(${0} < ${1})");
g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
g.add_point_op("not", "migraphx::abs(not ${0})");
// Add explict conversions
g.fresult(
[](const shape& s) { return "migraphx::convert<" + shape::cpp_type(s.type()) + ">"; });
g.create_function(
g.generate_module(m).set_attributes({"__device__"}).set_generic_types(m).set_name(name));
return g.str();
}
static std::vector<std::string> get_op_names(const module& m)
{
std::vector<std::string> result;
for(auto& ins : m)
{
if(starts_with(ins.name(), "@"))
continue;
result.push_back(ins.name());
}
return result;
}
std::string generate_name_from_ops(const module& m)
{
auto op_names = get_op_names(m);
return join_strings(op_names, "_");
}
} // namespace gen } // namespace gen
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
......
...@@ -138,16 +138,16 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over) ...@@ -138,16 +138,16 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
std::size_t groups = (n + local - 1) / local; std::size_t groups = (n + local - 1) / local;
std::size_t max_blocks = max_global / local; std::size_t max_blocks = max_global / local;
std::size_t nglobal = std::min(max_blocks * over, groups) * local; std::size_t nglobal = std::min(max_blocks * over, groups) * local;
return nglobal; return std::min(nglobal, n);
}; };
} }
std::size_t compute_block_size(std::size_t n, std::size_t max_block_size) std::size_t compute_block_size(std::size_t n, std::size_t max_block_size)
{ {
size_t block_size = 128; const std::size_t min_block_size = 64;
while(block_size <= max_block_size and block_size <= n) const std::size_t base_block_size = 32;
block_size *= 2; auto block_size = (((n - 1) / base_block_size + 1)) * base_block_size;
return block_size / 2; return std::min(std::max(min_block_size, block_size), max_block_size);
} }
operation compile_hip_code_object(const std::string& content, hip_compile_options options) operation compile_hip_code_object(const std::string& content, hip_compile_options options)
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/convert.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/convert.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_convert::compute_shape(std::vector<shape> inputs) const
{
inputs.pop_back();
check_shapes{inputs, *this}.packed();
return op.compute_shape(inputs);
}
argument hip_convert::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
device::convert(ctx.get_stream().get(), args[1], args[0]);
return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment