Commit 17f4ba28 authored by Paul's avatar Paul
Browse files

Merge branch 'jit-vector-reduce' into jit-vector-softmax

parents a8a8d868 c84154b8
......@@ -9,7 +9,19 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
operation make_op(const std::string& name);
operation make_op(const std::string& name, const value& v);
operation make_op(const std::string& name,
const std::initializer_list<std::pair<std::string, value>>& v);
operation make_op_from_value(const std::string& name, const value& v);
// Generic overload used when the second argument is a migraphx::value (or
// something convertible to one). It is a template rather than a plain
// `const value&` overload so that a braced initializer_list cannot be passed
// to it directly: braced lists are handled by the key-value pair overload,
// which enforces at compile-time that every element is a key-value pair,
// whereas migraphx::value would also accept other kinds of initializer_list
// such as arrays.
template <class Value>
operation make_op(const std::string& name, const Value& v)
{
    // Binding to `const value&` applies the same implicit conversion that
    // passing `v` straight to make_op_from_value would.
    const value& as_value = v;
    return make_op_from_value(name, as_value);
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
......@@ -156,6 +156,19 @@ struct id_matcher
}
};
// Forward declare class and constructors
// These declarations make basic_matcher and its factory functions visible
// before basic_matcher itself is defined.
template <class M>
struct basic_matcher;
// Factory returning a basic_matcher<M> built from the matcher `m`.
template <class M>
basic_matcher<M> make_basic_matcher(M m);
// Factory returning a basic_matcher that wraps `f` in a function_matcher<F>.
template <class F>
basic_matcher<function_matcher<F>> make_basic_fun_matcher(F f);
// Factory returning a basic_matcher that wraps `p` in a predicate_matcher<P>.
template <class P>
basic_matcher<predicate_matcher<P>> make_basic_pred_matcher(P p);
/// The basic matcher provides the all_of composability of the matcher
template <class M>
struct basic_matcher
......@@ -167,8 +180,8 @@ struct basic_matcher
{
// Copy m because we can't capture `this` by value
auto mm = m;
return make_bf_matcher([=](matcher_context& ctx,
instruction_ref ins) -> optional<instruction_ref> {
return make_basic_fun_matcher([=](matcher_context& ctx,
instruction_ref ins) -> optional<instruction_ref> {
auto result = mm.match(ctx, ins);
if(result)
{
......@@ -239,7 +252,39 @@ struct any_matcher : any_matcher_base
struct matcher_result
{
std::unordered_map<std::string, instruction_ref> instructions;
struct instruction_container
{
instruction_container() = default;
instruction_container(std::unordered_map<std::string, instruction_ref> x)
: ins_map(std::move(x))
{
}
instruction_ref operator[](const std::string& name) const
{
auto it = ins_map.find(name);
if(it == ins_map.end())
MIGRAPHX_THROW("Accessing name that wasn't bound in matcher: " + name);
return it->second;
}
auto find(const std::string& name) const { return ins_map.find(name); }
auto begin() const { return ins_map.cbegin(); }
auto end() const { return ins_map.cend(); }
bool has_instructions_in(const module& mod) const
{
return std::all_of(ins_map.begin(), ins_map.end(), [&](auto&& p) {
return mod.has_instruction(p.second);
});
}
private:
std::unordered_map<std::string, instruction_ref> ins_map;
};
instruction_container instructions;
instruction_ref result;
};
......@@ -255,6 +300,7 @@ matcher_result match_instruction(module& mod, instruction_ref ins, M&& m)
{
result.result = ins;
result.instructions = ctx.instructions;
assert(result.instructions.has_instructions_in(mod));
}
else
{
......@@ -533,6 +579,18 @@ auto skip_output(Ms... ms)
});
}
/// Builds a matcher that ignores the instruction it is applied to and
/// instead looks up the name `s` in the matcher context's bound
/// instructions. It yields the bound instruction when the name is present
/// and nullopt otherwise.
inline auto var(std::string s)
{
    return make_basic_fun_matcher(
        // The name is moved into the closure so the matcher owns its own copy.
        [s = std::move(s)](const matcher_context& ctx,
                           instruction_ref) -> optional<instruction_ref> {
            auto bound = ctx.instructions.find(s);
            if(bound != ctx.instructions.end())
                return bound->second;
            return nullopt;
        });
}
inline auto name(std::string s)
{
return make_basic_pred_matcher(
......
......@@ -17,7 +17,7 @@ struct memory_coloring
std::string allocation_op{};
bool verify = false;
std::string name() const { return "memory coloring"; }
void apply(module& p) const;
void apply(module& m) const;
};
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -15,7 +15,7 @@ struct module;
struct propagate_constant
{
std::string name() const { return "propagate_constant"; }
void apply(module& p) const;
void apply(module& m) const;
};
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -16,7 +16,7 @@ struct module;
struct rewrite_batchnorm
{
std::string name() const { return "rewrite_batchnorm"; }
void apply(module& p) const;
void apply(module& m) const;
};
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -15,7 +15,7 @@ struct module;
struct rewrite_pooling
{
std::string name() const { return "rewrite_pooling"; }
void apply(module& prog) const;
void apply(module& m) const;
};
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -19,22 +19,22 @@ struct module;
struct rewrite_rnn
{
std::string name() const { return "rewrite_rnn"; }
void apply(module& prog) const;
void apply(module& m) const;
private:
// for vanilla rnn operators
void apply_vanilla_rnn(module& prog, instruction_ref ins) const;
void apply_vanilla_rnn(module& m, instruction_ref ins) const;
std::vector<instruction_ref> vanilla_rnn_cell(bool is_forward,
module& prog,
module& m,
instruction_ref ins,
std::vector<instruction_ref> inputs,
operation& actv_func) const;
std::vector<operation> vanilla_rnn_actv_funcs(instruction_ref ins) const;
// for gru operators
void apply_gru(module& prog, instruction_ref ins) const;
void apply_gru(module& m, instruction_ref ins) const;
std::vector<instruction_ref> gru_cell(bool is_forward,
module& prog,
module& m,
instruction_ref ins,
std::vector<instruction_ref> inputs,
int linear_before_reset,
......@@ -44,9 +44,9 @@ struct rewrite_rnn
std::vector<operation> gru_actv_funcs(instruction_ref ins) const;
// for lstm operators
void apply_lstm(module& prog, instruction_ref ins) const;
void apply_lstm(module& m, instruction_ref ins) const;
std::vector<instruction_ref> lstm_cell(bool is_forward,
module& prog,
module& m,
instruction_ref ins,
std::vector<instruction_ref> inputs,
const operation& actv_func1,
......@@ -55,24 +55,23 @@ struct rewrite_rnn
std::vector<operation> lstm_actv_funcs(instruction_ref ins) const;
bool is_variable_seq_lens(const module& prog, instruction_ref seq_lens) const;
instruction_ref replace_last_hs_output(module& prog,
bool is_variable_seq_lens(const module& m, instruction_ref seq_lens) const;
instruction_ref replace_last_hs_output(module& m,
instruction_ref ins,
instruction_ref seq_lens,
instruction_ref last_hs_output,
op::rnn_direction dirct) const;
void replace_last_cell_output(module& prog,
void replace_last_cell_output(module& m,
instruction_ref ins,
instruction_ref seq_lens,
instruction_ref cell_outputs,
instruction_ref last_cell_output,
op::rnn_direction dirct) const;
std::size_t
get_seq_len(const module& prog, instruction_ref input, instruction_ref seq_lens) const;
std::size_t get_seq_len(const module& m, instruction_ref input, instruction_ref seq_lens) const;
instruction_ref pad_hidden_states(module& prog,
instruction_ref pad_hidden_states(module& m,
instruction_ref seq,
instruction_ref seq_lens,
instruction_ref hs) const;
......
......@@ -19,7 +19,7 @@ struct schedule
schedule_model model{};
bool enable = true;
std::string name() const { return "schedule"; }
void apply(module& p) const;
void apply(module& m) const;
};
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -15,7 +15,7 @@ struct module;
struct simplify_algebra
{
std::string name() const { return "simplify_algebra"; }
void apply(module& p) const;
void apply(module& m) const;
};
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -16,7 +16,7 @@ struct module;
struct simplify_reshapes
{
std::string name() const { return "simplify_reshapes"; }
void apply(module& p) const;
void apply(module& m) const;
};
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -5,20 +5,41 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
operation make_op(const std::string& name) { return load_op(name); }
operation make_op(const std::string& name, const value& v)
template <class F>
operation make_op_generic(const std::string& name, F for_each)
{
if(not(v.is_object() or (v.empty() and v.is_array())))
MIGRAPHX_THROW("Value is not an object");
auto op = load_op(name);
// Merge values
value w = op.to_value();
for(auto&& x : v)
{
w.at(x.get_key()) = x.without_key();
}
for_each([&](const auto& key, const auto& x) {
if(not w.contains(key))
// NOLINTNEXTLINE(performance-inefficient-string-concatenation)
MIGRAPHX_THROW("No key '" + key + "' in " + name);
w.at(key) = x;
});
op.from_value(w);
return op;
}
// Builds the operation `name` from a braced list of key-value pairs by
// handing each (key, value) pair, in list order, to make_op_generic.
operation make_op(const std::string& name,
                  const std::initializer_list<std::pair<std::string, value>>& v)
{
    // Visitor that feeds every pair of the list to the callback supplied by
    // make_op_generic.
    auto visit_pairs = [&](auto f) {
        for(const auto& entry : v)
            f(entry.first, entry.second);
    };
    return make_op_generic(name, visit_pairs);
}
// Builds the operation `name` from a migraphx::value. The value must be an
// object, or an empty array; anything else is rejected with MIGRAPHX_THROW.
// Each element's key and key-less value are handed to make_op_generic.
operation make_op_from_value(const std::string& name, const value& v)
{
    // Accept objects, and also an empty value that reports itself as an array.
    const bool acceptable = v.is_object() or (v.empty() and v.is_array());
    if(not acceptable)
        MIGRAPHX_THROW("Value is not an object for make_op: " + name);

    // Visitor that feeds every (key, value-without-key) element of `v` to
    // the callback supplied by make_op_generic.
    auto visit_fields = [&](auto f) {
        for(const auto& field : v)
            f(field.get_key(), field.without_key());
    };
    return make_op_generic(name, visit_fields);
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -22,6 +22,8 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_FINALIZE)
struct module_impl
{
// A list is used to keep references to an instruction stable
......@@ -553,8 +555,14 @@ instruction_ref module::find_dangling_reference() const
void module::finalize(context& ctx)
{
const bool trace = enabled(MIGRAPHX_TRACE_FINALIZE{});
for(auto ins : iterator_for(*this))
{
if(trace)
{
std::cout << "Finalize: ";
this->debug_print(ins);
}
ins->finalize(ctx);
for(const auto& smod : ins->module_inputs())
{
......
......@@ -4,11 +4,11 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
void memory_coloring::apply(module& p) const
void memory_coloring::apply(module& m) const
{
if(!enabled(MIGRAPHX_DISABLE_MEMORY_COLORING{}))
{
memory_coloring_impl opt(&p, allocation_op, verify);
memory_coloring_impl opt(&m, allocation_op, verify);
opt.run();
}
}
......
......@@ -20,9 +20,9 @@ bool skip_propogate(instruction_ref ins)
return false;
}
void propagate_constant::apply(module& p) const
void propagate_constant::apply(module& m) const
{
for(auto i : iterator_for(p))
for(auto i : iterator_for(m))
{
if(i->name() != "@literal")
continue;
......@@ -42,8 +42,8 @@ void propagate_constant::apply(module& p) const
if(not r.empty())
{
assert(r.get_shape() == child->get_shape());
auto l = p.add_literal(r.get_shape(), r.data());
self(p.replace_instruction(child, l));
auto l = m.add_literal(r.get_shape(), r.data());
self(m.replace_instruction(child, l));
}
}
})(i);
......
......@@ -14,9 +14,9 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
void rewrite_batchnorm::apply(module& p) const
void rewrite_batchnorm::apply(module& m) const
{
for(auto ins : iterator_for(p))
for(auto ins : iterator_for(m))
{
if(ins->name() != "batch_norm_inference")
continue;
......@@ -46,13 +46,13 @@ void rewrite_batchnorm::apply(module& p) const
});
auto broadcast = op::broadcast{1, ins->get_shape().lens()};
auto a_ins = p.add_literal({a.get_shape(), a.data()});
auto a_broadcast = p.insert_instruction(ins, broadcast, a_ins);
auto mul = p.insert_instruction(ins, make_op("mul"), ins->inputs().front(), a_broadcast);
auto b_ins = p.add_literal({b.get_shape(), b.data()});
auto b_broadcast = p.insert_instruction(ins, broadcast, b_ins);
auto add = p.insert_instruction(ins, make_op("add"), mul, b_broadcast);
p.replace_instruction(ins, add);
auto a_ins = m.add_literal({a.get_shape(), a.data()});
auto a_broadcast = m.insert_instruction(ins, broadcast, a_ins);
auto mul = m.insert_instruction(ins, make_op("mul"), ins->inputs().front(), a_broadcast);
auto b_ins = m.add_literal({b.get_shape(), b.data()});
auto b_broadcast = m.insert_instruction(ins, broadcast, b_ins);
auto add = m.insert_instruction(ins, make_op("add"), mul, b_broadcast);
m.replace_instruction(ins, add);
}
}
......
......@@ -12,9 +12,9 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
void rewrite_pooling::apply(module& prog) const
void rewrite_pooling::apply(module& m) const
{
for(auto ins : iterator_for(prog))
for(auto ins : iterator_for(m))
{
if(ins->name() != "pooling")
continue;
......@@ -33,26 +33,25 @@ void rewrite_pooling::apply(module& prog) const
continue;
std::int64_t n = s.lens()[0];
std::int64_t c = s.lens()[1];
auto reshape = prog.insert_instruction(
auto reshape = m.insert_instruction(
ins, make_op("reshape", {{"dims", {n * c, -1}}}), ins->inputs().front());
instruction_ref pooling{};
// average pooling
if(op.mode == op::pooling_mode::average)
{
pooling =
prog.insert_instruction(ins, make_op("reduce_mean", {{"axes", {1}}}), reshape);
pooling = m.insert_instruction(ins, make_op("reduce_mean", {{"axes", {1}}}), reshape);
}
// max pooling
else
{
pooling = prog.insert_instruction(ins, make_op("reduce_max", {{"axes", {1}}}), reshape);
pooling = m.insert_instruction(ins, make_op("reduce_max", {{"axes", {1}}}), reshape);
}
std::vector<int64_t> rsp_lens(lens.size(), 1);
rsp_lens[0] = n;
rsp_lens[1] = c;
prog.replace_instruction(ins, make_op("reshape", {{"dims", rsp_lens}}), pooling);
m.replace_instruction(ins, make_op("reshape", {{"dims", rsp_lens}}), pooling);
}
}
......
This diff is collapsed.
......@@ -42,7 +42,7 @@ struct stream_info
std::unordered_map<instruction_ref, std::size_t> iweights;
ins_dep_map mod_implicit_deps;
void calc_implicit_deps(const module& p) { mod_implicit_deps = p.calc_implicit_deps(); }
void calc_implicit_deps(const module& m) { mod_implicit_deps = m.calc_implicit_deps(); }
void accumulate_weights(instruction_ref last, const schedule_model& model)
{
......@@ -116,15 +116,15 @@ struct stream_info
}
};
std::size_t assign_streams(module& p, std::size_t n)
std::size_t assign_streams(module& m, std::size_t n)
{
assert(n > 0);
partition critical;
std::unordered_map<instruction_ref, std::deque<partition>> partitions;
partitions.reserve(weights.size());
fix([&](auto self, auto ins, auto& part) {
assert(not is_end(ins, p.end()));
if(not p.has_instruction(ins))
assert(not is_end(ins, m.end()));
if(not m.has_instruction(ins))
return;
if(contains(partitions, ins))
return;
......@@ -151,8 +151,8 @@ struct stream_info
}
}
// Sort instructions
p.move_instruction(ins, p.end());
})(std::prev(p.end()), critical);
m.move_instruction(ins, m.end());
})(std::prev(m.end()), critical);
// Set the critical partition to stream 0
set_stream(critical, 0);
......@@ -197,13 +197,13 @@ struct stream_info
}
};
void sort(module& p, std::size_t)
void sort(module& m, std::size_t)
{
std::set<weight_ins, compare_weight_ins> children;
std::unordered_map<instruction_ref, std::size_t> visited;
auto last = std::prev(p.end());
auto last = std::prev(m.end());
auto mw = this->weights.at(last);
auto nw = mw / (p.size() + 1);
auto nw = mw / (m.size() + 1);
auto add_child = [&](auto ins) {
auto x = 1 + (mw - this->weights.at(ins)) / (nw + 1);
auto w = x * this->iweights.at(ins);
......@@ -222,10 +222,10 @@ struct stream_info
// Pop the first element
auto top = children.begin()->second;
children.erase(children.begin());
p.move_instruction(top, p.begin());
m.move_instruction(top, m.begin());
for(auto ins : top->inputs())
{
if(not p.has_instruction(ins))
if(not m.has_instruction(ins))
continue;
add_child(ins);
}
......@@ -234,7 +234,7 @@ struct stream_info
{
for(auto ins : mod_implicit_deps.at(top))
{
assert(p.has_instruction(ins));
assert(m.has_instruction(ins));
add_child(ins);
}
}
......@@ -242,12 +242,12 @@ struct stream_info
// move dangling parameters to the front so that they are not removed
auto ins = std::next(last);
while(ins != p.end())
while(ins != m.end())
{
auto next = std::next(ins);
if(ins->name() == "@param")
{
p.move_instruction(ins, p.begin());
m.move_instruction(ins, m.begin());
}
ins = next;
}
......@@ -364,18 +364,18 @@ struct stream_info
}
std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>>
find_concurrent_instructions(module& p) const
find_concurrent_instructions(module& m) const
{
std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>> result;
std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>> merge_from;
dominator_info di = compute_dominator(p);
result.reserve(p.size());
merge_from.reserve(p.size());
for(auto ins : reverse_iterator_for(p))
dominator_info di = compute_dominator(m);
result.reserve(m.size());
merge_from.reserve(m.size());
for(auto ins : reverse_iterator_for(m))
{
for(auto&& arg : ins->outputs())
{
if(not p.has_instruction(arg))
if(not m.has_instruction(arg))
continue;
if(is_merge_point(arg))
merge_from[ins].insert(arg);
......@@ -415,18 +415,18 @@ struct stream_info
}
std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>>
get_conflicts(module& p)
get_conflicts(module& m)
{
using conflict_table_type =
std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>>;
conflict_table_type conflict_table;
auto concur_ins = this->find_concurrent_instructions(p);
auto concur_ins = this->find_concurrent_instructions(m);
// Compute an index for each instruction
std::unordered_map<instruction_ref, std::size_t> ins2index;
std::size_t index_total = 0;
for(auto ins : iterator_for(p))
for(auto ins : iterator_for(m))
ins2index[ins] = index_total++;
std::vector<conflict_table_type> thread_conflict_tables(
......@@ -507,21 +507,21 @@ struct stream_info
}
};
void schedule::apply(module& p) const
void schedule::apply(module& m) const
{
if(not enable)
return;
stream_info si;
si.calc_implicit_deps(p);
auto last = std::prev(p.end());
si.calc_implicit_deps(m);
auto last = std::prev(m.end());
si.accumulate_weights(last, model);
auto nstreams = si.assign_streams(p, model.concurrency());
si.sort(p, model.concurrency());
auto nstreams = si.assign_streams(m, model.concurrency());
si.sort(m, model.concurrency());
if(enabled(MIGRAPHX_TRACE_COMPILE{}) or enabled(MIGRAPHX_TRACE_SCHEDULE{}))
{
p.annotate(std::cout, [&](auto ins) {
m.annotate(std::cout, [&](auto ins) {
if(ins->name() == "@param" and not contains(si.weights, ins))
return;
......@@ -548,9 +548,9 @@ void schedule::apply(module& p) const
std::unordered_map<instruction_ref, std::size_t> ins2wait;
std::unordered_map<std::size_t, std::unordered_set<std::size_t>> waited_for;
std::unordered_map<instruction_ref, std::unordered_set<std::size_t>> ins2waited;
ins2wait.reserve(p.size());
ins2waited.reserve(p.size());
for(auto ins : iterator_for(p))
ins2wait.reserve(m.size());
ins2waited.reserve(m.size());
for(auto ins : iterator_for(m))
{
// Only schedule instructions that have a stream
if(not si.has_stream(ins))
......@@ -559,7 +559,7 @@ void schedule::apply(module& p) const
// Schedule instruction on the stream
auto stream = si.get_stream(ins);
assert(stream < model.concurrency());
model.sched(p, ins, stream);
model.sched(m, ins, stream);
// Insert wait instructions
if(si.is_merge_point(ins, stream))
{
......@@ -572,14 +572,14 @@ void schedule::apply(module& p) const
if(not contains(ins2wait, i))
{
ins2wait[i] = wait_id;
model.record(p, i, wait_id);
model.record(m, i, wait_id);
wait_id++;
}
auto w = ins2wait.at(i);
// If we already waited for the event on this stream then don't
// insert another wait event
if(not contains(waited_for[stream], w))
model.wait(p, ins, w);
model.wait(m, ins, w);
// Store the event as waited
waited_for[stream].insert(w);
// Store all wait events that have been waited on prior to the recorded instruction
......@@ -594,7 +594,7 @@ void schedule::apply(module& p) const
}
// Add memory conflicts
auto conflict_table = si.get_conflicts(p);
auto conflict_table = si.get_conflicts(m);
for(auto&& ip : conflict_table)
{
if(ip.second.empty())
......@@ -602,7 +602,7 @@ void schedule::apply(module& p) const
std::vector<instruction_ref> args;
args.push_back(ip.first);
args.insert(args.end(), ip.second.begin(), ip.second.end());
p.insert_instruction(std::next(ip.first), make_op("identity"), args);
m.insert_instruction(std::next(ip.first), make_op("identity"), args);
}
}
......
This diff is collapsed.
......@@ -53,7 +53,7 @@ struct match_find_quantizable_ops
match::arg(1)(dequantizelinear_op("x2", "scale2")));
}
void apply(module& m, match::matcher_result r) const
void apply(module& m, const match::matcher_result& r) const
{
auto qop = r.result;
auto q1 = r.instructions["x1"];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment