Commit af5b39a4 authored by Paul's avatar Paul
Browse files

Move to stream_info class

parent 0a39a958
......@@ -10,50 +10,18 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
// Reports whether the operator of ins is context-free or has a name that begins
// with '@'. The name is only inspected when the operator is not context-free.
bool stream_free(instruction_ref ins)
{
    if(is_context_free(ins->get_operator()))
        return true;
    return ins->get_operator().name().front() == '@';
}
struct stream_info
{
std::unordered_map<instruction_ref, std::size_t> ins2stream;
std::unordered_map<instruction_ref, std::size_t> weights;
// Records stream n for ins, replacing any stream previously recorded for it.
void set_stream(instruction_ref ins, std::size_t n)
{
    ins2stream[ins] = n;
}
// Returns the stream recorded for ins. The lookup uses unordered_map::at, so an
// instruction with no recorded stream raises std::out_of_range.
std::size_t get_stream(instruction_ref ins) const
{
    return ins2stream.at(ins);
}
// Returns true when a stream has been recorded for ins.
bool has_stream(instruction_ref ins) const
{
    return ins2stream.find(ins) != ins2stream.end();
}
bool different(const std::vector<instruction_ref>& v) const
{
if(v.size() < 2)
return false;
auto stream = get_stream(v.front());
return not std::all_of(
v.begin(), v.end(), [&](instruction_ref x) { return get_stream(x) == stream; });
}
// True when the outputs of ins are not all assigned to the same stream.
bool is_split_point(instruction_ref ins) const
{
    return different(ins->outputs());
}
// True when the inputs of ins are not all assigned to the same stream.
bool is_merge_point(instruction_ref ins) const
{
    return different(ins->inputs());
}
std::vector<std::size_t> wait_for(instruction_ref ins) const
{
std::set<std::size_t> result;
auto s = get_stream(ins);
for(auto i : ins->inputs())
void accumulate_weights(instruction_ref last, const schedule_model& model)
{
auto stream = get_stream(i);
if(stream != s)
result.insert(stream);
}
return {result.begin(), result.end()};
}
};
void schedule::apply(program& p) const
{
const std::size_t min_partition_threshold = 2;
// Compute accumulated weights
std::unordered_map<instruction_ref, std::size_t> weights;
auto last = std::prev(p.end());
fix<std::size_t>([&](auto self, auto ins) -> std::size_t {
if(weights.count(ins) == 0)
{
......@@ -65,23 +33,24 @@ void schedule::apply(program& p) const
}
return weights[ins];
})(last);
}
// Assign streams
auto streams = model.concurrency();
stream_info si;
void assign_streams(program& p, std::size_t streams)
{
const std::size_t min_partition_threshold = 2;
for(std::size_t stream = 0; stream < streams; stream++)
{
fix([&](auto self, auto ins) {
// Only assign streams if not already assigned
if(not si.has_stream(ins))
si.set_stream(ins, stream);
if(not has_stream(ins))
set_stream(ins, stream);
instruction_ref child = p.end();
std::size_t w = 0;
for(auto i : ins->inputs())
{
const auto weight = weights[i];
// Skip instructions that already have a stream assignment or whose weight is too low
if(si.has_stream(i) or weight <= min_partition_threshold)
if(has_stream(i) or weight <= min_partition_threshold)
{
self(i);
}
......@@ -94,16 +63,58 @@ void schedule::apply(program& p) const
}
if(child != p.end())
self(child);
})(last);
})(std::prev(p.end()));
}
// Assign remaining instructions
for(auto ins : iterator_for(p))
{
if(si.has_stream(ins))
if(has_stream(ins))
continue;
si.set_stream(ins, streams - 1);
set_stream(ins, streams - 1);
}
}
// Records stream n for ins, replacing any stream previously recorded for it.
void set_stream(instruction_ref ins, std::size_t n)
{
    ins2stream[ins] = n;
}
// Returns the stream recorded for ins. The lookup uses unordered_map::at, so an
// instruction with no recorded stream raises std::out_of_range.
std::size_t get_stream(instruction_ref ins) const
{
    return ins2stream.at(ins);
}
// Returns true when a stream has been recorded for ins.
bool has_stream(instruction_ref ins) const
{
    return ins2stream.find(ins) != ins2stream.end();
}
bool different(const std::vector<instruction_ref>& v) const
{
if(v.size() < 2)
return false;
auto stream = get_stream(v.front());
return not std::all_of(
v.begin(), v.end(), [&](instruction_ref x) { return get_stream(x) == stream; });
}
// True when the outputs of ins are not all assigned to the same stream.
bool is_split_point(instruction_ref ins) const
{
    return different(ins->outputs());
}
// True when the inputs of ins are not all assigned to the same stream.
bool is_merge_point(instruction_ref ins) const
{
    return different(ins->inputs());
}
// Collects the streams of the inputs of ins that differ from the stream of ins
// itself. The streams are gathered in a std::set, so the returned vector holds
// each stream once, in ascending order.
std::vector<std::size_t> wait_for(instruction_ref ins) const
{
    const auto own_stream = get_stream(ins);
    std::set<std::size_t> others;
    for(auto input : ins->inputs())
    {
        const auto input_stream = get_stream(input);
        if(input_stream == own_stream)
            continue;
        others.insert(input_stream);
    }
    return std::vector<std::size_t>(others.begin(), others.end());
}
};
void schedule::apply(program& p) const
{
stream_info si;
auto last = std::prev(p.end());
si.accumulate_weights(last, model);
si.assign_streams(p, model.concurrency());
// Topo sort
fix([&](auto self, auto ins) {
for(auto i : ins->inputs())
......
......@@ -79,7 +79,7 @@ void schedule_model::wait(program& p,
static std::unordered_map<std::string, std::size_t> create_weight_map()
{
return {
{"hip::load_literal", 1},
{"hip::load_literal", 0},
{"hip::allocate", 0},
{"gpu::convolution", 4},
{"gpu::conv_bias_relu", 4},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment