Commit af5b39a4 authored by Paul

Move to stream_info class

parent 0a39a958
@@ -10,50 +10,18 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 
+bool stream_free(instruction_ref ins)
+{
+    return is_context_free(ins->get_operator()) or ins->get_operator().name().front() == '@';
+}
+
 struct stream_info
 {
     std::unordered_map<instruction_ref, std::size_t> ins2stream;
-    void set_stream(instruction_ref ins, std::size_t n) { ins2stream[ins] = n; }
-    std::size_t get_stream(instruction_ref ins) const { return ins2stream.at(ins); }
-    bool has_stream(instruction_ref ins) const { return ins2stream.count(ins) > 0; }
-    bool different(const std::vector<instruction_ref>& v) const
-    {
-        if(v.size() < 2)
-            return false;
-        auto stream = get_stream(v.front());
-        return not std::all_of(
-            v.begin(), v.end(), [&](instruction_ref x) { return get_stream(x) == stream; });
-    }
-    bool is_split_point(instruction_ref ins) const { return different(ins->outputs()); }
-    bool is_merge_point(instruction_ref ins) const { return different(ins->inputs()); }
-    std::vector<std::size_t> wait_for(instruction_ref ins) const
-    {
-        std::set<std::size_t> result;
-        auto s = get_stream(ins);
-        for(auto i : ins->inputs())
-        {
-            auto stream = get_stream(i);
-            if(stream != s)
-                result.insert(stream);
-        }
-        return {result.begin(), result.end()};
-    }
-};
-
-void schedule::apply(program& p) const
-{
-    const std::size_t min_partition_threshold = 2;
-    // Compute accumulated weights
-    std::unordered_map<instruction_ref, std::size_t> weights;
-    auto last = std::prev(p.end());
+    std::unordered_map<instruction_ref, std::size_t> weights;
+
+    void accumulate_weights(instruction_ref last, const schedule_model& model)
+    {
         fix<std::size_t>([&](auto self, auto ins) -> std::size_t {
             if(weights.count(ins) == 0)
             {
@@ -65,23 +33,24 @@ void schedule::apply(program& p) const
             }
             return weights[ins];
         })(last);
+    }
 
-    // Assign streams
-    auto streams = model.concurrency();
-    stream_info si;
+    void assign_streams(program& p, std::size_t streams)
+    {
+        const std::size_t min_partition_threshold = 2;
         for(std::size_t stream = 0; stream < streams; stream++)
         {
            fix([&](auto self, auto ins) {
                // Only assign streams if not already assigned
-               if(not si.has_stream(ins))
-                   si.set_stream(ins, stream);
+               if(not has_stream(ins))
+                   set_stream(ins, stream);
                instruction_ref child = p.end();
                std::size_t w = 0;
                for(auto i : ins->inputs())
                {
                    const auto weight = weights[i];
                    // Skip instructions that already have a stream assigned or whose weight is too low
-                   if(si.has_stream(i) or weight <= min_partition_threshold)
+                   if(has_stream(i) or weight <= min_partition_threshold)
                    {
                        self(i);
                    }
@@ -94,16 +63,58 @@
                }
                if(child != p.end())
                    self(child);
-           })(last);
+           })(std::prev(p.end()));
        }
        // Assign remaining instructions
        for(auto ins : iterator_for(p))
        {
-           if(si.has_stream(ins))
+           if(has_stream(ins))
                continue;
-           si.set_stream(ins, streams - 1);
+           set_stream(ins, streams - 1);
        }
+    }
+
+    void set_stream(instruction_ref ins, std::size_t n) { ins2stream[ins] = n; }
+    std::size_t get_stream(instruction_ref ins) const { return ins2stream.at(ins); }
+    bool has_stream(instruction_ref ins) const { return ins2stream.count(ins) > 0; }
+
+    bool different(const std::vector<instruction_ref>& v) const
+    {
+        if(v.size() < 2)
+            return false;
+        auto stream = get_stream(v.front());
+        return not std::all_of(
+            v.begin(), v.end(), [&](instruction_ref x) { return get_stream(x) == stream; });
+    }
+
+    bool is_split_point(instruction_ref ins) const { return different(ins->outputs()); }
+    bool is_merge_point(instruction_ref ins) const { return different(ins->inputs()); }
+
+    std::vector<std::size_t> wait_for(instruction_ref ins) const
+    {
+        std::set<std::size_t> result;
+        auto s = get_stream(ins);
+        for(auto i : ins->inputs())
+        {
+            auto stream = get_stream(i);
+            if(stream != s)
+                result.insert(stream);
+        }
+        return {result.begin(), result.end()};
+    }
+};
+
+void schedule::apply(program& p) const
+{
+    stream_info si;
+    auto last = std::prev(p.end());
+    si.accumulate_weights(last, model);
+    si.assign_streams(p, model.concurrency());
     // Topo sort
     fix([&](auto self, auto ins) {
         for(auto i : ins->inputs())
...
@@ -79,7 +79,7 @@ void schedule_model::wait(program& p,
 static std::unordered_map<std::string, std::size_t> create_weight_map()
 {
     return {
-        {"hip::load_literal", 1},
+        {"hip::load_literal", 0},
         {"hip::allocate", 0},
         {"gpu::convolution", 4},
         {"gpu::conv_bias_relu", 4},
...
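Since the page above is just a raw diff, here is a compact, self-contained sketch of the pattern this commit applies: the per-instruction bookkeeping (stream assignments, accumulated weights) and the passes that fill it are gathered into one info class, and the top-level apply() becomes a short sequence of method calls on that object. The types and names below (node, stream_info_toy, the threshold policy) are simplified stand-ins invented for illustration, not the MIGraphX API.

// Illustrative toy only: mirrors the shape of the refactor in the diff,
// not the actual MIGraphX scheduler.
#include <cstddef>
#include <iostream>
#include <unordered_map>
#include <vector>

struct node
{
    std::size_t id;
    std::size_t weight;              // cost of this node on its own
    std::vector<std::size_t> inputs; // ids of producer nodes
};

struct stream_info_toy
{
    std::unordered_map<std::size_t, std::size_t> node2stream;
    std::unordered_map<std::size_t, std::size_t> weights; // accumulated cost up to each node

    // Sum each node's own weight with the accumulated weight of its inputs.
    void accumulate_weights(const std::vector<node>& nodes)
    {
        for(const auto& n : nodes) // nodes assumed to be in topological order
        {
            std::size_t w = n.weight;
            for(auto i : n.inputs)
                w += weights.at(i);
            weights[n.id] = w;
        }
    }

    // Trivial policy: heavy nodes go to stream 0, light ones to the last stream.
    void assign_streams(const std::vector<node>& nodes, std::size_t streams, std::size_t threshold)
    {
        for(const auto& n : nodes)
            node2stream[n.id] = (weights.at(n.id) > threshold) ? 0 : streams - 1;
    }
};

int main()
{
    std::vector<node> g = {{0, 1, {}}, {1, 4, {0}}, {2, 4, {0}}, {3, 1, {1, 2}}};
    stream_info_toy si;
    si.accumulate_weights(g);
    si.assign_streams(g, 2, 2);
    for(const auto& n : g)
        std::cout << "node " << n.id << " -> stream " << si.node2stream.at(n.id) << "\n";
}

The real stream_info in the diff operates on instruction_ref graphs and pulls its costs and concurrency from the schedule_model, but the division of responsibilities is the same: accumulate_weights fills the weight table, and assign_streams consumes it.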