"...composable_kernel.git" did not exist on "58631803e5ab484a0a083ba43d5f5507b0d70c4f"
Commit 19ed10f0 authored by Paul

Add stream instructions

parent 0721ddc1
@@ -17,7 +17,12 @@ struct hip_device
 {
     hip_device() { add_stream(); }
-    hip_device(std::size_t id) : device_id(id) { add_stream(); }
+    hip_device(std::size_t id, std::size_t n)
+        : device_id(id)
+    {
+        for(std::size_t i = 0; i < n; i++)
+            add_stream();
+    }
 
     struct stream
     {
@@ -77,6 +82,22 @@ struct hip_device
         return rbhandle.get();
     }
 
+    void wait(hipEvent_t event)
+    {
+        setup();
+        auto status = hipStreamWaitEvent(get(), event, 0);
+        if(status != hipSuccess)
+            MIGRAPHX_THROW("Failed to wait.");
+    }
+
+    void record(hipEvent_t event)
+    {
+        setup();
+        auto status = hipEventRecord(event, get());
+        if(status != hipSuccess)
+            MIGRAPHX_THROW("Failed to record.");
+    }
+
     private:
     std::size_t id = 0;
     shared<hip_stream_ptr> s = nullptr;
@@ -87,9 +108,13 @@ struct hip_device
     void add_stream() { streams.emplace_back(device_id); }
 
     stream& get_stream() { return streams.at(current_stream); }
+    stream& get_stream(std::size_t n) { return streams.at(n); }
+
     void set_stream(std::size_t n) { current_stream = n; }
 
+    std::size_t nstreams() const { return streams.size(); }
+
     private:
     std::size_t device_id = 0;
     std::size_t current_stream = 0;
@@ -98,7 +123,7 @@ struct hip_device
 
 struct context
 {
-    context(std::size_t n = 0) : current_device(std::make_shared<hip_device>(n)) {}
+    context(std::size_t device_id = 0, std::size_t n = 4) : current_device(std::make_shared<hip_device>(device_id, n)) {}
@@ -107,6 +132,9 @@ struct context
     }
 
     hip_device::stream& get_stream() { return get_current_device().get_stream(); }
+    hip_device::stream& get_stream(std::size_t n) { return get_current_device().get_stream(n); }
+
+    void set_stream(std::size_t n) { get_current_device().set_stream(n); }
 
     std::vector<argument> literals{};
     void finish() const { gpu_sync(); }
......
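Reviewer note: the new stream::record()/stream::wait() members wrap the standard HIP cross-stream ordering pattern: record an event behind work queued on one stream, then make another stream wait on that event before its own work runs. A minimal standalone sketch of that pattern, assuming only the public HIP runtime API; the stream names and the check() helper are illustrative, not part of this patch.

#include <hip/hip_runtime.h>
#include <stdexcept>

static void check(hipError_t status)
{
    if(status != hipSuccess)
        throw std::runtime_error(hipGetErrorString(status));
}

int main()
{
    hipStream_t producer, consumer;
    hipEvent_t ready;
    check(hipStreamCreateWithFlags(&producer, hipStreamNonBlocking));
    check(hipStreamCreateWithFlags(&consumer, hipStreamNonBlocking));
    // Timing disabled, as in create_event() later in this commit: the
    // event is used only for ordering, which makes recording cheaper.
    check(hipEventCreateWithFlags(&ready, hipEventDisableTiming));

    // ... enqueue producer kernels on `producer` here ...
    check(hipEventRecord(ready, producer));        // what stream::record() does
    check(hipStreamWaitEvent(consumer, ready, 0)); // what stream::wait() does
    // ... kernels enqueued on `consumer` now run after the producer work ...

    check(hipDeviceSynchronize());
    check(hipEventDestroy(ready));
    check(hipStreamDestroy(producer));
    check(hipStreamDestroy(consumer));
    return 0;
}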
@@ -15,7 +15,7 @@ namespace gpu {
 
 struct schedule_model
 {
-    std::size_t n = 4;
+    std::size_t streams = 0;
     std::size_t concurrency() const;
     void schedule_instruction(program& p, instruction_ref ins, std::size_t n) const;
     void wait(program& p,
......
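Reviewer note: `streams` replaces `n` as the number of streams the schedule pass may target, with the default moving from a hard-coded 4 to 0 since the real count is now supplied by the target (see the last file in this commit). A toy, self-contained sketch of how this interface is meant to be driven; apart from the names streams, concurrency, and weight, everything below is invented for illustration.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Stand-in for schedule_model: same shape of interface, no migraphx types.
struct toy_model
{
    std::size_t streams = 4;
    std::size_t concurrency() const { return streams; }
    std::size_t weight(const std::string& op) const
    {
        return op == "gpu::convolution" ? 4 : 1; // heavier ops get bigger weights
    }
};

int main()
{
    toy_model m;
    std::vector<std::string> ops = {
        "gpu::convolution", "gpu::pooling", "hip::add_relu", "gpu::convolution"};
    // A real scheduler would partition the dataflow graph by weight and insert
    // set_stream/wait_event instructions; round-robin stands in for that here.
    for(std::size_t i = 0; i < ops.size(); i++)
        std::cout << ops[i] << " (weight " << m.weight(ops[i]) << ") -> stream "
                  << i % m.concurrency() << "\n";
    return 0;
}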
@@ -1,4 +1,5 @@
 #include <migraphx/gpu/schedule_model.hpp>
+#include <migraphx/gpu/context.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/operation.hpp>
@@ -6,16 +7,107 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 
+using hip_event_ptr = MIGRAPHX_MANAGE_PTR(hipEvent_t, hipEventDestroy);
+
+hip_event_ptr create_event()
+{
+    hipEvent_t event;
+    auto status = hipEventCreateWithFlags(&event, hipEventDisableTiming);
+    if(status != hipSuccess)
+        MIGRAPHX_THROW("Failed to create event");
+    return hip_event_ptr{event};
+}
+
+struct wait_event
+{
+    std::vector<std::size_t> wait_for;
+    shared<hip_event_ptr> event;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.wait_for, "wait_for"));
+    }
+
+    std::string name() const { return "gpu::wait_event"; }
+    shape compute_shape(const std::vector<shape>&) const { return {}; }
+    argument compute(context& ctx, const shape&, const std::vector<argument>&) const
+    {
+        assert(event != nullptr);
+        for(auto n : wait_for)
+            ctx.get_stream(n).record(event.get());
+        ctx.get_stream().wait(event.get());
+        return {};
+    }
+    void finalize(context&, const shape&, std::vector<shape>)
+    {
+        event = create_event();
+    }
+};
+
+struct set_stream
+{
+    std::size_t stream = 0;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.stream, "stream"));
+    }
+
+    std::string name() const { return "gpu::set_stream"; }
+    shape compute_shape(const std::vector<shape>&) const { return {}; }
+    argument compute(context& ctx, const shape&, const std::vector<argument>&) const
+    {
+        ctx.set_stream(stream);
+        return {};
+    }
+    void finalize(context& ctx, const shape&, const std::vector<shape>&) { ctx.set_stream(stream); }
+};
+
-std::size_t schedule_model::concurrency() const { return n; }
+std::size_t schedule_model::concurrency() const { return streams; }
+
-void schedule_model::schedule_instruction(program& p, instruction_ref ins, std::size_t n) const {}
+void schedule_model::schedule_instruction(program& p, instruction_ref ins, std::size_t n) const
+{
+    p.insert_instruction(ins, set_stream{n});
+}
 void schedule_model::wait(program& p,
                           instruction_ref ins,
                           std::size_t wait_on,
                           const std::vector<std::size_t>& wait_for) const
 {
+    p.insert_instruction(ins, set_stream{wait_on});
+    p.insert_instruction(ins, wait_event{wait_for});
 }
+
+static std::unordered_map<std::string, std::size_t> create_weight_map()
+{
+    return {
+        {"hip::load_literal", 1},
+        {"hip::allocate", 0},
+        {"gpu::convolution", 4},
+        {"gpu::conv_bias_relu", 4},
+        {"gpu::pooling", 2},
+        {"gpu::gemm", 2},
+        {"gpu::concat", 1},
+        {"hip::add_relu", 2},
+    };
+}
+
+static const std::unordered_map<std::string, std::size_t>& weight_map()
+{
+    static std::unordered_map<std::string, std::size_t> m = create_weight_map();
+    return m;
+}
+
-std::size_t schedule_model::weight(const operation& op) const { return 1; }
+std::size_t schedule_model::weight(const operation& op) const
+{
+    if(weight_map().count(op.name()) == 0)
+        return 0;
+    return weight_map().at(op.name());
+}
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
\ No newline at end of file
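Reviewer note: wait_event records a single event on every stream in wait_for and then waits on it once. With HIP, as with CUDA, re-recording an event overwrites its previous state, so the wait may only be ordered after the most recently recorded stream. A standalone sketch of the stricter pattern, one event per producer stream, is below; whether the scheduler's ordering already makes the single-event form safe is worth confirming. Names here are illustrative, not from this patch.

#include <hip/hip_runtime.h>
#include <stdexcept>
#include <vector>

static void check(hipError_t status)
{
    if(status != hipSuccess)
        throw std::runtime_error(hipGetErrorString(status));
}

int main()
{
    const std::size_t n = 4;
    std::vector<hipStream_t> streams(n);
    std::vector<hipEvent_t> events(n);
    hipStream_t main_stream;
    check(hipStreamCreate(&main_stream));
    for(std::size_t i = 0; i < n; i++)
    {
        check(hipStreamCreate(&streams[i]));
        check(hipEventCreateWithFlags(&events[i], hipEventDisableTiming));
    }

    // ... producer work enqueued on streams[i] here ...
    for(std::size_t i = 0; i < n; i++)
    {
        check(hipEventRecord(events[i], streams[i])); // one event per stream
        check(hipStreamWaitEvent(main_stream, events[i], 0));
    }
    // ... work on main_stream is now ordered after all producers ...

    check(hipDeviceSynchronize());
    for(std::size_t i = 0; i < n; i++)
    {
        check(hipEventDestroy(events[i]));
        check(hipStreamDestroy(streams[i]));
    }
    check(hipStreamDestroy(main_stream));
    return 0;
}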
@@ -53,7 +53,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const
         fuse_ops{&ctx},
         dead_code_elimination{},
         write_literals{&ctx},
-        schedule{gpu::schedule_model{}},
+        schedule{gpu::schedule_model{ctx.get_current_device().nstreams()}},
         memory_coloring{"hip::allocate"},
         eliminate_workspace{},
         eliminate_allocation{"hip::allocate"},
......
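Reviewer note: tying the pieces together, the stream count now flows context -> hip_device -> nstreams() -> schedule_model -> the schedule pass. A short in-tree sketch of that flow; it needs the MIGraphX headers touched by this commit, so it is not standalone, and the helper function is hypothetical, not part of this patch.

#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/schedule_model.hpp>

// Hypothetical helper mirroring what target::get_passes now does.
migraphx::gpu::schedule_model make_model()
{
    migraphx::gpu::context ctx(0, 4);             // device 0, four streams (the new default)
    auto n = ctx.get_current_device().nstreams(); // == 4
    return migraphx::gpu::schedule_model{n};      // schedule pass sized to match
}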