Commit 7d5365d0 authored by Paul

Format

parent b7b44c01
@@ -38,7 +38,7 @@ struct compile_op : action<compile_op>
         context ctx;
         auto inputs = p.parse_shapes(v.at("inputs"));
         auto op = gpu::compile_op(v.at("name").to<std::string>(), ctx, inputs, v);
-        auto [host_time, device_time] = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
+        auto [host_time, device_time] = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
         std::cout << op << ": " << host_time << "ms" << std::endl;
     }
 };
@@ -33,7 +33,8 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace driver {
-std::pair<double, double> time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);
+std::pair<double, double>
+time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);
 } // namespace driver
 } // namespace gpu
@@ -42,13 +42,14 @@ std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsig
 }
 using milliseconds = std::chrono::duration<double, std::milli>;
-std::pair<double, double> time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
+std::pair<double, double>
+time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
 {
     // TODO: Use std::ref
     migraphx::context ctx = ictx;
-    auto& gctx = any_cast<migraphx::gpu::context>(ctx);
-    auto output = op.compute_shape(inputs);
+    auto& gctx = any_cast<migraphx::gpu::context>(ctx);
+    auto output = op.compute_shape(inputs);
     op.finalize(ctx, output, inputs);
     auto args = generate_arguments(inputs);
     auto run = [&] {
@@ -57,9 +58,9 @@ std::pair<double, double> time_op(context& ictx, operation op, const std::vector
     };
     gctx.enable_perf_measurement();
     run();
-    double host_time = 0.0;
+    double host_time = 0.0;
     double device_time = 0.0;
-    for(auto i:range(n))
+    for(auto i : range(n))
     {
         (void)i;
         host_time += time<milliseconds>(run);
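For reference, here is a minimal sketch of how the reformatted `time_op` entry point is driven, mirroring the `compile_op` action at the top of this diff. Only the `time_op` signature and the printed output come from the diff itself; the include paths and the wrapper name `report_op_time` are assumptions for illustration.

```cpp
// Sketch only: include paths and the wrapper are assumptions, not part of
// this commit. The time_op signature matches the declaration shown above.
#include <iostream>
#include <vector>

#include <migraphx/context.hpp>         // assumed path for migraphx::context
#include <migraphx/operation.hpp>       // assumed path for migraphx::operation
#include <migraphx/shape.hpp>           // assumed path for migraphx::shape
#include <migraphx/gpu/driver/perf.hpp> // assumed path for gpu::driver::time_op

// Time an already-compiled GPU op over the given input shapes and report both
// the host wall-clock time and the hipEvent-based device time.
inline void report_op_time(migraphx::context& ctx,
                           const migraphx::operation& op,
                           const std::vector<migraphx::shape>& inputs,
                           int iterations = 100)
{
    auto [host_time, device_time] =
        migraphx::gpu::driver::time_op(ctx, op, inputs, iterations);
    std::cout << op << ": host " << host_time << "ms, device " << device_time << "ms"
              << std::endl;
}
```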
@@ -199,7 +199,6 @@ struct context
     context(std::size_t device_id = 0, std::size_t n = value_of(MIGRAPHX_NSTREAMS{}, 1))
         : current_device(std::make_shared<hip_device>(device_id, n))
     {
     }
     hip_device& get_current_device()
@@ -279,24 +278,24 @@ struct context
     void enable_perf_measurement(bool b = true)
     {
-        if (b)
+        if(b)
         {
             start_event = create_event_for_timing();
-            stop_event = create_event_for_timing();
+            stop_event = create_event_for_timing();
             get_stream().record(start_event.get());
             get_stream().record(stop_event.get());
         }
         else
         {
             start_event = nullptr;
-            stop_event = nullptr;
+            stop_event = nullptr;
         }
         measure_perf = b;
     }
     std::pair<hipEvent_t, hipEvent_t> get_perf_events() const
     {
-        if (measure_perf)
+        if(measure_perf)
             return std::make_pair(start_event.get(), stop_event.get());
         return std::make_pair(nullptr, nullptr);
     }
@@ -304,10 +303,10 @@ struct context
     float get_elapsed_ms() const
     {
         float result = 0;
-        if (start_event != nullptr and stop_event != nullptr)
+        if(start_event != nullptr and stop_event != nullptr)
         {
             auto status = hipEventElapsedTime(&result, start_event.get(), stop_event.get());
-            if (status != hipSuccess)
+            if(status != hipSuccess)
                 MIGRAPHX_THROW("Failed hipEventElapsedTime: " + hip_error(status));
         }
         return result;
@@ -317,9 +316,9 @@ struct context
     // TODO: Make this a vector to support multiple devices
     std::shared_ptr<hip_device> current_device;
     std::vector<shared<hip_event_ptr>> events;
-    bool measure_perf = false;
+    bool measure_perf = false;
     shared<hip_event_ptr> start_event = nullptr;
-    shared<hip_event_ptr> stop_event = nullptr;
+    shared<hip_event_ptr> stop_event = nullptr;
 };
 inline void migraphx_to_value(value& v, const context& ctx) { v = ctx.to_value(); }
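The `enable_perf_measurement()` / `get_elapsed_ms()` pair above follows the usual hipEvent timing pattern: record a start and a stop event on the stream around the work, synchronize on the stop event, then read the elapsed time with `hipEventElapsedTime`. A self-contained sketch of that pattern, independent of the `context` class (the names here are illustrative, only the HIP calls mirror the code above):

```cpp
// Standalone illustration of the hipEvent timing pattern used by
// context::enable_perf_measurement() and context::get_elapsed_ms().
#include <hip/hip_runtime.h>
#include <iostream>

int main()
{
    hipEvent_t start = nullptr;
    hipEvent_t stop  = nullptr;
    if(hipEventCreate(&start) != hipSuccess || hipEventCreate(&stop) != hipSuccess)
        return 1;

    // Record the start event, enqueue device work, then record the stop event.
    if(hipEventRecord(start, nullptr) != hipSuccess)
        return 1;
    // ... enqueue kernels or hipMemcpyAsync calls on the null stream here ...
    if(hipEventRecord(stop, nullptr) != hipSuccess)
        return 1;

    // Wait for the stop event, then read the elapsed device time in ms,
    // as get_elapsed_ms() does with hipEventElapsedTime.
    if(hipEventSynchronize(stop) != hipSuccess)
        return 1;
    float elapsed_ms = 0;
    if(hipEventElapsedTime(&elapsed_ms, start, stop) != hipSuccess)
        return 1;
    std::cout << "device time: " << elapsed_ms << "ms" << std::endl;

    hipEventDestroy(start);
    hipEventDestroy(stop);
    return 0;
}
```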
@@ -50,14 +50,18 @@ struct kernel
     void launch(hipStream_t stream,
                 std::size_t global,
                 std::size_t local,
-                const std::vector<kernel_argument>& args, hipEvent_t start = nullptr, hipEvent_t stop = nullptr) const;
+                const std::vector<kernel_argument>& args,
+                hipEvent_t start = nullptr,
+                hipEvent_t stop = nullptr) const;
     void launch(hipStream_t stream,
                 std::size_t global,
                 std::size_t local,
-                std::vector<void*> args, hipEvent_t start = nullptr, hipEvent_t stop = nullptr) const;
+                std::vector<void*> args,
+                hipEvent_t start = nullptr,
+                hipEvent_t stop = nullptr) const;
-    template<class... Ts>
+    template <class... Ts>
     auto launch(hipStream_t stream, std::size_t global, std::size_t local, Ts... zs) const
     {
         return [=](auto&&... xs) {
@@ -80,7 +80,9 @@ void launch_kernel(hipFunction_t fun,
                    std::size_t global,
                    std::size_t local,
                    void* kernargs,
-                   std::size_t size, hipEvent_t start, hipEvent_t stop)
+                   std::size_t size,
+                   hipEvent_t start,
+                   hipEvent_t stop)
 {
     assert(global > 0);
     assert(local > 0);
@@ -97,11 +99,22 @@ void launch_kernel(hipFunction_t fun,
 #endif
     };
-    auto status = hipExtModuleLaunchKernel(
-        fun, global, 1, 1, local, 1, 1, 0, stream, nullptr, reinterpret_cast<void**>(&config), start, stop);
+    auto status = hipExtModuleLaunchKernel(fun,
+                                           global,
+                                           1,
+                                           1,
+                                           local,
+                                           1,
+                                           1,
+                                           0,
+                                           stream,
+                                           nullptr,
+                                           reinterpret_cast<void**>(&config),
+                                           start,
+                                           stop);
     if(status != hipSuccess)
         MIGRAPHX_THROW("Failed to launch kernel: " + hip_error(status));
-    if (stop)
+    if(stop)
     {
         status = hipEventSynchronize(stop);
         if(status != hipSuccess)
@@ -112,7 +125,9 @@ void launch_kernel(hipFunction_t fun,
 void kernel::launch(hipStream_t stream,
                     std::size_t global,
                     std::size_t local,
-                    std::vector<void*> args, hipEvent_t start, hipEvent_t stop) const
+                    std::vector<void*> args,
+                    hipEvent_t start,
+                    hipEvent_t stop) const
 {
     assert(impl != nullptr);
     void* kernargs = args.data();
@@ -124,7 +139,9 @@ void kernel::launch(hipStream_t stream,
 void kernel::launch(hipStream_t stream,
                     std::size_t global,
                     std::size_t local,
-                    const std::vector<kernel_argument>& args, hipEvent_t start, hipEvent_t stop) const
+                    const std::vector<kernel_argument>& args,
+                    hipEvent_t start,
+                    hipEvent_t stop) const
 {
     assert(impl != nullptr);
     std::vector<char> kernargs = pack_args(args);
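Finally, a hedged sketch of calling the reworked `kernel::launch` overloads with the new optional event parameters. Only the signature comes from the header diff above; the include path, the `migraphx::gpu` namespace placement, and the assumption that the kernel object and events are built elsewhere are illustrative.

```cpp
// Sketch only: the launch signature comes from the header above; how the
// kernel object and events are obtained is assumed, not part of this commit.
#include <cstddef>
#include <utility>
#include <vector>

#include <hip/hip_runtime.h>
#include <migraphx/gpu/kernel.hpp> // assumed path for the kernel wrapper

// Launch a prebuilt kernel; when start/stop events are supplied, the launch
// path records them around the dispatch so the caller can time the kernel.
inline void timed_launch(const migraphx::gpu::kernel& k,
                         hipStream_t stream,
                         std::size_t global,
                         std::size_t local,
                         std::vector<void*> args,
                         hipEvent_t start = nullptr,
                         hipEvent_t stop  = nullptr)
{
    k.launch(stream, global, local, std::move(args), start, stop);
}
```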