Unverified Commit e19f78ae authored by Umang Yadav, committed by GitHub

Use find_2.0 API for the convolution (#1346)

Improvements/additions still to be made:

- changes for quant_convolution
- changes for deconvolution
- macros for MIOpen status checks
parent c2842c1e
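For orientation, below is a condensed sketch of the Find 2.0 call sequence that the convolution changes in this commit adopt: describe the convolution as a miopenProblem_t, ask miopenFindSolutions for a solution, then bind the tensor arguments and run it with miopenRunSolution. This is only an illustration distilled from the diffs that follow; the helper name find_and_run_conv, the parameter list, and the omission of status checks, descriptor creation, and RAII wrappers are mine, not part of the change.

// A condensed sketch of the Find 2.0 flow (illustrative only; the real code in
// this commit wraps every call in status checks and RAII handles).
#include <miopen/miopen.h>
#include <cstddef>

miopenSolution_t find_and_run_conv(miopenHandle_t handle,
                                   miopenConvolutionDescriptor_t conv_desc,
                                   miopenTensorDescriptor_t x_desc,
                                   miopenTensorDescriptor_t w_desc,
                                   miopenTensorDescriptor_t y_desc,
                                   void* x,
                                   void* w,
                                   void* y,
                                   void* workspace,
                                   std::size_t workspace_size)
{
    // 1. Describe the forward convolution as a "problem".
    miopenProblem_t problem = nullptr;
    miopenCreateConvProblem(&problem, conv_desc, miopenProblemDirectionForward);
    miopenSetProblemTensorDescriptor(problem, miopenTensorConvolutionX, x_desc);
    miopenSetProblemTensorDescriptor(problem, miopenTensorConvolutionW, w_desc);
    miopenSetProblemTensorDescriptor(problem, miopenTensorConvolutionY, y_desc);

    // 2. Ask MIOpen for at most one solution (no find options).
    miopenSolution_t solution = nullptr;
    std::size_t found = 0;
    miopenFindSolutions(handle, problem, nullptr, &solution, &found, 1);
    miopenDestroyProblem(problem);

    // 3. Bind the tensor arguments and run the chosen solution.
    const miopenTensorArgument_t tensor_args[3] = {
        {miopenTensorConvolutionX, nullptr, x},
        {miopenTensorConvolutionW, nullptr, w},
        {miopenTensorConvolutionY, nullptr, y},
    };
    miopenRunSolution(handle, solution, 3, tensor_args, workspace, workspace_size);

    // The solution can also be serialized with miopenSaveSolution and later
    // restored with miopenLoadSolution, which is how this commit caches it.
    return solution; // the caller releases it with miopenDestroySolution
}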
@@ -28,6 +28,7 @@
 #include <algorithm>
 #include <migraphx/rank.hpp>
 #include <migraphx/config.hpp>
+#include <vector>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -59,28 +60,35 @@ inline stream_range_container<Range> stream_range(const Range& r)
 namespace detail {
-inline void stream_write_value_impl(rank<2>, std::ostream& os, const std::string& x) { os << x; }
-template <class Range>
-auto stream_write_value_impl(rank<1>, std::ostream& os, const Range& r)
-    -> decltype(r.begin(), r.end(), void())
+template <class T>
+auto stream_write_value_impl(rank<1>, std::ostream& os, const T& x) -> decltype(os << x, void())
+{
+    os << x;
+}
+template <class T>
+void stream_write_value_impl(rank<1>, std::ostream& os, const std::vector<T>& r)
 {
     os << "{";
     os << stream_range(r);
     os << "}";
 }
-template <class T>
-void stream_write_value_impl(rank<0>, std::ostream& os, const T& x)
+template <class Range>
+auto stream_write_value_impl(rank<0>, std::ostream& os, const Range& r)
+    -> decltype(r.begin(), r.end(), void())
 {
-    os << x;
+    os << "{";
+    os << stream_range(r);
+    os << "}";
 }
 } // namespace detail
 template <class T>
 void stream_write_value(std::ostream& os, const T& x)
 {
-    detail::stream_write_value_impl(rank<2>{}, os, x);
+    detail::stream_write_value_impl(rank<1>{}, os, x);
 }
 } // namespace MIGRAPHX_INLINE_NS
...
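The rewrite above drops the rank<2> special case for std::string and instead lets an "is streamable" overload at rank<1> take precedence over the generic range overload at rank<0>. Below is a self-contained illustration of that rank-based tag-dispatch pattern; the rank definition is a generic stand-in rather than MIGraphX's actual rank.hpp, and the names write/write_impl are illustrative.

// Rank-based tag dispatch: rank<N> derives from rank<N - 1>, so passing
// rank<1>{} prefers the rank<1> overload and falls back to rank<0> when
// SFINAE rejects it.
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

template <std::size_t N>
struct rank : rank<N - 1>
{
};
template <>
struct rank<0>
{
};

// Preferred overload: anything that can be streamed directly.
template <class T>
auto write_impl(rank<1>, std::ostream& os, const T& x) -> decltype(os << x, void())
{
    os << x;
}

// Fallback overload: any range, printed element by element (simplified
// compared to stream_range; the trailing separator is ignored here).
template <class Range>
auto write_impl(rank<0>, std::ostream& os, const Range& r)
    -> decltype(r.begin(), r.end(), void())
{
    os << "{";
    for(const auto& e : r)
        os << e << ", ";
    os << "}";
}

template <class T>
void write(std::ostream& os, const T& x)
{
    write_impl(rank<1>{}, os, x); // start dispatch at the highest rank
}

int main()
{
    write(std::cout, 42);                        // streamable -> rank<1>
    std::cout << "\n";
    write(std::cout, std::vector<int>{1, 2, 3}); // range only -> rank<0>
    std::cout << "\n";
    write(std::cout, std::string("hi"));         // streamable wins over range
    std::cout << "\n";
}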
@@ -184,6 +184,12 @@ struct value
     {
     }
     explicit binary(std::size_t s) : base(s) {}
+    friend std::ostream& operator<<(std::ostream& os, const binary& obj)
+    {
+        os << "{binary_object: " << obj.size() << "}";
+        return os;
+    }
 };
 value() = default;
...
@@ -239,9 +239,18 @@ endif()
 include(CheckLibraryExists)
 get_target_property(MIOPEN_LOCATION MIOpen LOCATION)
 check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API)
+check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
+if(HAS_FIND_2_API)
+    target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
+    message(STATUS "MIGraphx is using Find-2.0 API of MIOpen")
+else()
+    message(STATUS "MIOpen does not have Find-2.0 API")
+endif()
 if(HAS_FIND_MODE_API)
     target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API)
-    message(STATUS "MIOpen has find mode api")
+    message(STATUS "MIGraphx is using Find Mode API of MIOpen")
 else()
     message(STATUS "MIOpen does not have find mode api")
 endif()
...
@@ -131,7 +131,7 @@ preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs)
     std::size_t bytes = 0;
     for(auto i : preloaded)
     {
-        auto input = inputs[i];
+        const auto& input = inputs[i];
         bytes += input.bytes();
         if(bytes > max_lds_bytes)
             break;
...
@@ -24,6 +24,7 @@
 #include <migraphx/gpu/convolution.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/generate.hpp>
+#include <miopen/miopen.h>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -55,14 +56,40 @@ argument miopen_convolution::compute(context& ctx,
                                      const shape& output_shape,
                                      const std::vector<argument>& args) const
 {
     auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape()));
     auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()));
     auto y_desc = make_tensor(reshape_if_1d(output_shape));
+    auto* miopen_stream_handle = ctx.get_stream().get_miopen();
+    auto workspace_size = args[2].get_shape().bytes();
+#ifdef MIGRAPHX_HAS_FIND_2_API
+    {
+        const miopenTensorArgument_t tensor_args[3] = {
+            {miopenTensorConvolutionX, nullptr, args[0].implicit()},
+            {miopenTensorConvolutionW, nullptr, args[1].implicit()},
+            {miopenTensorConvolutionY, nullptr, args[3].implicit()},
+        };
+        if(solution_ptr.get() == nullptr)
+            MIGRAPHX_THROW("MIOpen Convolution : Load MIOpen Solution before running it");
+        auto status = miopenRunSolution(miopen_stream_handle,
+                                        solution_ptr.get(),
+                                        3,
+                                        tensor_args,
+                                        args[2].implicit(),
+                                        workspace_size);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen Convolution: running convolution using find_2.0 failed");
+        return args[3];
+    }
+#else
+    // else use immediate mode
     if(solution_id == 0)
         MIGRAPHX_THROW("MIOpen Convolution: invalid solution ID");
-    auto status = miopenConvolutionForwardImmediate(ctx.get_stream().get_miopen(),
+    auto status = miopenConvolutionForwardImmediate(miopen_stream_handle,
                                                     w_desc.get(),
                                                     args[1].implicit(),
                                                     x_desc.get(),
@@ -71,29 +98,66 @@ argument miopen_convolution::compute(context& ctx,
                                                     y_desc.get(),
                                                     args[3].implicit(),
                                                     args[2].implicit(),
-                                                    args[2].get_shape().bytes(),
+                                                    workspace_size,
                                                     solution_id);
     if(status != miopenStatusSuccess)
         MIGRAPHX_THROW("MIOpen Convolution: running convolution failed");
     return args[3];
+#endif
 }
 shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
 {
     shape workspace_shape{};
     auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
     auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
     auto y_desc = make_tensor(reshape_if_1d(output_shape));
     std::size_t workspace_size = 0;
-    miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
-                                             w_desc.get(),
-                                             x_desc.get(),
-                                             cd.get(),
-                                             y_desc.get(),
-                                             &workspace_size);
+#ifdef MIGRAPHX_HAS_FIND_2_API
+    {
+        auto conv_problem = make_obj<miopen_problem>(
+            &miopenCreateConvProblem, cd.get(), miopenProblemDirectionForward);
+        set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
+        set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
+        set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);
+        auto* miopen_stream_handle = ctx.get_stream().get_miopen();
+        solution_ptr = find_solution(miopen_stream_handle, conv_problem.get());
+        auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen Convolution : failed to get solution's workspace size");
+        std::size_t solution_size;
+        status = miopenGetSolutionSize(solution_ptr.get(), &solution_size);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen Convolution: Failed to fetch solution size");
+        auto solution_binary = std::vector<char>{};
+        solution_binary.resize(solution_size);
+        status = miopenSaveSolution(solution_ptr.get(), solution_binary.data());
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen Convolution: Saving solution failed");
+        solution_object = value::binary{solution_binary.data(), solution_size};
+        return shape{shape::int8_type, {workspace_size}};
+    }
+#else
+    // else use immediate find mode
+    auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
+                                                           w_desc.get(),
+                                                           x_desc.get(),
+                                                           cd.get(),
+                                                           y_desc.get(),
+                                                           &workspace_size);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen Convolution: Failed to get forward workspace size");
     workspace_shape = shape{shape::int8_type, {workspace_size}};
     auto x = to_gpu(generate_argument(inputs[0]));
@@ -103,20 +167,20 @@ shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
     int algo_count = 1;
     miopenConvAlgoPerf_t perf;
-    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
+    status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                    x_desc.get(),
                                                    x.implicit(),
                                                    w_desc.get(),
                                                    w.implicit(),
                                                    cd.get(),
                                                    y_desc.get(),
                                                    y.implicit(),
                                                    1,
                                                    &algo_count,
                                                    &perf,
                                                    workspace.implicit(),
                                                    workspace_size,
                                                    false);
     if(status != miopenStatusSuccess)
         MIGRAPHX_THROW("MIOpen Convolution: find convolution failed");
     algo = perf.fwd_algo;
@@ -148,35 +212,58 @@ shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
     solution_id = solutions.front().solution_id;
     return shape{shape::int8_type, {perf.memory}};
+#endif
 }
 void miopen_convolution::finalize(context& ctx,
                                   const shape& output_shape,
-                                  std::vector<shape> inputs)
+                                  const std::vector<shape>& inputs)
 {
-    if(cd == nullptr)
-        cd = make_conv(op);
-    if(solution_id == 0)
-    {
-        // Check that workspace hasn't changed
-        auto size = inputs.at(2).bytes();
-        auto ws = find(ctx, output_shape, inputs);
-        if(ws.bytes() > size)
-            MIGRAPHX_THROW("MIOpen Convolution: workspace has changed during finalization.");
-    }
+#ifdef MIGRAPHX_HAS_FIND_2_API
+    {
+        (void)(ctx); // avoid warnings
+        (void)(output_shape);
+        (void)(inputs);
+        // load solution
+        if(solution_ptr == nullptr)
+        {
+            miopenSolution_t ptr;
+            auto status = miopenLoadSolution(&ptr,
+                                             reinterpret_cast<const char*>(solution_object.data()),
+                                             solution_object.size());
+            solution_ptr = miopen_solution{ptr};
+            if(status != miopenStatusSuccess)
+                MIGRAPHX_THROW("MIOpen Convolution: loading convolution solution failed");
+        }
+    }
+#else
+    // Use immediate mode API
+    {
+    if(cd == nullptr)
+        cd = make_conv(op);
+    if(solution_id == 0)
+    {
+        // Check that workspace hasn't changed
+        auto size = inputs.at(2).bytes();
+        auto ws = find(ctx, output_shape, inputs);
+        if(ws.bytes() > size)
+            MIGRAPHX_THROW("MIOpen Convolution: workspace has changed during finalization.");
+    }
     auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
     auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
     auto y_desc = make_tensor(reshape_if_1d(output_shape));
     auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
                                                           w_desc.get(),
                                                           x_desc.get(),
                                                           cd.get(),
                                                           y_desc.get(),
                                                           solution_id);
     if(status != miopenStatusSuccess)
         MIGRAPHX_THROW("MIOpen Convolution: compile solution failed");
+    }
+#endif
 }
 } // namespace gpu
...
@@ -39,6 +39,10 @@ struct miopen_convolution
     op::convolution op;
     shared<convolution_descriptor> cd = nullptr;
    miopenConvFwdAlgorithm_t algo{};
+#ifdef MIGRAPHX_HAS_FIND_2_API
+    value::binary solution_object{};
+    shared<miopen_solution> solution_ptr = nullptr;
+#endif
     uint64_t solution_id = 0;
     template <class Self, class F>
@@ -49,6 +53,9 @@ struct miopen_convolution
                  f(self.op.dilation, "dilation"),
                  f(self.op.group, "group"),
                  f(self.op.padding_mode, "padding_mode"),
+#ifdef MIGRAPHX_HAS_FIND_2_API
+                 f(self.solution_object, "solution_object"),
+#endif
                  f(self.solution_id, "solution_id"));
     }
@@ -57,7 +64,7 @@ struct miopen_convolution
     argument
     compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
     shape find(context& ctx, const shape& output_shape, std::vector<shape> inputs);
-    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
+    void finalize(context& ctx, const shape& output_shape, const std::vector<shape>& inputs);
     std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
     {
         return shapes.size() - 1;
...
@@ -70,6 +70,34 @@ Result make_obj(F f, Ts... xs)
     return r;
 }
+#ifdef MIGRAPHX_HAS_FIND_2_API
+using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestroyFindOptions);
+using miopen_problem = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem);
+using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution);
+inline miopen_solution find_solution(miopenHandle_t handle, miopenProblem_t problem)
+{
+    miopenSolution_t solution;
+    size_t found = 0;
+    auto status = miopenFindSolutions(handle, problem, nullptr, &solution, &found, 1);
+    auto result = miopen_solution{solution};
+    if(status != miopenStatusSuccess or found == 0)
+        MIGRAPHX_THROW("MIOpen miopenFindSolutions failed");
+    return result;
+}
+inline void set_tensor_descriptor(miopenTensorArgumentId_t name,
+                                  tensor_descriptor& desc,
+                                  miopen_problem& problem_ptr)
+{
+    auto status = miopenSetProblemTensorDescriptor(problem_ptr.get(), name, desc.get());
+    if(status != miopenStatusSuccess)
+    {
+        MIGRAPHX_THROW("setting problem tensor description failed");
+    }
+}
+#endif
 inline tensor_descriptor make_tensor(const migraphx::shape& os, bool pack = false)
 {
     auto s = os.normalize_standard();
...
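The new miopen_problem/miopen_solution aliases above rely on MIGRAPHX_MANAGE_PTR so the C handles are released automatically. Below is a rough stand-in for what such an alias provides, under the assumption that it boils down to a std::unique_ptr with a custom deleter; this is an assumption about intent, not the macro's literal expansion, and the names are illustrative.

// Assumed equivalent of a MANAGE_PTR-style alias: a unique_ptr whose deleter
// calls the matching miopenDestroy* function (requires an MIOpen build that
// provides the Find 2.0 API).
#include <miopen/miopen.h>
#include <memory>
#include <type_traits>

struct miopen_solution_deleter
{
    void operator()(miopenSolution_t s) const
    {
        if(s != nullptr)
            miopenDestroySolution(s);
    }
};

using managed_miopen_solution =
    std::unique_ptr<std::remove_pointer_t<miopenSolution_t>, miopen_solution_deleter>;

// Usage mirroring finalize(): adopt a raw handle returned through an out-parameter.
//   miopenSolution_t raw;
//   miopenLoadSolution(&raw, blob_data, blob_size);
//   managed_miopen_solution solution{raw}; // destroyed when it goes out of scope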
@@ -65,7 +65,7 @@ struct gathernd_compiler : compiler<gathernd_compiler>
     operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
     {
         hip_compile_options options;
-        auto out_s = inputs.back();
+        const auto& out_s = inputs.back();
         options.set_launch_params(v, compute_global_for(ctx, out_s.elements()));
         options.inputs = inputs;
         options.output = out_s;
...
@@ -511,14 +511,7 @@ void print_value(std::ostream& os, const std::vector<value>& x)
     os << "}";
 }
-void print_value(std::ostream& os, const value::binary& x)
-{
-    // Convert binary to integers
-    std::vector<int> v(x.begin(), x.end());
-    os << "{";
-    os << to_string_range(v);
-    os << "}";
-}
+void print_value(std::ostream& os, const value::binary& x) { os << x; }
 std::ostream& operator<<(std::ostream& os, const value& d)
 {
...
@@ -37,10 +37,6 @@
 #include <migraphx/functional.hpp>
 #include <test.hpp>
-using migraphx::trim;
-// m test_gpu_mlir && ./bin/test_gpu_mlir
 struct mlir_gpu_target : migraphx::gpu::target
 {
     std::string name() const { return "mlir"; }
...