Prioritizing int8 over int8x4 when it is applicable (#1218)

Prioritize int8 over int8x4 when it is applicable. Amend `return` to `continue` in the apply loop. Add error handling in case int8x4 compilation fails. Co-authored-by: Paul Fultz II <pfultz2@yahoo.com>

Prioritizing int8 over int8x4 when it is applicable (#1218)
Prioritize int8 over int8x4 when it is applicable. Amend `return` to `continue` in the apply loop. Add error handling in case int8x4 compilation fails. Co-authored-by: Paul Fultz II <pfultz2@yahoo.com>
37c47504 · Zhuoran Yin · GitHub · 7271ddbc · 37c47504 · 37c47504
Unverified Commit 37c47504 authored Jun 07, 2022 by Zhuoran Yin Committed by GitHub Jun 07, 2022
5 changed files
--- a/src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
@@ -2,6 +2,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP

 #include <migraphx/shape.hpp>
+#include <migraphx/reflect.hpp>
 #include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/gpu/miopen.hpp>

@@ -14,6 +15,7 @@ struct context;
 struct miopen_quant_convolution
 {
    op::quant_convolution op;
+    bool int8_x4_format = false;
    shared<convolution_descriptor> cd;
    miopenConvFwdAlgorithm_t algo{};
    miopenHandle_t handle = nullptr;
@@ -22,7 +24,8 @@ struct miopen_quant_convolution
    static auto reflect(Self& self, F f)
    {
        // TODO: Add algo
-        return op::quant_convolution::reflect(self.op, f);
+        return pack_join(migraphx::reflect(self.op, f),
+                         pack(f(self.int8_x4_format, "int8_x4_format")));
    }

    std::string name() const { return "gpu::quant_convolution"; }

--- a/src/targets/gpu/lowering.cpp
+++ b/src/targets/gpu/lowering.cpp
@@ -365,8 +365,22 @@ struct miopen_apply
    {
        apply_map.emplace("quant_convolution", [=](instruction_ref ins) {
            auto&& op = any_cast<op::quant_convolution>(ins->get_operator());
-            auto conv = miopen_quant_convolution{op, make_conv(op)};
-            auto ws   = conv.compile(get_context(), ins->get_shape(), to_shapes(ins->inputs()));
+            shape ws;
+            miopen_quant_convolution conv;
+            auto compile_quant_conv_with_format = [&](bool format) {
+                conv = miopen_quant_convolution{op, format, make_conv(op)};
+                ws   = conv.compile(get_context(), ins->get_shape(), to_shapes(ins->inputs()));
+            };
+
+            try
+            {
+                compile_quant_conv_with_format(int8_x4_format);
+            }
+            catch(migraphx::exception&)
+            {
+                // In case no solver supports the default format, retry using the other format.
+                compile_quant_conv_with_format(!int8_x4_format);
+            }

            auto args      = ins->inputs();
            auto workspace = insert_allocation(ins, ws, "workspace");

--- a/src/targets/gpu/pack_int8_args.cpp
+++ b/src/targets/gpu/pack_int8_args.cpp
@@ -118,7 +118,7 @@ void pack_int8_args::apply(module& m) const
            assert(val.contains("int8_x4_format"));
            if(not val.at("int8_x4_format").to<bool>())
            {
-                return;
+                continue;
            }
            auto inputs = ins->inputs();
            auto lens   = inputs.at(0)->get_shape().lens();
@@ -156,6 +156,12 @@ void pack_int8_args::apply(module& m) const
        }
        else if(ins->name() == "gpu::quant_convolution")
        {
+            auto val = ins->get_operator().to_value();
+            if(not val.at("int8_x4_format").to<bool>())
+            {
+                continue;
+            }
+
            auto inputs   = ins->inputs();
            auto packed_x = m.insert_instruction(
                ins,

--- a/src/targets/gpu/quant_convolution.cpp
+++ b/src/targets/gpu/quant_convolution.cpp
@@ -16,8 +16,8 @@ argument miopen_quant_convolution::compute(context& ctx,
                                           const shape& output_shape,
                                           const std::vector<argument>& args) const
 {
-    auto x_desc = make_tensor(args[0].get_shape(), true);
-    auto w_desc = make_tensor(args[1].get_shape(), true);
+    auto x_desc = make_tensor(args[0].get_shape(), int8_x4_format);
+    auto w_desc = make_tensor(args[1].get_shape(), int8_x4_format);
    auto y_desc = make_tensor(output_shape);

    float alpha = 1;
@@ -49,8 +49,8 @@ shape miopen_quant_convolution::compile(context& ctx,
                                        std::vector<shape> inputs)
 {
    shape workspace_shape{};
-    auto x_desc = make_tensor(inputs[0], true);
-    auto w_desc = make_tensor(inputs[1], true);
+    auto x_desc = make_tensor(inputs[0], int8_x4_format);
+    auto w_desc = make_tensor(inputs[1], int8_x4_format);
    auto y_desc = make_tensor(output_shape);

    std::size_t workspace_size = 0;
@@ -62,8 +62,15 @@ shape miopen_quant_convolution::compile(context& ctx,
                                             &workspace_size);
    workspace_shape = shape{shape::int8_type, {workspace_size}};

-    auto arg_vec4_x = to_gpu(generate_argument(pack_int8_shape(inputs[0])));
-    auto arg_vec4_w = to_gpu(generate_argument(pack_int8_shape(inputs[1])));
+    auto x_shape = inputs[0];
+    auto w_shape = inputs[1];
+    if(int8_x4_format)
+    {
+        x_shape = pack_int8_shape(x_shape);
+        w_shape = pack_int8_shape(w_shape);
+    }
+    auto arg_vec4_x = to_gpu(generate_argument(x_shape));
+    auto arg_vec4_w = to_gpu(generate_argument(w_shape));
    auto y          = allocate_gpu(output_shape);
    auto workspace  = allocate_gpu(workspace_shape);


--- a/test/verify/quant_conv_int8x4_default.cpp
+++ b/test/verify/quant_conv_int8x4_default.cpp
+
+#include "verify_program.hpp"
+#include <migraphx/program.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/op/quant_convolution.hpp>
+
+struct quant_conv_int8x4_default : verify_program<quant_conv_int8x4_default>
+{
+    migraphx::program create_program() const
+    {
+        migraphx::program p;
+        auto* mm = p.get_main_module();
+        migraphx::shape a_shape{migraphx::shape::int8_type, {16, 16, 4, 4}};
+        auto pa = mm->add_parameter("a", a_shape);
+        migraphx::shape c_shape{migraphx::shape::int8_type, {16, 16, 3, 3}};
+        auto pc = mm->add_parameter("c", c_shape);
+        mm->add_instruction(
+            migraphx::op::quant_convolution{{{0, 0}}, {{1, 1}}, {{1, 1}}, migraphx::op::same},
+            pa,
+            pc);
+        return p;
+    }
+};