Commit 7f3a960b authored by Shucai Xiao

Some code refinements related to quantization.

parent f8613dd1
......@@ -42,10 +42,10 @@ struct convert : unary<convert>
     float res = scale * x + shift;
     if(target_type == shape::int8_type)
     {
-        int factor = (res > 0) ? 1 : -1;
+        int factor = (res >= 0.0f) ? 1 : -1;
         res = res + factor * 0.5f;
-        res = res > 127.0 ? 127.0 : res;
-        res = res < -128.0 ? -128.0 : res;
+        res = res > 127.0f ? 127.0f : res;
+        res = res < -128.0f ? -128.0f : res;
     }
     return res;
......
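For reference, the conversion above implements round-half-away-from-zero followed by saturation to [-128, 127]. A minimal standalone sketch of the same arithmetic (the helper name and the sample values are illustrative, not part of this commit):

#include <algorithm>
#include <cstdint>
#include <iostream>

// Round half away from zero, then saturate to the int8 range,
// mirroring the convert operator's apply() above.
int8_t float_to_int8(float x, float scale, float shift)
{
    float res  = scale * x + shift;
    int factor = (res >= 0.0f) ? 1 : -1;
    res        = res + factor * 0.5f;
    res        = std::min(std::max(res, -128.0f), 127.0f);
    // truncation toward zero after adding +/-0.5 performs the rounding
    return static_cast<int8_t>(res);
}

int main()
{
    std::cout << int(float_to_int8(2.0f, 63.5f, 0.0f)) << "\n";  // 127
    std::cout << int(float_to_int8(-0.5f, 63.5f, 0.0f)) << "\n"; // -32
}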
......@@ -7,6 +7,7 @@
 #include <migraphx/op/mul.hpp>
 #include <migraphx/op/add.hpp>
 #include <migraphx/op/quant_dot.hpp>
+#include <migraphx/op/capture.hpp>
 #include <migraphx/op/convolution.hpp>
 #include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/multibroadcast.hpp>
......@@ -124,6 +125,23 @@ void quantize(program& prog) { quantize(prog, {"all"}); }
+static std::vector<std::pair<float, float>> int8_quant_params;
+
+// function to compute the scale for each convert operator used to convert to int8
+void calc_quant_params(std::size_t ins_index, std::vector<migraphx::argument> args)
+{
+    std::pair<float, float> param_pair{1.0f, 0.0f};
+
+    // a scale and a shift are needed only for the int8 type, and we do not
+    // consider the shift here, so it is set to 0
+    std::vector<float> vec_val;
+    args.front().visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
+    auto max_val = *std::max_element(vec_val.begin(), vec_val.end());
+    auto min_val = *std::min_element(vec_val.begin(), vec_val.end());
+    auto max_abs = std::max(std::fabs(max_val), std::fabs(min_val));
+    param_pair.first = 127.0f / max_abs;
+
+    int8_quant_params[ins_index] = param_pair;
+}

 // int8 quantization is different from fp16 since int8 can only represent
 // values in -128 ~ 127. To convert a float or double value to int8, we need a
 // scale and a shift, and the conversion is then v_int8 = fp * scale + shift.
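As a worked example of these two pieces together (values are illustrative): if a captured argument holds values in [-0.5, 2.0], then max_abs = 2.0 and calc_quant_params records scale = 127.0 / 2.0 = 63.5 with shift = 0. The convert operator then maps 2.0 to 127 and -0.5 to -32 (-31.75 rounded away from zero), keeping every value inside the int8 range.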
......@@ -309,9 +327,7 @@ void quantize_int8(program& prog,
                 ins,
                 op::quant_convolution{padding, stride, dilation, padding_mode, group},
                 converted_inputs);
-            auto fp_conv = prog.insert_instruction(
-                ins, op::convert{shape::float_type, adjust_factor, 0.0f}, quant_conv);
-            prog.replace_instruction(ins, op::convert{orig_type, 1.0f, 0.0f}, fp_conv);
+            prog.replace_instruction(ins, op::convert{orig_type, adjust_factor, 0.0f}, quant_conv);
         }
         else
         {
......@@ -333,7 +349,66 @@ void quantize_int8(program& prog, const std::vector<std::string>& ins_names)
 void quantize_int8(program& prog)
 {
     std::vector<std::string> ins_names = {"dot", "convolution"};
-    quantize_int8(prog, ins_names);
+    quantize_int8(prog, ins_names, int8_quant_params);
 }
+
+// For each input argument of an instruction, we need to insert a
+// capture operator to compute the scale and shift
+void capture_arguments(program& prog,
+                       const std::vector<std::string>& ins_names,
+                       std::function<void(std::size_t, std::vector<argument>)> func)
+{
+    size_t num_quant_params = 0;
+    // int8 quantization only supports dot and convolution
+    std::vector<std::string> op_names = {"dot", "convolution", "quant_dot", "quant_convolution"};
+    if(!std::all_of(ins_names.begin(), ins_names.end(), [&](auto name) {
+           return std::find(op_names.begin(), op_names.end(), name) != op_names.end();
+       }))
+    {
+        MIGRAPHX_THROW("CAPTURE_ARGUMENTS: input operator is not supported");
+    }
+
+    std::unordered_map<instruction_ref, instruction_ref> ins_map;
+    for(auto ins : iterator_for(prog))
+    {
+        if(not contains(ins_names, ins->name()))
+        {
+            continue;
+        }
+
+        auto inputs = ins->inputs();
+        std::vector<instruction_ref> new_args;
+        for(auto input : inputs)
+        {
+            instruction_ref new_ins{};
+            if(ins_map.count(input) > 0)
+            {
+                new_ins = ins_map[input];
+            }
+            else
+            {
+                new_ins = prog.insert_instruction(
+                    std::next(input), op::capture{num_quant_params++, func}, input);
+                ins_map[input] = new_ins;
+            }
+            new_args.push_back(new_ins);
+        }
+        instruction::replace(ins, ins->get_operator(), ins->get_shape(), new_args);
+    }
+
+    // reserve one pair of quantization parameters for each captured argument
+    int8_quant_params.resize(num_quant_params, std::make_pair(-1.0f, -1.0f));
+}
+
+void capture_arguments(program& prog, const std::vector<std::string>& ins_names)
+{
+    capture_arguments(prog, ins_names, calc_quant_params);
+}
+
+void capture_arguments(program& prog)
+{
+    std::vector<std::string> ins_names = {"dot", "convolution"};
+    capture_arguments(prog, ins_names);
+}

 } // namespace MIGRAPHX_INLINE_NS
......
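Taken together, the functions added above suggest a three-step calibration flow: instrument the program, run it on representative data so each capture operator records its argument's range, then quantize. A hedged sketch of that flow (create_program and calibration_inputs are placeholders, and the exact call order is an assumption, not taken from this commit):

// hypothetical calibration driver
migraphx::program p = create_program(); // placeholder: builds the model
migraphx::capture_arguments(p);         // wrap dot/convolution inputs in capture ops
p.eval(calibration_inputs);             // capture ops invoke calc_quant_params per argument
migraphx::quantize_int8(p);             // rewrite to int8 using the recorded parameters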
......@@ -21,7 +21,7 @@ void convert(hipStream_t stream,
 {
     gs_launch(stream, result.get_shape().elements())([=](auto i) {
         float res = input_ptr[i] * scale + shift;
-        int factor = (res > 0) ? 1 : -1;
+        int factor = (res >= 0.0f) ? 1 : -1;
         output_ptr[i] = static_cast<int8_t>(
             std::min<float>(std::max<float>(-128.0f, res + factor * 0.5), 127.0f));
     });
......
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/device/convert.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape miopen_quant_convolution::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(5).standard();
    return op.compute_shape({inputs.at(0), inputs.at(1)});
}

argument miopen_quant_convolution::compute(context& ctx,
                                           const shape& output_shape,
                                           const std::vector<argument>& args) const
{
    auto x_desc      = make_tensor(args[0].get_shape());
    auto x_desc_vec4 = make_tensor(args[0].get_shape(), true);
    auto w_desc      = make_tensor(args[1].get_shape());
    auto w_desc_vec4 = make_tensor(args[1].get_shape(), true);
    shape tmp_output_shape{shape::float_type, output_shape.lens()};
    auto y_desc = make_tensor(tmp_output_shape);
    float alpha = 1;
    float beta  = 0;

    // pack the input tensor to vec4 format
    auto status = miopenTransformTensor(ctx.get_stream().get_miopen(),
                                        &alpha,
                                        x_desc.get(),
                                        args[0].implicit(),
                                        &beta,
                                        x_desc_vec4.get(),
                                        arg_vec4_x.implicit());
    if(status != miopenStatusSuccess)
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: transform input tensor failed");
    }

    // pack the weight tensor to vec4 format
    status = miopenTransformTensor(ctx.get_stream().get_miopen(),
                                   &alpha,
                                   w_desc.get(),
                                   args[1].implicit(),
                                   &beta,
                                   w_desc_vec4.get(),
                                   arg_vec4_w.implicit());
    if(status != miopenStatusSuccess)
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: transform weight tensor failed");
    }

    status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
                                      &alpha,
                                      x_desc_vec4.get(),
                                      arg_vec4_x.implicit(),
                                      w_desc_vec4.get(),
                                      arg_vec4_w.implicit(),
                                      cd.get(),
                                      algo,
                                      &beta,
                                      y_desc.get(),
                                      args[3].implicit(),
                                      args[2].implicit(),
                                      args[2].get_shape().bytes());
    if(status != miopenStatusSuccess)
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: run convolution forward failed");
    }

    // add a conversion from float to int32_t
    device::convert(ctx.get_stream().get(), args[4], args[3], 1.0f, 0.0f, shape::int32_type);

    return args[4];
}

shape miopen_quant_convolution::compile(context& ctx,
                                        const shape& output_shape,
                                        std::vector<shape> inputs)
{
    shape workspace_shape{};

    auto x_desc = make_tensor(inputs[0], true);
    auto w_desc = make_tensor(inputs[1], true);
    shape tmp_output_shape{shape::float_type, output_shape.lens()};
    auto y_desc = make_tensor(tmp_output_shape);

    std::size_t workspace_size = 0;
    miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                             w_desc.get(),
                                             x_desc.get(),
                                             cd.get(),
                                             y_desc.get(),
                                             &workspace_size);
    workspace_shape = shape{shape::int8_type, {workspace_size}};

    arg_vec4_x     = to_gpu(generate_argument(pack_int8_shape(inputs[0])));
    arg_vec4_w     = to_gpu(generate_argument(pack_int8_shape(inputs[1])));
    auto y         = allocate_gpu(tmp_output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf;
    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                        x_desc.get(),
                                                        arg_vec4_x.implicit(),
                                                        w_desc.get(),
                                                        arg_vec4_w.implicit(),
                                                        cd.get(),
                                                        y_desc.get(),
                                                        y.implicit(),
                                                        1,
                                                        &algo_count,
                                                        &perf,
                                                        workspace.implicit(),
                                                        workspace_size,
                                                        false);
    if(status != miopenStatusSuccess)
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: find convolution failed");
    }
    handle = ctx.get_stream().get_miopen();
    algo   = perf.fwd_algo;
    return shape{shape::int8_type, {perf.memory}};
}

void miopen_quant_convolution::finalize(context& ctx,
                                        const shape& output_shape,
                                        std::vector<shape> inputs)
{
    if(handle == ctx.get_stream().get_miopen())
        return;
    // check that the workspace hasn't changed
    auto size = inputs.at(2).bytes();
    auto ws   = compile(ctx, output_shape, std::move(inputs));
    if(ws.bytes() > size)
        MIGRAPHX_THROW("Workspace has changed during finalization.");
}

shape miopen_quant_convolution::pack_int8_shape(shape& s)
{
    if(s.type() != shape::int8_type)
    {
        MIGRAPHX_THROW("PACK_INT8_SHAPE: only process int8_type");
    }

    // round the channel dimension up to a multiple of 4 and
    // recompute the batch stride accordingly
    auto lens    = s.lens();
    auto strides = s.strides();
    lens[1]      = (lens[1] + 3) / 4 * 4;
    strides[0]   = strides[1] * lens[1];

    return {s.type(), lens, strides};
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
#ifndef MIGRAPHX_GUARD_OPERATORS_CONVERT_HPP
#define MIGRAPHX_GUARD_OPERATORS_CONVERT_HPP

#include <array>
#include <migraphx/op/unary.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {

struct convert : unary<convert>
{
    shape::type_t target_type = shape::half_type;
    float scale               = 1.0f;
    float shift               = 0.0f;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(
            f(self.target_type, "target_type"), f(self.scale, "scale"), f(self.shift, "shift"));
    }

    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(1);
        return {target_type, inputs.at(0).lens(), inputs.at(0).strides()};
    }

    auto apply() const
    {
        return [&](auto x) {
            float res = scale * x + shift;
            if(target_type == shape::int8_type)
            {
                // round half away from zero, then saturate to the int8 range
                int factor = (res >= 0.0f) ? 1 : -1;
                res        = res + factor * 0.5f;
                res        = res > 127.0f ? 127.0f : res;
                res        = res < -128.0f ? -128.0f : res;
            }
            return res;
        };
    }

    convert(shape::type_t t) : target_type{t} {}
    convert(shape::type_t t, float sle, float sft) : target_type{t}, scale{sle}, shift{sft} {}
    convert() {}
};

} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
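As a worked example of the channel rounding in pack_int8_shape: (lens[1] + 3) / 4 * 4 rounds the channel count up to the next multiple of 4, matching the vec4 packing used by the tensor transforms above. A small check (illustrative only):

#include <cstddef>
#include <iostream>

int main()
{
    // channel counts before and after rounding up to a multiple of 4
    for(std::size_t c : {1, 4, 6, 9})
        std::cout << c << " -> " << (c + 3) / 4 * 4 << "\n";
    // prints: 1 -> 4, 4 -> 4, 6 -> 8, 9 -> 12
}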