add int8_convolution implementation

1222d174 · Shucai Xiao · 464f950b · 1222d174 · 1222d174 · 1222d174
Commit 1222d174 authored May 01, 2019 by Shucai Xiao
8 changed files
--- a/src/include/migraphx/op/quant_convolution.hpp
+++ b/src/include/migraphx/op/quant_convolution.hpp
+#ifndef MIGRAPHX_GUARD_OPERATORS_QUANT_CONVOLUTION_HPP
+#define MIGRAPHX_GUARD_OPERATORS_QUANT_CONVOLUTION_HPP
+#include <array>
+#include <migraphx/op/common.hpp>
+#include <migraphx/operation.hpp>
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/stringutils.hpp>
+#include <migraphx/streamutils.hpp>
+#include <migraphx/literal.hpp>
+#include <migraphx/shape_for_each.hpp>
+#include <migraphx/config.hpp>
+#include <cmath>
+#include <utility>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+/// Quantized 2-D convolution operator.
+///
+/// Takes two 4-D int8 tensors (data, weights) and produces a float tensor.
+/// Dimensions 2 and 3 of each tensor are treated as the spatial axes.
+struct quant_convolution
+{
+    std::array<std::size_t, 2> padding  = {{0, 0}};
+    std::array<std::size_t, 2> stride   = {{1, 1}};
+    std::array<std::size_t, 2> dilation = {{1, 1}};
+    padding_mode_t padding_mode = default_;
+    int group                   = 1;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.padding, "padding"),
+                    f(self.stride, "stride"),
+                    f(self.dilation, "dilation"),
+                    f(self.padding_mode, "padding_mode"),
+                    f(self.group, "group"));
+    }
+    std::string name() const { return "quant_convolution"; }
+    /// Computes the output shape for {data, weights}.
+    ///
+    /// Throws if the shape checks fail, if the data type is not int8, or if
+    /// `padding_mode` is not one of default_, same, valid.
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this}.has(2).same_type().same_ndims().only_dims(4);
+        const shape& input   = inputs.at(0);
+        const shape& weights = inputs.at(1);
+        auto t               = input.type();
+        if(t != shape::int8_type)
+        {
+            MIGRAPHX_THROW("QUANT_THROW: only accept input of type int8_t");
+        }
+        t = shape::float_type;
+        if(padding_mode == default_)
+        {
+            // Evaluate the size formula in signed arithmetic. In std::size_t a kernel
+            // larger than the padded input wraps around *before* the division by the
+            // stride, so the clamp to 1 below would see a huge positive value instead
+            // of a negative one.
+            auto out_len = [&](std::size_t axis) {
+                const auto in_len = static_cast<std::ptrdiff_t>(input.lens()[axis + 2]);
+                const auto k_len  = static_cast<std::ptrdiff_t>(weights.lens()[axis + 2]);
+                const auto dil    = static_cast<std::ptrdiff_t>(dilation[axis]);
+                const auto pad    = static_cast<std::ptrdiff_t>(padding[axis]);
+                const auto step   = static_cast<std::ptrdiff_t>(stride[axis]);
+                // Extent covered by the dilated kernel along this axis.
+                const std::ptrdiff_t span = 1 + dil * (k_len - 1);
+                return std::size_t(
+                    std::max<std::ptrdiff_t>(1, (in_len - span + 2 * pad) / step + 1));
+            };
+            return {t, {input.lens()[0], weights.lens()[0], out_len(0), out_len(1)}};
+        }
+        else if(padding_mode == same)
+        {
+            return {t,
+                    {input.lens()[0],
+                     weights.lens()[0],
+                     static_cast<std::size_t>(
+                         std::ceil(static_cast<double>(input.lens()[2]) / stride[0])),
+                     static_cast<std::size_t>(
+                         std::ceil(static_cast<double>(input.lens()[3]) / stride[1]))}};
+        }
+        else if(padding_mode == valid)
+        {
+            return {
+                t,
+                {input.lens()[0],
+                 weights.lens()[0],
+                 static_cast<std::size_t>(std::ceil(
+                     static_cast<double>(input.lens()[2] - weights.lens()[2] + 1) / stride[0])),
+                 static_cast<std::size_t>(std::ceil(
+                     static_cast<double>(input.lens()[3] - weights.lens()[3] + 1) / stride[1]))}};
+        }
+        else
+        {
+            MIGRAPHX_THROW("Invalid padding mode");
+        }
+    }
+};
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif
--- a/src/include/migraphx/operators.hpp
+++ b/src/include/migraphx/operators.hpp
@@ -40,6 +40,7 @@
 #include <migraphx/op/outline.hpp>
 #include <migraphx/op/pad.hpp>
 #include <migraphx/op/pooling.hpp>
+#include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/quant_dot.hpp>
 #include <migraphx/op/relu.hpp>
 #include <migraphx/op/reshape.hpp>

--- a/src/targets/cpu/lowering.cpp
+++ b/src/targets/cpu/lowering.cpp
@@ -186,6 +186,52 @@ struct cpu_convolution
    }
 };
+/// CPU implementation of op::quant_convolution using a direct loop nest.
+struct cpu_quant_convolution
+{
+    op::quant_convolution op;
+    std::string name() const { return "cpu::quant_convolution"; }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    /// Convolves args[0] (data) with args[1] (weights) into a newly allocated
+    /// argument of `output_shape` and returns it.
+    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    {
+        argument result{output_shape};
+        // op.compute_shape() produces a float output for int8 inputs, so the output
+        // is visited on its own and only the two inputs go through visit_all.
+        // NOTE(review): visit_all is expected to reject arguments of differing
+        // element types - confirm against raw_data.hpp.
+        result.visit([&](auto output) {
+            visit_all(args[0], args[1])([&](auto input, auto weights) {
+                auto in   = input.get_shape().lens();
+                auto in_h = in[2];
+                auto in_w = in[3];
+                auto wei   = weights.get_shape().lens();
+                auto wei_n = wei[0];
+                auto wei_c = wei[1];
+                auto wei_h = wei[2];
+                auto wei_w = wei[3];
+                par_dfor(output_shape.lens()[0],
+                         output_shape.lens()[1],
+                         output_shape.lens()[2],
+                         output_shape.lens()[3])(
+                    [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
+                        // Top-left corner of the receptive field; negative inside the padding.
+                        const int start_x =
+                            static_cast<int>(i * op.stride[0]) - static_cast<int>(op.padding[0]);
+                        const int start_y =
+                            static_cast<int>(j * op.stride[1]) - static_cast<int>(op.padding[1]);
+                        const int group_id = w / (wei_n / op.group);
+                        double acc = 0;
+                        dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
+                            const int in_x  = start_x + static_cast<int>(x);
+                            const int in_y  = start_y + static_cast<int>(y);
+                            const int in_ch = group_id * wei_c + k;
+                            // Positions that fall in the padding contribute nothing.
+                            if(in_x >= 0 && static_cast<std::size_t>(in_x) < in_h && in_y >= 0 &&
+                               static_cast<std::size_t>(in_y) < in_w)
+                            {
+                                acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
+                            }
+                        });
+                        output(o, w, i, j) = acc;
+                    });
+            });
+        });
+        return result;
+    }
+};
 struct cpu_im2col
 {
    op::im2col op;
@@ -876,6 +922,7 @@ struct cpu_apply
    {
        apply_map["im2col"]      = extend_op<cpu_im2col, op::im2col>();
        apply_map["convolution"] = extend_op<cpu_convolution, op::convolution>();
+        apply_map["quant_convolution"] = extend_op<cpu_quant_convolution, op::quant_convolution>();
        apply_map["dot"]         = extend_op<cpu_gemm, op::dot>();
        apply_map["quant_dot"]   = extend_op<cpu_quant_gemm, op::quant_dot>();
        apply_map["batch_norm_inference"] =

--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
@@ -49,6 +49,7 @@ add_library(migraphx_gpu
    quant_gemm.cpp
    pooling.cpp
    convolution.cpp
+    quant_convolution.cpp
    softmax.cpp
    logsoftmax.cpp
    contiguous.cpp

--- a/src/targets/gpu/include/migraphx/gpu/miopen.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/miopen.hpp
@@ -57,7 +57,8 @@ inline tensor_descriptor make_tensor(const migraphx::shape& s)
    return t;
 }
-inline convolution_descriptor make_conv(const migraphx::op::convolution& op)
+template<class T>
+inline convolution_descriptor make_conv(const T& op)
 {
    auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
    miopenConvolutionMode_t c_mode = miopenConvolution;

--- a/src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP
+#define MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP
+#include <migraphx/shape.hpp>
+#include <migraphx/op/quant_convolution.hpp>
+#include <migraphx/gpu/miopen.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+struct context;
+/// GPU (MIOpen) lowering of op::quant_convolution.
+///
+/// Holds the wrapped operator, its MIOpen convolution descriptor, and the
+/// forward algorithm / handle recorded by compile().
+struct miopen_quant_convolution
+{
+    op::quant_convolution op;
+    shared<convolution_descriptor> cd;
+    miopenConvFwdAlgorithm_t algo{};
+    miopenHandle_t handle = nullptr;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        // TODO: Add algo
+        // Reflect through the wrapped operator's own type; op::convolution is a
+        // different struct whose header is not included here.
+        return op::quant_convolution::reflect(self.op, f);
+    }
+    std::string name() const { return "gpu::quant_convolution"; }
+    shape compute_shape(const std::vector<shape>& inputs) const;
+    argument
+    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
+    shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
+    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
+    // The result aliases the last input (the preallocated output buffer).
+    int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
+};
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif
--- a/src/targets/gpu/lowering.cpp
+++ b/src/targets/gpu/lowering.cpp
@@ -14,6 +14,7 @@
 #include <migraphx/gpu/rocblas.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/convolution.hpp>
+#include <migraphx/gpu/quant_convolution.hpp>
 #include <migraphx/gpu/contiguous.hpp>
 #include <migraphx/gpu/relu.hpp>
 #include <migraphx/gpu/sigmoid.hpp>
@@ -106,6 +107,7 @@ struct miopen_apply
        add_lrn_op();
        add_convolution_op();
+        add_quant_convolution_op();
        add_pooling_op();
        add_batch_norm_inference_op();
    }
@@ -152,6 +154,22 @@ struct miopen_apply
        });
    }
+    void add_quant_convolution_op()
+    {
+        // Replace each "quant_convolution" with the MIOpen-backed op, appending a
+        // workspace buffer and an output buffer to its inputs.
+        apply_map.emplace("quant_convolution", [=](instruction_ref ins) {
+            auto&& quant_op = any_cast<op::quant_convolution>(ins->get_operator());
+            miopen_quant_convolution gpu_conv{quant_op, make_conv(quant_op)};
+            auto ws_shape = gpu_conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));
+            auto ws_buffer  = insert_allocation(ins, ws_shape, "workspace");
+            auto out_buffer = insert_allocation(ins, ins->get_shape());
+            return prog->replace_instruction(
+                ins, gpu_conv, ins->inputs().at(0), ins->inputs().at(1), ws_buffer, out_buffer);
+        });
+    }
    void add_pooling_op()
    {
        apply_map.emplace("pooling", [=](instruction_ref ins) {

--- a/src/targets/gpu/quant_convolution.cpp
+++ b/src/targets/gpu/quant_convolution.cpp
+#include <migraphx/gpu/quant_convolution.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/generate.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+shape miopen_quant_convolution::compute_shape(const std::vector<shape>& inputs) const
+{
+    // Four inputs are expected and each must pass the standard() check.
+    check_shapes{inputs, *this}.has(4).standard();
+    // Only the first two shapes are forwarded to the wrapped operator.
+    std::vector<shape> conv_inputs{inputs.at(0), inputs.at(1)};
+    return op.compute_shape(conv_inputs);
+}
+/// Runs the forward convolution with the algorithm chosen by compile().
+///
+/// args = {x, w, workspace, output}; the result is written to args[3], which is
+/// returned. Throws if MIOpen reports a failure.
+argument miopen_quant_convolution::compute(context& ctx,
+                                           const shape& output_shape,
+                                           const std::vector<argument>& args) const
+{
+    auto x_desc = make_tensor(args[0].get_shape());
+    auto w_desc = make_tensor(args[1].get_shape());
+    auto y_desc = make_tensor(output_shape);
+    // MIOpen reads the scaling factors through these pointers as host floats,
+    // so they must be float objects even though the tensors are int8.
+    float alpha = 1;
+    float beta  = 0;
+    auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
+                                           &alpha,
+                                           x_desc.get(),
+                                           args[0].implicit(),
+                                           w_desc.get(),
+                                           args[1].implicit(),
+                                           cd.get(),
+                                           algo,
+                                           &beta,
+                                           y_desc.get(),
+                                           args[3].implicit(),
+                                           args[2].implicit(),
+                                           args[2].get_shape().bytes());
+    // Report failures in release builds too, matching compile().
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("QUANT_CONVOLUTION: run convolution forward failed");
+    return args[3];
+}
+/// Queries the workspace size, searches for a forward algorithm on generated
+/// inputs, and records the chosen algorithm and the MIOpen handle.
+///
+/// Returns an int8 shape whose length is the memory reported for the chosen
+/// algorithm. Throws if either MIOpen call fails.
+shape miopen_quant_convolution::compile(context& ctx,
+                                        const shape& output_shape,
+                                        std::vector<shape> inputs)
+{
+    auto x_desc = make_tensor(inputs[0]);
+    auto w_desc = make_tensor(inputs[1]);
+    auto y_desc = make_tensor(output_shape);
+    std::size_t workspace_size = 0;
+    // A failed query would otherwise silently leave the workspace at 0 bytes.
+    auto ws_status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
+                                                              w_desc.get(),
+                                                              x_desc.get(),
+                                                              cd.get(),
+                                                              y_desc.get(),
+                                                              &workspace_size);
+    if(ws_status != miopenStatusSuccess)
+        MIGRAPHX_THROW("Get convolution workspace size failed");
+    const shape workspace_shape{shape::int8_type, {workspace_size}};
+    auto x         = to_gpu(generate_argument(inputs[0]));
+    auto w         = to_gpu(generate_argument(inputs[1]));
+    auto y         = allocate_gpu(output_shape);
+    auto workspace = allocate_gpu(workspace_shape);
+    int algo_count = 1;
+    miopenConvAlgoPerf_t perf{};
+    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
+                                                        x_desc.get(),
+                                                        x.implicit(),
+                                                        w_desc.get(),
+                                                        w.implicit(),
+                                                        cd.get(),
+                                                        y_desc.get(),
+                                                        y.implicit(),
+                                                        1,
+                                                        &algo_count,
+                                                        &perf,
+                                                        workspace.implicit(),
+                                                        workspace_size,
+                                                        false);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("Find convolution failed");
+    // Remember the handle so finalize() can tell whether a re-compile is needed.
+    handle = ctx.get_stream().get_miopen();
+    algo   = perf.fwd_algo;
+    return shape{shape::int8_type, {perf.memory}};
+}
+void miopen_quant_convolution::finalize(context& ctx,
+                                        const shape& output_shape,
+                                        std::vector<shape> inputs)
+{
+    // Nothing to redo while the context still uses the handle seen by compile().
+    const bool same_handle = (handle == ctx.get_stream().get_miopen());
+    if(!same_handle)
+    {
+        // Re-compile for the new handle; the workspace already allocated
+        // (inputs[2]) must still be large enough.
+        const auto allocated_bytes = inputs.at(2).bytes();
+        const auto required        = compile(ctx, output_shape, std::move(inputs));
+        if(required.bytes() > allocated_bytes)
+            MIGRAPHX_THROW("Workspace has changed during finalization.");
+    }
+}
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx