Commit 52ed1fc3 authored by Shucai Xiao

merge changes from int8_miopen_call

parents 749d7a85 a5d03696
 google/protobuf@v3.8.0 -DCMAKE_POSITION_INDEPENDENT_CODE=On -X subdir -Dprotobuf_BUILD_TESTS=Off
 RadeonOpenCompute/rocm-cmake@42f6740 --build
-ROCmSoftwarePlatform/rocBLAS@30a992ae02fda568688bcd190edd5e277d6674d9
-ROCmSoftwarePlatform/MIOpen@1.8.0
+ROCmSoftwarePlatform/rocBLAS@a4c92848ffb9ec934b558c0319a154425fddef7e
+ROCmSoftwarePlatform/MIOpen@2.0.0
 blaze,https://bitbucket.org/blaze-lib/blaze/get/f0755dea0e03.tar.gz -X header -DHEADER_DIR=blaze
 half,https://github.com/pfultz2/half/archive/1.12.0.tar.gz -X header -H sha256:0a08660b68abb176ebc2a0cdf8de46e3182a7f46c66443bb80dbfaaec98cf969
 pybind/pybind11@v2.2.4 -DPYBIND11_TEST=Off --build
@@ -52,51 +52,23 @@ struct quant_convolution
         }
         t = shape::int32_type;
-        if(padding_mode == default_)
-        {
-            return {t,
-                    {
-                        input.lens()[0],
-                        weights.lens()[0],
-                        std::size_t(std::max<std::ptrdiff_t>(
-                            1,
-                            (input.lens()[2] - (1 + dilation[0] * (weights.lens()[2] - 1)) +
-                             2 * padding[0]) /
-                                    stride[0] +
-                                1)),
-                        std::size_t(std::max<std::ptrdiff_t>(
-                            1,
-                            (input.lens()[3] - (1 + dilation[1] * (weights.lens()[3] - 1)) +
-                             2 * padding[1]) /
-                                    stride[1] +
-                                1)),
-                    }};
-        }
-        else if(padding_mode == same)
-        {
-            return {t,
-                    {input.lens()[0],
-                     weights.lens()[0],
-                     static_cast<std::size_t>(
-                         std::ceil(static_cast<double>(input.lens()[2]) / stride[0])),
-                     static_cast<std::size_t>(
-                         std::ceil(static_cast<double>(input.lens()[3]) / stride[1]))}};
-        }
-        else if(padding_mode == valid)
-        {
-            return {
-                t,
-                {input.lens()[0],
-                 weights.lens()[0],
-                 static_cast<std::size_t>(std::ceil(
-                     static_cast<double>(input.lens()[2] - weights.lens()[2] + 1) / stride[0])),
-                 static_cast<std::size_t>(std::ceil(
-                     static_cast<double>(input.lens()[3] - weights.lens()[3] + 1) / stride[1]))}};
-        }
-        else
-        {
-            MIGRAPHX_THROW("QUANT_CONVOLUTION: invalid padding mode");
-        }
+        return {t,
+                {
+                    input.lens()[0],
+                    weights.lens()[0],
+                    std::size_t(std::max<std::ptrdiff_t>(
+                        1,
+                        (input.lens()[2] - (1 + dilation[0] * (weights.lens()[2] - 1)) +
+                         2 * padding[0]) /
+                                stride[0] +
+                            1)),
+                    std::size_t(std::max<std::ptrdiff_t>(
+                        1,
+                        (input.lens()[3] - (1 + dilation[1] * (weights.lens()[3] - 1)) +
+                         2 * padding[1]) /
+                                stride[1] +
+                            1)),
+                }};
     }
 };
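A note for readers checking the shape math: the surviving branch is the standard dilated-convolution output formula, out = max(1, (in - (1 + dilation * (kernel - 1)) + 2 * padding) / stride + 1), now applied unconditionally since the same and valid padding modes are gone. A minimal standalone sketch of that computation (conv_out_dim is a hypothetical helper, not part of the patch):

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>

// out = max(1, (in - (1 + dilation * (kernel - 1)) + 2 * padding) / stride + 1)
std::size_t conv_out_dim(std::ptrdiff_t in,
                         std::ptrdiff_t kernel,
                         std::ptrdiff_t padding,
                         std::ptrdiff_t stride,
                         std::ptrdiff_t dilation)
{
    return std::size_t(std::max<std::ptrdiff_t>(
        1, (in - (1 + dilation * (kernel - 1)) + 2 * padding) / stride + 1));
}

int main()
{
    // 4x4 input, 3x3 kernel, padding 0, stride 1, dilation 1:
    // (4 - (1 + 1 * 2) + 0) / 1 + 1 = 2, matching the 2x2 spatial output
    // of the int8 tests below.
    std::printf("%zu\n", conv_out_dim(4, 3, 0, 1, 1)); // prints 2
}
```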
@@ -49,6 +49,8 @@ inline tensor_descriptor make_tensor(const migraphx::shape& s, bool pack = false
         d = miopenFloat;
     else if(s.type() == shape::half_type)
         d = miopenHalf;
+    else if(s.type() == shape::int32_type)
+        d = miopenInt32;
     else if(s.type() == shape::int8_type)
     {
         if(pack)
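MIOpen 2.0 adds the miopenInt32 data type, which is what lets the int32 convolution output be described directly. A self-contained analogue of the updated dispatch, using stand-in enums since the real miopenDataType_t and migraphx::shape::type_t are defined elsewhere (illustration only; the real make_tensor also selects a vec4-packed int8 layout when pack is true):

```cpp
#include <stdexcept>

// Stand-ins for migraphx::shape::type_t and miopenDataType_t (illustration only).
enum class stype { float_type, half_type, int32_type, int8_type };
enum class dtype { miopenFloat, miopenHalf, miopenInt32, miopenInt8 };

// Mirrors the dispatch order in the updated make_tensor.
dtype to_miopen_type(stype t)
{
    switch(t)
    {
    case stype::float_type: return dtype::miopenFloat;
    case stype::half_type: return dtype::miopenHalf;
    case stype::int32_type: return dtype::miopenInt32; // newly handled by this patch
    case stype::int8_type: return dtype::miopenInt8;
    }
    throw std::runtime_error("unsupported type");
}

int main() { return to_miopen_type(stype::int32_type) == dtype::miopenInt32 ? 0 : 1; }
```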
@@ -177,14 +177,10 @@ struct miopen_apply
             auto ws        = conv.compile(ctx, ins->get_shape(), to_shapes(ins->inputs()));
             auto workspace = insert_allocation(ins, ws, "workspace");
-            // add a temp float output to store the miopen convolution output
-            shape tmp_output_shape{shape::float_type, ins->get_shape().lens()};
-            auto tmp_output = insert_allocation(ins, tmp_output_shape, "tmp_out");
             auto output = insert_allocation(ins, ins->get_shape());
             return prog->replace_instruction(
-                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, tmp_output, output);
+                ins, conv, ins->inputs().at(0), ins->inputs().at(1), workspace, output);
         });
     }
@@ -9,7 +9,7 @@ namespace gpu {
 shape miopen_quant_convolution::compute_shape(const std::vector<shape>& inputs) const
 {
-    check_shapes{inputs, *this}.has(5).standard();
+    check_shapes{inputs, *this}.has(4).standard();
     return op.compute_shape({inputs.at(0), inputs.at(1)});
 }
 argument miopen_quant_convolution::compute(context& ctx,
@@ -20,8 +20,7 @@ argument miopen_quant_convolution::compute(context& ctx,
     auto x_desc_vec4 = make_tensor(args[0].get_shape(), true);
     auto w_desc = make_tensor(args[1].get_shape());
     auto w_desc_vec4 = make_tensor(args[1].get_shape(), true);
-    shape tmp_output_shape{shape::float_type, output_shape.lens()};
-    auto y_desc = make_tensor(tmp_output_shape);
+    auto y_desc = make_tensor(output_shape);
     float alpha = 1;
     float beta = 0;
@@ -70,10 +69,7 @@ argument miopen_quant_convolution::compute(context& ctx,
         MIGRAPHX_THROW("QUANT_CONVOLUTION: run convolution forward failed");
     }
-    // Add a conversion from float to int32_t
-    device::convert(ctx.get_stream().get(), args[4], args[3], 1.0f, 0.0f, shape::int32_type);
-    return args[4];
+    return args[3];
 }
@@ -83,8 +79,7 @@ shape miopen_quant_convolution::compile(context& ctx,
     shape workspace_shape{};
     auto x_desc = make_tensor(inputs[0], true);
     auto w_desc = make_tensor(inputs[1], true);
-    shape tmp_output_shape{shape::float_type, output_shape.lens()};
-    auto y_desc = make_tensor(tmp_output_shape);
+    auto y_desc = make_tensor(output_shape);
     std::size_t workspace_size = 0;
     miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
@@ -97,7 +92,7 @@ shape miopen_quant_convolution::compile(context& ctx,
     arg_vec4_x = to_gpu(generate_argument(pack_int8_shape(inputs[0])));
     arg_vec4_w = to_gpu(generate_argument(pack_int8_shape(inputs[1])));
-    auto y         = allocate_gpu(tmp_output_shape);
+    auto y         = allocate_gpu(output_shape);
     auto workspace = allocate_gpu(workspace_shape);
     int algo_count = 1;
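The net effect of these hunks: with MIOpen 2.0 the int8 convolution writes its int32 result directly, so the operator takes 4 arguments instead of 5, and both the temporary float buffer and the trailing device::convert pass are dropped. For reference, a CPU sketch of the conversion step being removed, assuming device::convert computes dst = round(alpha * src + beta) cast to the target type (assumed semantics, not stated in this diff):

```cpp
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// CPU analogue of the removed GPU pass
//   device::convert(stream, args[4], args[3], 1.0f, 0.0f, shape::int32_type);
// where args[3] was the temporary float buffer and args[4] the int32 output.
std::vector<int32_t> convert_to_int32(const std::vector<float>& src,
                                      float alpha = 1.0f,
                                      float beta  = 0.0f)
{
    std::vector<int32_t> dst(src.size());
    for(std::size_t i = 0; i < src.size(); ++i)
        dst[i] = static_cast<int32_t>(std::lround(alpha * src[i] + beta));
    return dst;
}

int main()
{
    // With MIOpen 2.0 the convolution itself produces int32, so this extra
    // rounding pass (and the float scratch buffer feeding it) is unnecessary.
    auto r = convert_to_int32({10197.0f, 10548.0f});
    return r[0] == 10197 ? 0 : 1;
}
```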
@@ -1477,76 +1477,6 @@ TEST_CASE(quant_conv2d_test)
     EXPECT(migraphx::verify_range(results_vector, s));
 }
 
-TEST_CASE(quant_conv2d_test_default_mode)
-{
-    migraphx::program p;
-    migraphx::shape a_shape{migraphx::shape::int8_type, {2, 3, 4, 4}};
-    std::vector<int8_t> a(2 * 3 * 4 * 4);
-    std::iota(a.begin(), a.end(), 0);
-    auto al = p.add_literal(migraphx::literal{a_shape, a});
-
-    migraphx::shape c_shape{migraphx::shape::int8_type, {2, 3, 3, 3}};
-    std::vector<int8_t> c(2 * 3 * 3 * 3);
-    std::iota(c.begin(), c.end(), 0);
-    auto cl = p.add_literal(migraphx::literal{c_shape, c});
-
-    p.add_instruction(
-        migraphx::op::quant_convolution{{{0, 0}}, {{1, 1}}, {{1, 1}}, migraphx::op::same}, al, cl);
-    p.compile(migraphx::cpu::target{});
-    auto result = p.eval({});
-
-    std::vector<int32_t> s = {
-        10197, 10548, 6939,  3420,  11601, 11952, 7839,  3852,  7383,  7590,  4953,  2421,  3480,
-        3570,  2316,  1125,  25506, 26586, 17874, 9009,  29826, 30906, 20718, 10413, 20505, 21198,
-        14187, 7119,  10527, 10860, 7257,  3636,  27045, 27396, 17739, 8604,  28449, 28800, 18639,
-        9036,  17319, 17526, 11289, 5445,  7800,  7890,  5052,  2421,  77346, 78426, 52002, 25857,
-        81666, 82746, 54846, 27261, 53769, 54462, 36075, 17919, 26511, 26844, 17769, 8820};
-    std::vector<int32_t> results_vector;
-    result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
-    EXPECT(migraphx::verify_range(results_vector, s));
-}
-
-TEST_CASE(quant_conv2d_test_valid_mode)
-{
-    migraphx::program p;
-    migraphx::shape a_shape{migraphx::shape::int8_type, {2, 3, 4, 4}};
-    std::vector<int8_t> a(2 * 3 * 4 * 4);
-    std::iota(a.begin(), a.end(), 0);
-    auto al = p.add_literal(migraphx::literal{a_shape, a});
-
-    migraphx::shape c_shape{migraphx::shape::int8_type, {2, 3, 3, 3}};
-    std::vector<int8_t> c(2 * 3 * 3 * 3);
-    std::iota(c.begin(), c.end(), 0);
-    auto cl = p.add_literal(migraphx::literal{c_shape, c});
-
-    p.add_instruction(
-        migraphx::op::quant_convolution{{{0, 0}}, {{1, 1}}, {{1, 1}}, migraphx::op::valid}, al, cl);
-    p.compile(migraphx::cpu::target{});
-    auto result = p.eval({});
-
-    std::vector<int32_t> s = {10197, 10548, 11601, 11952, 25506, 26586, 29826, 30906,
-                              27045, 27396, 28449, 28800, 77346, 78426, 81666, 82746};
-    std::vector<int32_t> results_vector;
-    result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
-    EXPECT(migraphx::verify_range(results_vector, s));
-}
-
 TEST_CASE(quant_conv2d_padding_test)
 {
     migraphx::program p;
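With the default/same/valid modes gone, quant_convolution is built with explicit padding only, as in the surviving quant_conv2d_padding_test. A hedged usage sketch along those lines (header paths and the three-argument {padding, stride, dilation} initializer are assumptions inferred from the test code above):

```cpp
#include <cstdint>
#include <numeric>
#include <vector>
#include <migraphx/program.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/cpu/target.hpp>

int main()
{
    migraphx::program p;
    migraphx::shape a_shape{migraphx::shape::int8_type, {2, 3, 4, 4}};
    std::vector<int8_t> a(2 * 3 * 4 * 4);
    std::iota(a.begin(), a.end(), 0);
    auto al = p.add_literal(migraphx::literal{a_shape, a});

    migraphx::shape c_shape{migraphx::shape::int8_type, {2, 3, 3, 3}};
    std::vector<int8_t> c(2 * 3 * 3 * 3);
    std::iota(c.begin(), c.end(), 0);
    auto cl = p.add_literal(migraphx::literal{c_shape, c});

    // Padding, stride, and dilation are explicit; there is no padding_mode argument.
    p.add_instruction(migraphx::op::quant_convolution{{{1, 1}}, {{1, 1}}, {{1, 1}}}, al, cl);
    p.compile(migraphx::cpu::target{});
    auto result = p.eval({});

    std::vector<int32_t> out;
    result.visit([&](auto v) { out.assign(v.begin(), v.end()); });
    return out.empty() ? 1 : 0; // int32 result, shape {2, 2, 4, 4}
}
```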
@@ -243,18 +243,6 @@ struct test_exp : verify_program<test_exp>
     }
 };
 
-struct test_erf : verify_program<test_erf>
-{
-    migraphx::program create_program() const
-    {
-        migraphx::program p;
-        migraphx::shape s{migraphx::shape::float_type, {2, 3, 4, 6}};
-        auto param = p.add_parameter("x", s);
-        p.add_instruction(migraphx::op::erf{}, param);
-        return p;
-    }
-};
-
 struct test_log : verify_program<test_log>
 {
     migraphx::program create_program() const
@@ -604,13 +592,13 @@ struct test_softmax2 : verify_program<test_softmax2>
     }
 };
 
-template <int Axis, migraphx::shape::type_t T>
-struct test_softmax : verify_program<test_softmax<Axis, T>>
+template <int Axis>
+struct test_softmax : verify_program<test_softmax<Axis>>
 {
     migraphx::program create_program() const
     {
         migraphx::program p;
-        migraphx::shape s{T, {512, 4, 1067, 6}};
+        migraphx::shape s{migraphx::shape::float_type, {3, 4, 5, 6}};
         auto param = p.add_parameter("0", s);
         p.add_instruction(migraphx::op::softmax{Axis}, param);
@@ -618,38 +606,10 @@ struct test_softmax : verify_program<test_softmax<Axis, T>>
     }
 };
 
-template struct test_softmax<0, migraphx::shape::float_type>;
-template struct test_softmax<2, migraphx::shape::float_type>;
-template struct test_softmax<1, migraphx::shape::double_type>;
-template struct test_softmax<3, migraphx::shape::double_type>;
-template struct test_softmax<0, migraphx::shape::half_type>;
-template struct test_softmax<1, migraphx::shape::half_type>;
-template struct test_softmax<2, migraphx::shape::half_type>;
-template struct test_softmax<3, migraphx::shape::half_type>;
-
-template <class T, int Axis>
-struct test_arg_ops : verify_program<test_arg_ops<T, Axis>>
-{
-    migraphx::program create_program() const
-    {
-        migraphx::program p;
-        migraphx::shape s{migraphx::shape::float_type, {2, 3, 4, 1025}};
-        auto param = p.add_parameter("data", s);
-        p.add_instruction(T{Axis}, param);
-        return p;
-    }
-};
-
-template struct test_arg_ops<migraphx::op::argmax, 0>;
-template struct test_arg_ops<migraphx::op::argmax, 1>;
-template struct test_arg_ops<migraphx::op::argmax, 2>;
-template struct test_arg_ops<migraphx::op::argmax, 3>;
-template struct test_arg_ops<migraphx::op::argmin, 0>;
-template struct test_arg_ops<migraphx::op::argmin, 1>;
-template struct test_arg_ops<migraphx::op::argmin, 2>;
-template struct test_arg_ops<migraphx::op::argmin, 3>;
+template struct test_softmax<0>;
+template struct test_softmax<1>;
+template struct test_softmax<2>;
+template struct test_softmax<3>;
 
 struct test_conv : verify_program<test_conv>
 {
@@ -3570,13 +3530,32 @@ struct test_lstm_bidirct_default_actv2 : verify_program<test_lstm_bidirct_defaul
     }
 };
 
-template <int Axis, migraphx::shape::type_t T>
-struct test_logsoftmax : verify_program<test_logsoftmax<Axis, T>>
-{
-    migraphx::program create_program() const
-    {
-        migraphx::program p;
-        migraphx::shape s{T, {10, 4, 2080, 6}};
-        auto param = p.add_parameter("0", s);
-        p.add_instruction(migraphx::op::logsoftmax{Axis}, param);
+template <int Axis>
+struct test_logsoftmax : verify_program<test_logsoftmax<Axis>>
+{
+    migraphx::program create_program() const
+    {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {3, 4, 5, 6}};
+        auto param = p.add_parameter("0", s);
+        p.add_instruction(migraphx::op::logsoftmax{Axis}, param);
+        return p;
+    }
+};
+
+template struct test_logsoftmax<0>;
+template struct test_logsoftmax<1>;
+template struct test_logsoftmax<2>;
+template struct test_logsoftmax<3>;
+
+template <int Axis>
+struct test_logsoftmax_1 : verify_program<test_logsoftmax_1<Axis>>
+{
+    migraphx::program create_program() const
+    {
+        migraphx::program p;
+        migraphx::shape s{migraphx::shape::float_type, {3}};
+        auto param = p.add_parameter("0", s);
+        p.add_instruction(migraphx::op::logsoftmax{Axis}, param);
@@ -3584,16 +3563,7 @@ struct test_logsoftmax : verify_program<test_logsoftmax<Axis, T>>
         return p;
     }
 };
 
-template struct test_logsoftmax<0, migraphx::shape::float_type>;
-template struct test_logsoftmax<1, migraphx::shape::float_type>;
-template struct test_logsoftmax<2, migraphx::shape::float_type>;
-template struct test_logsoftmax<3, migraphx::shape::float_type>;
-template struct test_logsoftmax<1, migraphx::shape::double_type>;
-template struct test_logsoftmax<3, migraphx::shape::double_type>;
-template struct test_logsoftmax<1, migraphx::shape::half_type>;
-template struct test_logsoftmax<0, migraphx::shape::half_type>;
-template struct test_logsoftmax<2, migraphx::shape::half_type>;
-template struct test_logsoftmax<3, migraphx::shape::half_type>;
+template struct test_logsoftmax_1<0>;
 
 struct test_fp32_fp16_lall : verify_program<test_fp32_fp16_lall>
 {
@@ -94,16 +94,6 @@ TEST_CASE(quant_convolution_shape)
     throws_shape(migraphx::op::quant_convolution{}, input3, weights);
     throws_shape(migraphx::op::quant_convolution{}, input, weight3);
     throws_shape(migraphx::op::quant_convolution{}, input3, weight3);
-
-    migraphx::shape output_same_mode{migraphx::shape::int32_type, {4, 4, 3, 3}};
-    expect_shape(output_same_mode,
-                 migraphx::op::quant_convolution{{{0, 0}}, {{1, 1}}, {{1, 1}}, migraphx::op::same},
-                 input,
-                 weights);
-    expect_shape(output,
-                 migraphx::op::quant_convolution{{{0, 0}}, {{1, 1}}, {{1, 1}}, migraphx::op::valid},
-                 input,
-                 weights);
 }
 
 TEST_CASE(transpose_shape)