"example/39_permute/run_permute_bundle_example.inc" did not exist on "097506c32d0e064a3ffa6bbc17cf3f72d2a285a6"
Commit 5cf8eb23 authored by Khalique

manual merge

parents 54579e09 abf1b8e4
@@ -66,8 +66,8 @@ struct onnx_parser
     add_variadic_op("Max", op::max{});
     add_variadic_op("Min", op::min{});
-    add_mem_op("ArgMax", &onnx_parser::parse_argmax);
-    add_mem_op("ArgMin", &onnx_parser::parse_argmin);
+    add_mem_op("ArgMax", &onnx_parser::parse_arg_op<op::argmax>);
+    add_mem_op("ArgMin", &onnx_parser::parse_arg_op<op::argmin>);
     add_mem_op("Cast", &onnx_parser::parse_cast);
     add_mem_op("Clip", &onnx_parser::parse_clip);
     add_mem_op("LRN", &onnx_parser::parse_lrn);
@@ -86,8 +86,8 @@ struct onnx_parser
     add_mem_op("Gemm", &onnx_parser::parse_gemm);
     add_mem_op("MatMul", &onnx_parser::parse_matmul);
     add_mem_op("BatchNormalization", &onnx_parser::parse_batchnorm);
-    add_mem_op("Softmax", &onnx_parser::parse_softmax);
-    add_mem_op("LogSoftmax", &onnx_parser::parse_logsoftmax);
+    add_mem_op("Softmax", &onnx_parser::parse_softmax<op::softmax>);
+    add_mem_op("LogSoftmax", &onnx_parser::parse_softmax<op::logsoftmax>);
     add_mem_op("Squeeze", &onnx_parser::parse_squeeze);
     add_mem_op("Unsqueeze", &onnx_parser::parse_unsqueeze);
     add_mem_op("Slice", &onnx_parser::parse_slice);
@@ -261,17 +261,8 @@ struct onnx_parser
         return prog.add_instruction(op, std::move(args));
     }
 
-    instruction_ref
-    parse_softmax(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
-    {
-        auto dims = args.front()->get_shape().lens();
-        auto r =
-            prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1]), 1, 1}}, args.front());
-        auto s = prog.add_instruction(op::softmax{}, r);
-        return prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1])}}, s);
-    }
-
-    instruction_ref parse_logsoftmax(const std::string&,
+    template <class Op>
+    instruction_ref parse_softmax(const std::string&,
                                      const attribute_map& attributes,
                                      std::vector<instruction_ref> args)
     {
@@ -281,10 +272,11 @@ struct onnx_parser
             axis = parse_value(attributes.at("axis")).at<int>();
         }
-        return prog.add_instruction(op::logsoftmax{axis}, std::move(args));
+        return prog.add_instruction(Op{axis}, std::move(args));
     }
 
-    instruction_ref parse_argmax(const std::string&,
+    template <class Op>
+    instruction_ref parse_arg_op(const std::string&,
                                  const attribute_map& attributes,
                                  std::vector<instruction_ref> args)
     {
@@ -302,39 +294,12 @@ struct onnx_parser
         if(keep_dims == 0)
         {
-            auto ins = prog.add_instruction(op::argmax{axis}, std::move(args));
+            auto ins = prog.add_instruction(Op{axis}, std::move(args));
             return prog.add_instruction(op::squeeze{{axis}}, ins);
         }
         else
         {
-            return prog.add_instruction(op::argmax{axis}, std::move(args));
-        }
-    }
-
-    instruction_ref parse_argmin(const std::string&,
-                                 const attribute_map& attributes,
-                                 std::vector<instruction_ref> args)
-    {
-        int64_t axis = 0;
-        if(contains(attributes, "axis"))
-        {
-            axis = static_cast<int64_t>(parse_value(attributes.at("axis")).at<int>());
-        }
-        int keep_dims = 1;
-        if(contains(attributes, "keepdims"))
-        {
-            keep_dims = parse_value(attributes.at("keepdims")).at<int>();
-        }
-        if(keep_dims == 0)
-        {
-            auto ins = prog.add_instruction(op::argmin{axis}, std::move(args));
-            return prog.add_instruction(op::squeeze{{axis}}, ins);
-        }
-        else
-        {
-            return prog.add_instruction(op::argmin{axis}, std::move(args));
+            return prog.add_instruction(Op{axis}, std::move(args));
         }
     }
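Folding ArgMax and ArgMin into `parse_arg_op<Op>` works because the two operators share all of their attribute handling; only the reduction op differs. The `keepdims` branch is identical for both: the reduced axis is kept as a 1 when `keepdims` is nonzero, and squeezed away otherwise. Illustrative shapes (not taken from the commit):

```cpp
// input shape {2, 3, 4}, axis = 1 (values illustrative):
// Op{1} (argmax or argmin)     -> {2, 1, 4}  // reduced axis kept as 1
// keepdims == 0: squeeze{{1}}  -> {2, 4}     // reduced axis removed
```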
@@ -470,6 +435,12 @@ struct onnx_parser
             check_arg_empty(s, "Reshape: dynamic shape is not supported");
             s.visit([&](auto v) { copy(v, std::back_inserter(op.dims)); });
         }
+
+        if(!args[0]->get_shape().standard())
+        {
+            args[0] = prog.add_instruction(op::contiguous{}, args[0]);
+        }
+
         return prog.add_instruction(op, args[0]);
     }
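`shape::standard()` is false when the strides no longer describe a packed row-major buffer, e.g. after a transpose, and `op::reshape` only reinterprets dimensions over a packed buffer; inserting `op::contiguous` first materializes the data in standard layout. A rough sketch of the idea behind the check (this mirrors the concept, not MIGraphX's actual `shape` implementation):

```cpp
// Sketch of why a transposed shape is not "standard" (row-major, packed).
#include <cstddef>
#include <vector>

bool is_standard(const std::vector<std::size_t>& lens,
                 const std::vector<std::size_t>& strides)
{
    // Row-major packed strides: stride[i] == product of lens[i+1..n)
    std::size_t expected = 1;
    for(std::size_t i = lens.size(); i > 0; i--)
    {
        if(strides[i - 1] != expected)
            return false;
        expected *= lens[i - 1];
    }
    return true;
}

// {2, 3, 4} with strides {12, 4, 1} -> standard
// after transpose{{0, 2, 1}}: lens {2, 4, 3}, strides {12, 1, 4} -> not standard
```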
@@ -849,7 +820,7 @@ struct onnx_parser
         {
             dtype = parse_value(attributes.at("dtype")).at<int>();
         }
-        migraphx::shape::type_t type = get_type(dtype);
+        shape::type_t type = get_type(dtype);
 
         if(contains(attributes, "input_as_shape"))
         {
@@ -972,7 +943,6 @@ struct onnx_parser
         std::vector<std::size_t> dims;
         arg_s.visit([&](auto input) { dims.assign(input.begin(), input.end()); });
         auto out_lens = compute_broadcasted_lens(in_lens, dims);
-
         return prog.add_instruction(op::multibroadcast{out_lens}, args[0]);
     }
......
@@ -25,7 +25,7 @@ argument gather(hipStream_t stream, argument result, argument arg1, argument arg
     arg2.visit([&](auto indices) {
         const auto* indices_ptr = device_cast(indices.data());
         auto* output_ptr        = device_cast(output.data());
-        gs_launch(stream, nelements)([=](auto i) {
+        gs_launch(stream, nelements, 256)([=](auto i) {
             auto idx        = out_comp.multi(i);
             idx[axis_index] = indices_ptr[idx[axis_index]];
             output_ptr[i]   = input[idx];
......
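The gather kernel now passes an explicit third argument to `gs_launch`, pinning the launch to 256 threads per block rather than the helper's default. A minimal sketch of a grid-stride launcher in this style, under the assumption that `gs_launch` returns a functor taking the per-element lambda (the helper below, including its default block size, is hypothetical, not the MIGraphX source):

```cpp
#include <cstddef>
#include <hip/hip_runtime.h>

template <class F>
__global__ void gs_kernel(std::size_t n, F f)
{
    std::size_t stride = static_cast<std::size_t>(blockDim.x) * gridDim.x;
    for(std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
        f(i); // grid-stride loop: a bounded grid covers any n
}

// Call sites read gs_launch(stream, nelements, 256)([=](auto i) { ... });
// the third argument fixes the threads-per-block count.
inline auto gs_launch(hipStream_t stream, std::size_t n, unsigned block = 1024)
{
    unsigned groups = static_cast<unsigned>((n + block - 1) / block);
    return [=](auto f) { gs_kernel<<<groups, block, 0, stream>>>(n, f); };
}
```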
@@ -167,10 +167,28 @@ rb_type<T>* to_rocblas_type(T* x)
 rocblas_half to_rocblas_type(half x) { return reinterpret_cast<const rocblas_half&>(x); }
 
+void miopen_gemm::batch_not_transposed(const std::vector<std::size_t>& strides) const
+{
+    if(strides.size() <= 2)
+        return;
+
+    auto dim_0       = strides.size() - 2;
+    auto matrix_size = std::max(strides[dim_0], strides[dim_0 + 1]);
+    std::vector<std::size_t> batch(strides.begin(), strides.begin() + dim_0);
+    if(std::adjacent_find(batch.begin(), batch.end(), [&](auto i, auto j) {
+           return (i < j or i < matrix_size or j < matrix_size);
+       }) != batch.end())
+    {
+        MIGRAPHX_THROW("DOT: batch size {" + to_string_range(strides) + "} is transposed!");
+    }
+}
+
 shape miopen_gemm::compute_shape(const std::vector<shape>& inputs) const
 {
     std::vector<shape> input_shapes(inputs.begin(), inputs.begin() + inputs.size() - 1);
     check_shapes{input_shapes}.not_broadcasted();
+    batch_not_transposed(inputs[0].strides());
+    batch_not_transposed(inputs[1].strides());
     return op.compute_shape(input_shapes);
 }
......
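The new check rejects batched GEMM inputs whose batch dimensions have been transposed: the batch strides must be non-increasing and must never be smaller than the larger of the two matrix strides, so that walking the batch in order also walks the matrices in order. A standalone replica of the check with illustrative strides (the shapes are examples, not from the commit):

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

void batch_not_transposed(const std::vector<std::size_t>& strides)
{
    if(strides.size() <= 2)
        return; // plain 2-D GEMM: nothing to check
    auto dim_0       = strides.size() - 2;
    auto matrix_size = std::max(strides[dim_0], strides[dim_0 + 1]);
    std::vector<std::size_t> batch(strides.begin(), strides.begin() + dim_0);
    // Batch strides must be non-increasing and never dip inside a matrix.
    if(std::adjacent_find(batch.begin(), batch.end(), [&](auto i, auto j) {
           return (i < j or i < matrix_size or j < matrix_size);
       }) != batch.end())
        throw std::runtime_error("batch dimensions are transposed");
}

int main()
{
    batch_not_transposed({60, 20, 5, 1}); // standard {2, 3, 4, 5}: fine
    try
    {
        batch_not_transposed({20, 60, 5, 1}); // batch dims swapped: throws
    }
    catch(const std::exception& e)
    {
        std::cout << e.what() << '\n';
    }
}
```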
@@ -24,6 +24,7 @@ struct miopen_gemm
     shape compute_shape(const std::vector<shape>& inputs) const;
     argument
     compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
+    void batch_not_transposed(const std::vector<std::size_t>& strides) const;
     std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
     {
         return shapes.size() - 1;
......
@@ -182,7 +182,7 @@ struct tf_parser
     add_mem_op("Pad", &tf_parser::parse_pad);
     add_mem_op("Reshape", &tf_parser::parse_reshape, false);
     add_mem_op("Slice", &tf_parser::parse_slice, false);
-    add_mem_op("Softmax", &tf_parser::parse_softmax);
+    add_mem_op("Softmax", &tf_parser::parse_softmax<op::softmax>);
     add_mem_op("Squeeze", &tf_parser::parse_squeeze, false);
     add_mem_op("StridedSlice", &tf_parser::parse_stridedslice);
     add_mem_op("Transpose", &tf_parser::parse_transpose, false);
@@ -735,13 +735,46 @@ struct tf_parser
     }
 
     instruction_ref
-    parse_softmax(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
+    parse_slice(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
     {
-        auto dims = args.front()->get_shape().lens();
-        auto r =
-            prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1]), 1, 1}}, args.front());
-        auto s = prog.add_instruction(op::softmax{}, r);
-        return prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1])}}, s);
+        op::slice op;
+        auto starts     = args[1]->eval().get<int32_t>().to_vector();
+        auto size       = args[2]->eval().get<int32_t>().to_vector();
+        auto axes       = args[0]->get_shape().lens();
+        size_t num_axes = axes.size();
+
+        op.starts = std::vector<int64_t>(starts.begin(), starts.end());
+        op.ends   = std::vector<int64_t>(num_axes);
+        op.axes   = std::vector<int64_t>(num_axes);
+        std::iota(op.axes.begin(), op.axes.end(), 0);
+
+        for(size_t i = 0; i < num_axes; i++)
+        {
+            if(size[i] == -1)
+                op.ends[i] = axes[i];
+            else
+                op.ends[i] = starts[i] + size[i];
+        }
+        return prog.add_instruction(op, make_contiguous(args[0]));
+    }
+
+    // template to facilitate the logsoftmax later
+    template <class Op>
+    instruction_ref parse_softmax(const std::string&,
+                                  const attribute_map& attributes,
+                                  std::vector<instruction_ref> args)
+    {
+        int axis      = -1;
+        auto num_dims = args[0]->get_shape().lens().size();
+        if(contains(attributes, "axis"))
+        {
+            axis = static_cast<int>(attributes.at("axis").i());
+        }
+
+        if(axis < 0)
+        {
+            axis += num_dims;
+        }
+
+        return prog.add_instruction(Op{axis}, make_contiguous(args[0]));
     }
 
     instruction_ref parse_squeeze(const std::string&,
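TF allows a negative `axis` attribute counted from the last dimension, so the new `parse_softmax` normalizes it against the input rank before constructing `Op`. A tiny self-contained check with illustrative values:

```cpp
#include <cassert>

int main()
{
    // Axis normalization as in parse_softmax above (values illustrative):
    int axis           = -1; // TF's default: the last dimension
    const int num_dims = 4;  // e.g. an NHWC tensor
    if(axis < 0)
        axis += num_dims;
    assert(axis == 3); // softmax runs over the innermost axis
}
```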
@@ -766,29 +799,6 @@ struct tf_parser
         return prog.add_instruction(op, make_contiguous(args[0]));
     }
 
-    instruction_ref
-    parse_slice(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
-    {
-        op::slice op;
-        auto starts     = args[1]->eval().get<int32_t>().to_vector();
-        auto size       = args[2]->eval().get<int32_t>().to_vector();
-        auto axes       = args[0]->get_shape().lens();
-        size_t num_axes = axes.size();
-        op.starts       = std::vector<int64_t>(starts.begin(), starts.end());
-        op.ends         = std::vector<int64_t>(num_axes);
-        op.axes         = std::vector<int64_t>(num_axes);
-        std::iota(op.axes.begin(), op.axes.end(), 0);
-        for(size_t i = 0; i < num_axes; i++)
-        {
-            if(size[i] == -1)
-                op.ends[i] = axes[i];
-            else
-                op.ends[i] = starts[i] + size[i];
-        }
-        return prog.add_instruction(op, make_contiguous(args[0]));
-    }
-
     instruction_ref parse_stridedslice(const std::string&,
                                        const attribute_map& attributes,
                                        std::vector<instruction_ref> args)
......
@@ -423,9 +423,7 @@ TEST_CASE(softmax_test)
 {
     migraphx::program p;
     auto l0 = p.add_parameter("0", migraphx::shape{migraphx::shape::float_type, {1, 3}});
-    auto r  = p.add_instruction(migraphx::op::reshape{{1, 3, 1, 1}}, l0);
-    auto s  = p.add_instruction(migraphx::op::softmax{}, r);
-    p.add_instruction(migraphx::op::reshape{{1, 3}}, s);
+    p.add_instruction(migraphx::op::softmax{1}, l0);
 
     auto prog = migraphx::parse_onnx("softmax_test.onnx");
     EXPECT(p == prog);
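The expected program shrinks to a single `migraphx::op::softmax{1}` because ONNX's Softmax defaults to `axis = 1`, so the old reshape-to-4D round trip no longer needs to be modeled. A quick numeric sanity check (illustrative, not part of the test) that softmax over the one non-trivial axis of a `{1, 3}` tensor is just the usual 3-element softmax:

```cpp
#include <cmath>
#include <iostream>
#include <vector>

int main()
{
    std::vector<double> x{1.0, 2.0, 3.0};
    double denom = 0;
    for(double v : x)
        denom += std::exp(v);
    for(double v : x)
        std::cout << std::exp(v) / denom << ' '; // ~0.090 0.245 0.665
    std::cout << '\n';
}
```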
@@ -447,6 +445,21 @@ TEST_CASE(reshape_test)
     EXPECT(p == prog);
 }
 
+TEST_CASE(reshape_non_standard)
+{
+    migraphx::program p;
+    migraphx::op::reshape op;
+    std::vector<int64_t> reshape_dims{4, 3, 2};
+    migraphx::shape s{migraphx::shape::float_type, {2, 3, 4}};
+    auto x      = p.add_parameter("x", s);
+    auto tran_x = p.add_instruction(migraphx::op::transpose{{0, 2, 1}}, x);
+    auto cont_x = p.add_instruction(migraphx::op::contiguous{}, tran_x);
+    p.add_instruction(migraphx::op::reshape{{4, 3, 2}}, cont_x);
+
+    auto prog = migraphx::parse_onnx("reshape_non_standard.onnx");
+    EXPECT(p == prog);
+}
+
 TEST_CASE(shape_test)
 {
     migraphx::program p;
......
@@ -436,10 +436,7 @@ TEST_CASE(softmax_test)
 {
     migraphx::program p;
     auto l0 = p.add_parameter("0", migraphx::shape{migraphx::shape::float_type, {1, 3}});
-    auto dims = l0->get_shape().lens();
-    auto r = p.add_instruction(migraphx::op::reshape{{long(dims[0]), long(dims[1]), 1, 1}}, l0);
-    auto s = p.add_instruction(migraphx::op::softmax{}, r);
-    p.add_instruction(migraphx::op::reshape{{long(dims[0]), long(dims[1])}}, s);
+    p.add_instruction(migraphx::op::softmax{1}, l0);
 
     auto prog = optimize_tf("softmax_test.pb", false);
     EXPECT(p == prog);
......