Merge branch 'extend_gemm_op' into seq2seq_example

9b53cf55 · Shucai Xiao · 0db15370 · ad8f88f5 · 9b53cf55 · 9b53cf55
Commit 9b53cf55 authored Feb 28, 2019 by Shucai Xiao
20 changed files
--- a/src/include/migraphx/operators.hpp
+++ b/src/include/migraphx/operators.hpp
@@ -768,7 +768,7 @@ struct gather
        // for scalar output
        if(lens.empty())
        {
-            return {type, {1}, {0}};
+            return {type};
        }

        return {type, lens};
@@ -826,26 +826,21 @@ struct dot
        const shape& b = inputs.at(1);
        auto t         = a.type();

-        // change to support cases like {1, 1, 3, 5} X {1, 1, 5, 6},
-        // which can be handled by numpy. as long as all previous
-        // dims are 1 except the last two dims, the two matrices
-        // are multipliable
-        if(std::any_of(a.lens().rbegin() + 2, a.lens().rend(), [](auto i) { return (i != 1); }))
+        // according to the specification of the numpy.matmul()
+        // inputs with the shape dims more than 2 are acceptable
+        // as long as dim values are the same in the two inputs
+        if(!std::equal(a.lens().rbegin() + 2, a.lens().rend(), b.lens().rbegin() + 2))
        {
-            MIGRAPHX_THROW("DOT: first matrix, dimensions before matrix dims must be 1");
+            MIGRAPHX_THROW("DOT: dim values mismatch");
        }

-        if(std::any_of(b.lens().rbegin() + 2, b.lens().rend(), [](auto i) { return (i != 1); }))
-        {
-            MIGRAPHX_THROW("DOT: second matrix, dimensions before matrix dims must be 1");
-        }
-
-        std::size_t n_dims = a.lens().size();
-        if(a.lens()[n_dims - 1] != b.lens()[n_dims - 2])
+        std::size_t dim_0 = a.lens().size() - 2;
+        std::size_t dim_1 = a.lens().size() - 1;
+        if(a.lens()[dim_1] != b.lens()[dim_0])
            MIGRAPHX_THROW("Inner dimensions do not match: {" + to_string_range(a.lens()) +
                           "} x {" + to_string_range(b.lens()) + "}");
-        auto out_lens        = a.lens();
-        out_lens[n_dims - 1] = b.lens()[n_dims - 1];
+        auto out_lens   = a.lens();
+        out_lens[dim_1] = b.lens()[dim_1];
        return {t, out_lens};
    }
 };

--- a/src/onnx/onnx.cpp
+++ b/src/onnx/onnx.cpp
@@ -453,7 +453,7 @@ struct onnx_parser
        // if dim_size is 0, it is a scalar
        if(dim_size == 0)
        {
-            migraphx::shape scalar_shape{v.get_shape().type(), {1}, {0}};
+            migraphx::shape scalar_shape{v.get_shape().type()};
            return prog.add_literal(migraphx::literal{scalar_shape, v.data()});
        }

@@ -484,7 +484,11 @@ struct onnx_parser
            transb = parse_value(attributes.at("transB")).at<bool>();
        }

-        std::vector<int64_t> perm = {1, 0};
+        std::vector<int64_t> perm(args[0]->get_shape().lens().size());
+        std::iota(perm.begin(), perm.end(), int64_t{0});
+        // swap the last two elements
+        std::swap(*perm.rbegin(), *(perm.rbegin() + 1));
+
        auto l1 = (transa) ? prog.add_instruction(op::transpose{perm}, args[0]) : args[0];
        auto l2 = (transb) ? prog.add_instruction(op::transpose{perm}, args[1]) : args[1];
        if(args.size() == 3)
@@ -504,10 +508,7 @@ struct onnx_parser
                return add_broadcastable_binary_op(l3, l4, op::add{});
            }
        }
-
-        auto dot_res = prog.add_instruction(op::dot{alpha, beta}, l1, l2);
-
-        return dot_res;
+        return prog.add_instruction(op::dot{alpha, beta}, l1, l2);
    }

    instruction_ref

--- a/src/program.cpp
+++ b/src/program.cpp
@@ -2,6 +2,7 @@
 #include <migraphx/stringutils.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/operators.hpp>
+#include <migraphx/target.hpp>
 #include <migraphx/env.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/time.hpp>

--- a/src/shape.cpp
+++ b/src/shape.cpp
@@ -19,7 +19,7 @@ struct shape_impl

    shape_impl() : m_type(shape::float_type), m_standard(false) {}

-    shape_impl(shape::type_t t) : m_type(t), m_lens({1}), m_strides({1}), m_standard(true) {}
+    shape_impl(shape::type_t t) : m_type(t), m_lens({1}), m_strides({0}), m_standard(true) {}
    shape_impl(shape::type_t t, std::vector<std::size_t> l)
        : m_type(t), m_lens(std::move(l)), m_standard(true)
    {

--- a/src/targets/cpu/gemm.cpp
+++ b/src/targets/cpu/gemm.cpp
 #include <migraphx/cpu/gemm.hpp>
 #include <migraphx/dfor.hpp>
 #include <migraphx/requires.hpp>
+#include <migraphx/shape_for_each.hpp>
 #include <blaze/math/CustomMatrix.h>

 namespace migraphx {
@@ -70,18 +71,21 @@ void migemm_impl(tensor_view<T> cmat,
    std::size_t n_dims = cmat.get_shape().lens().size();
    std::size_t dim_0  = n_dims - 2;
    std::size_t dim_1  = n_dims - 1;
-    auto m             = cmat.get_shape().lens()[dim_0];
-    auto n             = cmat.get_shape().lens()[dim_1];
    auto k             = amat.get_shape().lens()[dim_1];

    assert(amat.get_shape().lens()[dim_1] == bmat.get_shape().lens()[dim_0]);
-    assert(m == amat.get_shape().lens()[dim_0]);
-    assert(n == bmat.get_shape().lens()[dim_1]);
+    assert(cmat.get_shape().lens()[dim_0] == amat.get_shape().lens()[dim_0]);
+    assert(cmat.get_shape().lens()[dim_1] == bmat.get_shape().lens()[dim_1]);

-    dfor(m, n)([&](auto ii, auto jj) {
-        double s = cmat(ii, jj) * beta;
-        dfor(k)([&](auto kk) { s += amat(ii, kk) * bmat(kk, jj); });
-        cmat(ii, jj) = alpha * s;
+    shape_for_each(cmat.get_shape(), [&](const auto& c_idx) {
+        double s   = cmat(c_idx.begin(), c_idx.end()) * beta;
+        auto a_idx = c_idx;
+        auto b_idx = c_idx;
+        dfor(k)([&](auto kk) {
+            a_idx[dim_1] = b_idx[dim_0] = kk;
+            s += amat(a_idx.begin(), a_idx.end()) * bmat(b_idx.begin(), b_idx.end());
+        });
+        cmat(c_idx.begin(), c_idx.end()) = alpha * s;
    });
 }

@@ -89,7 +93,18 @@ template <class T>
 void migemm_impl(
    tensor_view<T> cmat, tensor_view<T> amat, tensor_view<T> bmat, float alpha, float beta)
 {
-    migemm_impl(cmat, amat, bmat, alpha, beta, is_fast_gemm_type<T>{});
+    auto lens = amat.get_shape().lens();
+    bool batch_mul =
+        std::accumulate(lens.begin(), lens.end(), std::size_t{1}, std::multiplies<std::size_t>()) ==
+        (*lens.rbegin()) * (*(lens.rbegin() + 1));
+    if(batch_mul)
+    {
+        migemm_impl(cmat, amat, bmat, alpha, beta, is_fast_gemm_type<T>{});
+    }
+    else
+    {
+        migemm_impl(cmat, amat, bmat, alpha, beta, std::false_type{});
+    }
 }

 void migemm(

--- a/src/targets/cpu/include/migraphx/cpu/target.hpp
+++ b/src/targets/cpu/include/migraphx/cpu/target.hpp
@@ -7,6 +7,7 @@

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
+struct pass;
 namespace cpu {

 struct target

--- a/src/targets/cpu/target.cpp
+++ b/src/targets/cpu/target.cpp

 #include <migraphx/cpu/target.hpp>
 #include <migraphx/cpu/lowering.hpp>
+#include <migraphx/pass.hpp>
 #include <migraphx/auto_contiguous.hpp>
 #include <migraphx/rewrite_rnn.hpp>
 #include <migraphx/dead_code_elimination.hpp>

--- a/src/targets/gpu/abs.cpp
+++ b/src/targets/gpu/abs.cpp
 #include <migraphx/gpu/abs.hpp>
-#include <migraphx/operators.hpp>
-#include <migraphx/manage_ptr.hpp>
-#include <migraphx/gpu/miopen.hpp>
-#include <utility>
+#include <migraphx/gpu/context.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/gpu/batchnorm.cpp
+++ b/src/targets/gpu/batchnorm.cpp
 #include <migraphx/gpu/batchnorm.hpp>
-#include <migraphx/operators.hpp>
-#include <migraphx/manage_ptr.hpp>
-#include <migraphx/gpu/miopen.hpp>
-#include <utility>
+#include <migraphx/gpu/context.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/gpu/concat.cpp
+++ b/src/targets/gpu/concat.cpp
 #include <migraphx/gpu/concat.hpp>
-#include <migraphx/operators.hpp>
-#include <migraphx/manage_ptr.hpp>
-#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/device/concat.hpp>
-#include <utility>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/gpu/contiguous.cpp
+++ b/src/targets/gpu/contiguous.cpp
 #include <migraphx/gpu/contiguous.hpp>
-#include <migraphx/operators.hpp>
-#include <migraphx/manage_ptr.hpp>
-#include <migraphx/gpu/miopen.hpp>
-#include <utility>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/gpu/device/contiguous.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/gpu/convolution.cpp
+++ b/src/targets/gpu/convolution.cpp
 #include <migraphx/gpu/convolution.hpp>
-#include <migraphx/operators.hpp>
-#include <migraphx/manage_ptr.hpp>
-#include <migraphx/gpu/miopen.hpp>
-#include <utility>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/generate.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/gpu/elu.cpp
+++ b/src/targets/gpu/elu.cpp
 #include <migraphx/gpu/elu.hpp>
-#include <migraphx/operators.hpp>
-#include <migraphx/manage_ptr.hpp>
-#include <migraphx/gpu/miopen.hpp>
-#include <utility>
+#include <migraphx/gpu/context.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/gpu/fuse_ops.cpp
+++ b/src/targets/gpu/fuse_ops.cpp
@@ -3,6 +3,7 @@
 #include <migraphx/gpu/miopen.hpp>
 #include <migraphx/gpu/convolution.hpp>
 #include <migraphx/gpu/device/add_relu.hpp>
+#include <migraphx/gpu/device/add.hpp>
 #include <migraphx/instruction.hpp>

 namespace migraphx {

--- a/src/targets/gpu/gather.cpp
+++ b/src/targets/gpu/gather.cpp
 #include <migraphx/gpu/gather.hpp>
-#include <migraphx/operators.hpp>
-#include <migraphx/manage_ptr.hpp>
-#include <migraphx/gpu/miopen.hpp>
-#include <migraphx/gpu/device/concat.hpp>
-#include <utility>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/gpu/device/gather.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/gpu/gemm.cpp
+++ b/src/targets/gpu/gemm.cpp
 #include <migraphx/gpu/gemm.hpp>
-#include <migraphx/operators.hpp>
-#include <migraphx/manage_ptr.hpp>
-#include <migraphx/gpu/miopen.hpp>
-#include <utility>
+#include <migraphx/gpu/context.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/gpu/include/migraphx/gpu/abs.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/abs.hpp
 #ifndef MIGRAPHX_GUARD_RTGLIB_ABS_HPP
 #define MIGRAPHX_GUARD_RTGLIB_ABS_HPP

-#include <migraphx/gpu/lowering.hpp>
-#include <migraphx/manage_ptr.hpp>
-#include <migraphx/instruction.hpp>
-#include <migraphx/operators.hpp>
-#include <migraphx/generate.hpp>
-#include <migraphx/shape_for_each.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/shape.hpp>
 #include <migraphx/gpu/miopen.hpp>
-#include <migraphx/gpu/hip.hpp>
-#include <migraphx/dfor.hpp>
-#include <migraphx/gpu/device/contiguous.hpp>
-#include <migraphx/gpu/device/add.hpp>
-#include <migraphx/iterator_for.hpp>
-#include <migraphx/gpu/rocblas.hpp>
-#include <migraphx/gpu/context.hpp>
-#include <utility>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

+struct context;
+
 struct miopen_abs
 {
    shared<activation_descriptor> ad;

--- a/src/targets/gpu/include/migraphx/gpu/acos.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/acos.hpp
 #ifndef MIGRAPHX_GUARD_RTGLIB_ACOS_HPP
 #define MIGRAPHX_GUARD_RTGLIB_ACOS_HPP

-#include <migraphx/gpu/lowering.hpp>
 #include <migraphx/gpu/oper.hpp>
-#include <migraphx/manage_ptr.hpp>
-#include <migraphx/instruction.hpp>
-#include <migraphx/operators.hpp>
-#include <migraphx/generate.hpp>
-#include <migraphx/shape_for_each.hpp>
-#include <migraphx/gpu/miopen.hpp>
-#include <migraphx/gpu/hip.hpp>
-#include <migraphx/dfor.hpp>
-#include <migraphx/gpu/device/contiguous.hpp>
 #include <migraphx/gpu/device/acos.hpp>
-#include <migraphx/iterator_for.hpp>
-#include <migraphx/gpu/rocblas.hpp>
-#include <migraphx/gpu/context.hpp>
-#include <migraphx/config.hpp>
-#include <utility>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/gpu/include/migraphx/gpu/add.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/add.hpp
 #ifndef MIGRAPHX_GUARD_RTGLIB_ADD_HPP
 #define MIGRAPHX_GUARD_RTGLIB_ADD_HPP

-#include <migraphx/gpu/lowering.hpp>
 #include <migraphx/gpu/oper.hpp>
-#include <migraphx/manage_ptr.hpp>
-#include <migraphx/instruction.hpp>
-#include <migraphx/operators.hpp>
-#include <migraphx/generate.hpp>
-#include <migraphx/shape_for_each.hpp>
-#include <migraphx/gpu/miopen.hpp>
-#include <migraphx/gpu/hip.hpp>
-#include <migraphx/dfor.hpp>
-#include <migraphx/gpu/device/contiguous.hpp>
 #include <migraphx/gpu/device/add.hpp>
-#include <migraphx/iterator_for.hpp>
-#include <migraphx/gpu/rocblas.hpp>
-#include <migraphx/gpu/context.hpp>
-#include <migraphx/config.hpp>
-#include <utility>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/gpu/include/migraphx/gpu/asin.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/asin.hpp
 #ifndef MIGRAPHX_GUARD_RTGLIB_ASIN_HPP
 #define MIGRAPHX_GUARD_RTGLIB_ASIN_HPP

-#include <migraphx/gpu/lowering.hpp>
 #include <migraphx/gpu/oper.hpp>
-#include <migraphx/manage_ptr.hpp>
-#include <migraphx/instruction.hpp>
-#include <migraphx/operators.hpp>
-#include <migraphx/generate.hpp>
-#include <migraphx/shape_for_each.hpp>
-#include <migraphx/gpu/miopen.hpp>
-#include <migraphx/gpu/hip.hpp>
-#include <migraphx/dfor.hpp>
-#include <migraphx/gpu/device/contiguous.hpp>
 #include <migraphx/gpu/device/asin.hpp>
-#include <migraphx/iterator_for.hpp>
-#include <migraphx/gpu/rocblas.hpp>
-#include <migraphx/gpu/context.hpp>
-#include <migraphx/config.hpp>
-#include <utility>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {