Unverified Commit 3d24a21c authored by Paul Fultz II, committed by GitHub

Add a pass to remove unsupported data types (#738)



* Add eliminate_data_type pass

* Formatting

* Auto convert quant ops

* Formatting

* Flip the order of decompose

* Compute max size differently

* Formatting

* Clamp values in convert

* Formatting

* Fix loss of precision in reduce

* Formatting

* Fix bugs in reduction

* Fix accumulator type in reference softmax implementation

* Formatting

* Update convert test

* Remove unused variables

* Remove unnecessary quant_dot check

* Formatting

* Add tests

* Formatting

* Remove unused code

* Remove duplicate ops

* Remove blaze dependency

* Use set since shape::type_t is not hashable on gcc 5

* Formatting
Co-authored-by: Shucai Xiao <shucai@gmail.com>
Co-authored-by: mvermeulen <5479696+mvermeulen@users.noreply.github.com>
parent 5d0ca2a6
......@@ -15,6 +15,7 @@ add_library(migraphx
eliminate_allocation.cpp
eliminate_contiguous.cpp
eliminate_concat.cpp
eliminate_data_type.cpp
eliminate_identity.cpp
eliminate_pad.cpp
file_buffer.cpp
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
#include <migraphx/eliminate_data_type.hpp>
#include <migraphx/module.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
void eliminate_data_type::apply(module& m) const
{
for(auto ins : iterator_for(m))
{
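// Skip builtin instructions such as @param, @literal, and @return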
if(ins->name()[0] == '@')
continue;
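// Convert any input whose type is slated for elimination to the target type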
auto inputs = ins->inputs();
std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto i) {
if(types.count(i->get_shape().type()) == 0)
return i;
return m.insert_instruction(ins, make_op("convert", {{"target_type", target_type}}), i);
});
if(inputs == ins->inputs())
continue;
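// Quantized ops advertise their general counterpart (e.g. quant_dot -> dot) via the "general_data_type" attribute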
auto op = ins->get_operator();
auto attributes = op.attributes();
if(attributes.contains("general_data_type"))
{
op = make_op(attributes["general_data_type"].to<std::string>(), op.to_value());
}
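// Rebuild the instruction with the converted inputs, then convert its result back to the original type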
auto old_type = ins->get_shape().type();
auto out = m.insert_instruction(ins, op, inputs);
auto convert =
m.insert_instruction(ins, make_op("convert", {{"target_type", old_type}}), out);
m.replace_instruction(ins, convert);
}
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_ELIMINATE_DATA_TYPE_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_ELIMINATE_DATA_TYPE_HPP
#include <migraphx/config.hpp>
#include <migraphx/shape.hpp>
#include <set>
#include <string>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module;
/**
 * Remove unsupported data types. This pass inserts convert operators so that
 * the listed data types are not used by any operator.
 */
struct eliminate_data_type
{
std::set<shape::type_t> types;
shape::type_t target_type;
std::string name() const { return "eliminate_data_type"; }
void apply(module& m) const;
};
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
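For context, a minimal usage sketch (not part of this diff) of the pass declared above, assuming a migraphx::module named m has already been built:

// Hypothetical: eliminate double and int8 by routing them through float.
migraphx::eliminate_data_type pass{{migraphx::shape::double_type, migraphx::shape::int8_type},
                                   migraphx::shape::float_type};
pass.apply(m); // wraps affected operators in convert instructions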
......@@ -25,7 +25,7 @@ constexpr T normalize(unsigned long z)
template <class T, MIGRAPHX_REQUIRES(is_signed<T>{} and not is_floating_point<T>{})>
constexpr T normalize(unsigned long z)
{
const auto max = std::numeric_limits<T>::max() / 64;
const auto max = 1UL << (sizeof(T) * 5);
const auto half_max = max / 2;
return half_max - (z % max);
}
......@@ -33,7 +33,7 @@ constexpr T normalize(unsigned long z)
template <class T, MIGRAPHX_REQUIRES(not is_signed<T>{} and std::is_integral<T>{})>
constexpr T normalize(unsigned long z)
{
const auto max = std::numeric_limits<T>::max() / 64;
const auto max = 1UL << (sizeof(T) * 5);
return z % max;
}
......
......@@ -34,7 +34,12 @@ struct convert : unary<convert>
auto apply() const
{
return [](auto x) { return x; };
auto type = target_type;
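// Clamp to the representable range of the target type so out-of-range values saturate rather than overflow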
return [type](auto x) {
auto y = x;
shape::visit(type, [&](auto as) { y = std::min(std::max(as(x), as.min()), as.max()); });
return y;
};
}
convert(shape::type_t t) : target_type{t} {}
......
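The clamp above saturates out-of-range values instead of letting the narrowing conversion wrap. A self-contained sketch of the same idea for a single type (hypothetical helper, not part of this diff):

#include <algorithm>
#include <cstdint>

// Mirrors the clamp in convert::apply() for float -> int8_t:
// 300.0f saturates to 127 and -300.0f saturates to -128.
std::int8_t saturate_to_int8(float x)
{
    return static_cast<std::int8_t>(std::min(std::max(x, -128.0f), 127.0f));
}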
......@@ -8,6 +8,7 @@
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/value.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
......@@ -35,6 +36,8 @@ struct quant_convolution
f(self.group, "group"));
}
value attributes() const { return {{"general_data_type", "convolution"}}; }
std::string name() const { return "quant_convolution"; }
void check_attribute_size() const
......
......@@ -8,6 +8,7 @@
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/value.hpp>
#include <cmath>
#include <utility>
......@@ -26,6 +27,8 @@ struct quant_dot
return pack(f(self.alpha, "alpha"), f(self.beta, "beta"));
}
value attributes() const { return {{"general_data_type", "dot"}}; }
std::string name() const { return "quant_dot"; }
shape compute_shape(std::vector<shape> inputs) const
{
......@@ -59,13 +62,6 @@ struct quant_dot
to_string_range(a.lens()) + "} x {" + to_string_range(b.lens()) + "}");
}
// k must be a multiple of 4
if((a.lens()[dim_1] % 4) != 0)
{
MIGRAPHX_THROW("QUANT_DOT: size of A {" + to_string_range(a.lens()) + "} and B {" +
to_string_range(b.lens()) + "} must be multiple of 4 for int8 type");
}
auto out_lens = a.lens();
out_lens[dim_1] = b.lens()[dim_1];
if(inputs.size() == 3 && out_lens != inputs.at(2).lens())
......
......@@ -14,12 +14,12 @@ struct reduce_mean : reduce_op<reduce_mean>
auto op() const
{
return [=](auto x, auto y) { return x + y; };
return [](auto x, auto y) { return x + y; };
}
auto output(const shape& s) const
{
return [&](auto val) { return val / s.elements(); };
return [&](auto val) { return val / static_cast<decltype(val)>(s.elements()); };
}
};
......
......@@ -113,13 +113,14 @@ struct reduce_op : op_name<Derived>
std::vector<std::size_t>& out_idx,
tensor_view<T>& output) const
{
auto data_idx = out_idx;
T val = static_cast<const Derived&>(*this).init();
using accumulator = accumulator_type<T>;
auto& self = static_cast<const Derived&>(*this);
auto data_idx = out_idx;
accumulator val = self.init();
shape_for_each(batch_shape, [&](auto b_idx) {
this->tune_dims(tuned_axes, b_idx, data_idx);
val = static_cast<const Derived&>(*this).op()(
static_cast<const Derived&>(*this).input()(input(data_idx.begin(), data_idx.end())),
val);
accumulator x = input(data_idx.begin(), data_idx.end());
val = self.op()(accumulator{self.input()(x)}, val);
});
output(out_idx.begin(), out_idx.end()) =
......@@ -148,12 +149,12 @@ struct reduce_op : op_name<Derived>
auto input() const
{
return [&](auto val) { return val; };
return [](auto val) { return val; };
}
auto output(const shape&) const
{
return [&](auto val) { return val; };
return [](auto val) { return val; };
}
reduce_op() {}
......
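The reduction loop above now accumulates in accumulator_type<T> rather than T itself, which avoids overflow and precision loss when many small-typed elements are summed. A rough illustration of the failure mode being fixed (hypothetical example, not from this diff):

#include <cstdint>
#include <numeric>
#include <vector>

// Summing 200 int8_t values of 100 overflows int8_t (max 127), but the
// widened std::int64_t accumulator holds the true total of 20000.
std::int64_t sum_int8(const std::vector<std::int8_t>& v)
{
    return std::accumulate(v.begin(), v.end(), std::int64_t{0});
}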
......@@ -59,6 +59,8 @@ struct shape
{
};
static const std::vector<type_t>& types();
shape();
shape(type_t t);
shape(type_t t, std::vector<std::size_t> l);
......@@ -128,6 +130,10 @@ struct shape
{
using type = std::conditional_t<std::is_same<T, bool>{}, int8_t, T>;
type max() const { return std::numeric_limits<type>::max(); }
type min() const { return std::numeric_limits<type>::lowest(); }
template <class U>
type operator()(U u) const
{
......@@ -166,9 +172,9 @@ struct shape
};
template <class Visitor>
void visit_type(Visitor v) const
static void visit(type_t t, Visitor v)
{
switch(this->type())
switch(t)
{
#define MIGRAPHX_SHAPE_GENERATE_VISITOR_CASE(x, t) \
case x: v(as<t>()); return;
......@@ -178,6 +184,12 @@ struct shape
MIGRAPHX_THROW("Unknown type");
}
template <class Visitor>
void visit_type(Visitor v) const
{
visit(this->type(), v);
}
template <class Visitor>
static void visit_types(Visitor v)
{
......
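shape::visit is the new static counterpart of visit_type: it dispatches on a type_t value without needing a shape instance, which is what the convert clamp above relies on. A small usage sketch (assumed, not part of this diff):

// Query the numeric limits of a runtime-selected element type.
double upper = 0;
migraphx::shape::visit(migraphx::shape::int8_type,
                       [&](auto as) { upper = as.max(); }); // upper == 127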
......@@ -30,6 +30,12 @@ MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_floating_point, half)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_signed, half)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_arithmetic, half)
template <class T>
using accumulator_type =
std::conditional_t<is_floating_point<T>{},
double,
std::conditional_t<is_signed<T>{}, std::int64_t, std::uint64_t>>;
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
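accumulator_type widens an element type for accumulation: floating-point types (including half) map to double, signed integers to int64_t, and unsigned integers to uint64_t. A compile-time check of that mapping (hypothetical test; the header path is assumed):

#include <cstdint>
#include <type_traits>
#include <migraphx/type_traits.hpp> // assumed location of accumulator_type

static_assert(std::is_same<migraphx::accumulator_type<float>, double>{}, "");
static_assert(std::is_same<migraphx::accumulator_type<std::int8_t>, std::int64_t>{}, "");
static_assert(std::is_same<migraphx::accumulator_type<std::uint32_t>, std::uint64_t>{}, "");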
......@@ -79,6 +79,14 @@ struct shape_impl
}
};
const std::vector<shape::type_t>& shape::types()
{
static const std::vector<shape::type_t> result = {
#define MIGRAPHX_GENERATE_TYPE_VECTOR(x, t) x,
MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_GENERATE_TYPE_VECTOR)};
return result;
}
shape::shape() : impl(shape_impl::default_shape()) {}
shape::shape(type_t t) : impl(std::make_shared<shape_impl>(t)) {}
......
......@@ -157,6 +157,23 @@ struct find_transpose
}
};
struct find_nested_convert
{
auto matcher() const { return match::name("convert")(match::arg(0)(match::name("convert"))); }
void apply(module& m, const match::matcher_result& mr) const
{
auto ins = mr.result;
auto x = ins->inputs().front();
auto input = x->inputs().front();
if(ins->get_shape() != input->get_shape())
return;
m.replace_instruction(ins, input);
}
};
struct find_nested_slice
{
auto matcher() const { return match::name("slice")(match::arg(0)(match::name("slice"))); }
......@@ -310,6 +327,7 @@ void simplify_reshapes::apply(module& p) const
find_reshaper{},
find_transpose{},
find_concat_transpose{},
find_nested_convert{},
find_nested_slice{},
find_nested_concat{});
dead_code_elimination{}.apply(p);
......
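find_nested_convert removes a convert of a convert when the outer result already has the input's shape, so round-trip conversions introduced by eliminate_data_type collapse away. A sketch of the pattern being matched (assumed API usage, not part of this diff):

// x (float) -> convert to half -> convert back to float: the second convert
// is replaced by x, and dead_code_elimination removes the leftovers.
migraphx::module m;
auto x = m.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {4}});
auto h = m.add_instruction(
    migraphx::make_op("convert", {{"target_type", migraphx::shape::half_type}}), x);
auto f = m.add_instruction(
    migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), h);
// After simplify_reshapes{}.apply(m), uses of f refer directly to x.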
......@@ -13,21 +13,16 @@ add_library(migraphx_cpu
gemm.cpp
target.cpp
lowering.cpp
migemm.cpp
)
set_target_properties(migraphx_cpu PROPERTIES EXPORT_NAME cpu)
rocm_set_soversion(migraphx_cpu ${MIGRAPHX_SO_VERSION})
find_path(BLAZE_INCLUDE blaze/Blaze.h)
find_package(Threads)
find_package(dnnl REQUIRED)
rocm_clang_tidy_check(migraphx_cpu)
target_link_libraries(migraphx_cpu PRIVATE migraphx Threads::Threads)
target_include_directories(migraphx_cpu PRIVATE ${BLAZE_INCLUDE})
target_compile_definitions(migraphx_cpu PRIVATE -DBLAZE_USE_CPP_THREADS)
target_link_libraries(migraphx_cpu PRIVATE DNNL::dnnl)
target_compile_definitions(migraphx_cpu PUBLIC -DUSE_DNNL)
find_package(OpenMP)
target_link_libraries(migraphx_cpu PUBLIC OpenMP::OpenMP_CXX)
# Add library path to rpath to workaround issues with our broken packages
......
......@@ -6,9 +6,6 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
template struct cpu_binary<op::add>;
#if USE_DNNL
struct dnnl_add : dnnl_extend_op<dnnl_add, dnnl::binary, op::add>
{
dnnl::binary::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
......@@ -19,7 +16,6 @@ struct dnnl_add : dnnl_extend_op<dnnl_add, dnnl::binary, op::add>
m.at(DNNL_ARG_DST)};
}
};
#endif
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -6,7 +6,6 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
#if USE_DNNL
struct dnnl_concat : dnnl_extend_op<dnnl_concat, dnnl::concat, op::concat>
{
std::vector<int> arg_map(int size) const
......@@ -39,7 +38,6 @@ struct dnnl_concat : dnnl_extend_op<dnnl_concat, dnnl::concat, op::concat>
return dnnl::concat::primitive_desc(d.dst, d.axis, d.srcs, get_dnnl_context().engine);
}
};
#endif
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -12,106 +12,6 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
template <class V, class T, class... Ts>
void visit_quantize_impl(V&& v, T&& x, Ts&&... xs)
{
x.visit([&](auto y) { visit_all(xs...)([&](auto... ys) { v(y, ys...); }); });
}
template <class T, class... Ts>
auto visit_quantize(T&& x, Ts&&... xs)
{
return [&](auto v) {
// Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70100
visit_quantize_impl(v, x, xs...);
};
}
template <class Op>
struct cpu_convolution : auto_register_op<cpu_convolution<Op>>
{
Op op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::" + op.name(); }
shape compute_shape(std::vector<shape> inputs) const
{
inputs.pop_back();
return op.compute_shape(inputs);
}
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
visit_quantize(args.back(), args[0], args[1])([&](auto output, auto input, auto weights) {
auto in_lens = input.get_shape().lens();
auto wei_lens = weights.get_shape().lens();
auto wei_n = wei_lens[0];
auto wei_c = wei_lens[1];
std::vector<std::size_t> win_size(wei_lens.begin() + 1, wei_lens.end());
par_for(output_shape.elements(), [&](auto i) {
auto idx_o = output_shape.multi(i);
auto w = idx_o[1];
auto n_dim = idx_o.size();
std::vector<std::ptrdiff_t> win_start;
for(std::size_t dim = 2; dim < n_dim; ++dim)
{
auto d_2 = dim - 2;
win_start.push_back(std::ptrdiff_t(idx_o[dim] * op.stride[d_2]) -
std::ptrdiff_t(op.padding[d_2]));
}
const auto group_id = w / (wei_n / op.group);
shape win_shape{output_shape.type(), win_size};
double acc = 0.0;
shape_for_each(win_shape, [&](auto idx_win) {
auto k = idx_win[0];
const auto in_ch = group_id * wei_c + k;
std::vector<std::ptrdiff_t> idx(idx_o.begin(), idx_o.end());
idx[1] = in_ch;
std::transform(idx_win.begin() + 1,
idx_win.end(),
win_start.begin(),
idx.begin() + 2,
[](std::ptrdiff_t ii, std::ptrdiff_t jj) { return ii + jj; });
std::vector<std::ptrdiff_t> idx_wei(idx_o.size());
idx_wei[0] = w;
std::copy(idx_win.begin(), idx_win.end(), idx_wei.begin() + 1);
if(std::all_of(idx.begin() + 2, idx.end(), [&](auto ii) { return ii >= 0; }) and
std::equal(idx.begin(),
idx.end(),
in_lens.begin(),
in_lens.end(),
std::less<std::ptrdiff_t>{}))
{
acc +=
input(idx.begin(), idx.end()) * weights(idx_wei.begin(), idx_wei.end());
}
});
output[i] = acc;
});
});
return args.back();
}
};
template struct cpu_convolution<op::quant_convolution>;
template struct cpu_convolution<op::convolution>;
#if USE_DNNL
struct dnnl_convolution
: dnnl_extend_op<dnnl_convolution, dnnl::convolution_forward, op::convolution>
{
......@@ -151,7 +51,6 @@ struct dnnl_convolution
to_dnnl_dims(op.padding)};
}
};
#endif
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -4,7 +4,6 @@
#include <migraphx/context.hpp>
#include <migraphx/cpu/context.hpp>
#include <migraphx/cpu/dnnl.hpp>
#include <migraphx/cpu/migemm.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
......@@ -12,7 +11,6 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
#if USE_DNNL
struct dnnl_gemm : dnnl_extend_op<dnnl_gemm, dnnl::matmul, op::dot>
{
std::vector<int> arg_map(int) const { return {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS}; }
......@@ -47,126 +45,6 @@ struct dnnl_gemm : dnnl_extend_op<dnnl_gemm, dnnl::matmul, op::dot>
return {m.at(DNNL_ARG_SRC), m.at(DNNL_ARG_WEIGHTS), m.at(DNNL_ARG_DST)};
}
};
#endif
struct cpu_gemm : auto_register_op<cpu_gemm>
{
op::dot op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::dot"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.standard();
inputs.pop_back();
return op.compute_shape(inputs);
}
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
argument compute(context&, const shape&, std::vector<argument> args) const
{
// With 3 inputs the result is alpha * A * B + beta * C, where
// A and B are matrices and C has the same shape as A * B
if(args.size() == 4)
{
// no need to consider the value of args[2]
if(op.beta == 0.0f)
{
args.back().visit([&](auto output) { std::fill(output.begin(), output.end(), 0); });
}
else
{
visit_all(args.back(), args[2])([&](auto output, auto input) {
std::copy(input.begin(), input.end(), output.begin());
});
}
migemm(args.back(), args[0], args[1], op.alpha, op.beta);
return args.back();
}
// 2 input arguments
migemm(args.back(), args[0], args[1], op.alpha, 0.0f);
return args.back();
}
};
struct cpu_quant_gemm : auto_register_op<cpu_quant_gemm>
{
op::quant_dot op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::quant_dot"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.standard();
inputs.pop_back();
return op.compute_shape(inputs);
}
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
argument compute(context&, const shape&, std::vector<argument> args) const
{
// With 3 inputs the result is alpha * A * B + beta * C, where
// A and B are matrices and C has the same shape as A * B
// first, convert args[0] and args[1] from int8_t to int32_t
argument arg_0{{shape::int32_type, {args.at(0).get_shape().lens()}}};
argument arg_1{{shape::int32_type, {args.at(1).get_shape().lens()}}};
arg_0.visit([&](auto output) {
args.at(0).visit(
[&](auto input) { std::copy(input.begin(), input.end(), output.begin()); });
});
arg_1.visit([&](auto output) {
args.at(1).visit(
[&](auto input) { std::copy(input.begin(), input.end(), output.begin()); });
});
if(args.size() == 4)
{
// no need to consider the value of args[2]
if(op.beta == 0)
{
args.back().visit([&](auto output) { std::fill(output.begin(), output.end(), 0); });
}
else
{
visit_all(args.back(), args[2])([&](auto output, auto input) {
std::copy(input.begin(), input.end(), output.begin());
});
}
migemm(args.back(), arg_0, arg_1, op.alpha, op.beta);
return args.back();
}
// 2 input arguments
migemm(args.back(), arg_0, arg_1, op.alpha, int32_t{0});
return args.back();
}
};
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
......