"sims/net/wire/net_wire.c" did not exist on "5728d26cb6f67225f5d4b392c7d3eef27ec0fa02"
Unverified commit a0b570b2, authored by Paul Fultz II, committed by GitHub

Add more supported operators and optimizations for the cpu backend (#746)



* Add eliminate_data_type pass

* Formatting

* Auto convert quant ops

* Formatting

* Flip the order of decompose

* Compute max size differently

* Formatting

* Clamp values in convert

* Formatting

* Fix loss of precision in reduce

* Formatting

* Fix bugs in reduction

* Fix accumulator type in reference softmax implementation

* Formatting

* Update convert test

* Remove unused variables

* Remove unnecessary quant_dot check

* Formatting

* Add tests

* Formatting

* Remove unused code

* Remove duplicate ops

* Remove blaze dependency

* Use set since shape::type_t is not hashable on gcc 5

* Formatting

* Add dnnl binary op

* Formatting

* Add binary and eltwise

* Formatting

* Add softmax

* Formatting

* Remove unused operators

* Add missing files

* Formatting

* Add lrn

* Formatting

* Add deconvolution

* Formatting

* Change allocate default

* Add reorder

* Formatting

* Add reductions

* Formatting

* Sort lines

* Change literals in another loop

* Add pow operator

* Formatting

* Add pow operator

* Formatting

* Make sure shapes are packed

* Allow broadcasted inputs

* Remove unused operators

* Simplify functions

* Remove softmax

* Add sub and erf functions

* Formatting

* Fix bug

* Formatting

* Improve parallelism

* Formatting

* Allow multiple batch dimensions

* Formatting

* Move literal transforms out of lowering

* Formatting

* Add gather operator

* Sort lines

* Add early exit for carry

* Formatting

* Add missing concat

* Rename macro

* Fix deep nesting

* Formatting

* Fix cppcheck issues

* Remove else

* Move attribute to typedef

* Formatting

* Disable maybe-uninitialized warning since it's broken on gcc

* Add constexpr default constructor

* Formatting

* Fix compiler warnings

* Fix adjust_allocation test
Co-authored-by: Shucai Xiao <shucai@gmail.com>
Co-authored-by: mvermeulen <5479696+mvermeulen@users.noreply.github.com>
parent 165d1a17
#include <migraphx/config.hpp>
#include <migraphx/cpu/dnnl.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {

struct dnnl_reduction : dnnl_op<dnnl_reduction, dnnl::reduction>
{
    std::string algo;
    std::vector<std::int64_t> axes{};

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.algo, "algo"), f(self.axes, "axes"));
    }

    std::string name() const { return "dnnl::reduction"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        // Compensate for allocation
        inputs.pop_back();
        check_shapes{inputs, *this}.has(1).standard();
        auto s    = inputs.at(0);
        auto lens = s.lens();
        for(auto axis : axes)
        {
            lens[axis] = 1;
        }
        auto r = shape{s.type(), lens};
        // Call to get_primitive to make sure an algo is available
        this->get_primitive(this->to_memory_desc(r, inputs));
        return r;
    }

    dnnl::reduction::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        return {to_dnnl_algo(algo), m.at(DNNL_ARG_SRC), m.at(DNNL_ARG_DST), 0, 0};
    }
};

} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
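The shape rule applied in compute_shape above is simply that every reduced axis collapses to extent 1 while the other extents are kept. A minimal standalone sketch of that rule (reduce_lens is a hypothetical helper, not part of MIGraphX):

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical helper mirroring dnnl_reduction::compute_shape: every axis
// listed in `axes` collapses to extent 1; all other extents are unchanged.
std::vector<std::size_t> reduce_lens(std::vector<std::size_t> lens,
                                     const std::vector<std::int64_t>& axes)
{
    for(auto axis : axes)
        lens[axis] = 1; // e.g. reduce_lens({4, 5, 6}, {1}) yields {4, 1, 6}
    return lens;
}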
#include <migraphx/config.hpp>
#include <migraphx/cpu/dnnl.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {

struct dnnl_reorder : dnnl_op<dnnl_reorder, dnnl::reorder>
{
    template <class Self, class F>
    static auto reflect(Self&, F)
    {
        return pack();
    }

    std::string name() const { return "dnnl::reorder"; }

    shape adjust_shape(const shape& x, int) const { return x; }

    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(2);
        return inputs.back();
    }

    // Custom desc class since it's missing in dnnl
    struct desc
    {
        dnnl::memory::desc src;
        dnnl::memory::desc dst;
    };

    desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        return {m.at(DNNL_ARG_SRC), m.at(DNNL_ARG_DST)};
    }

    auto get_primitive_desc(const desc& d) const
    {
        auto& engine = get_dnnl_context().engine;
        return dnnl::reorder::primitive_desc(engine, d.src, engine, d.dst);
    }
};

} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
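For context, dnnl::reorder is oneDNN's layout/format-conversion primitive, and unlike most primitives its primitive_desc is built directly from memory descriptors rather than from an operation desc, which is why the wrapper above defines its own desc struct. A minimal standalone sketch of driving the primitive with the v2.x-style oneDNN API (an illustration, not MIGraphX code):

#include <dnnl.hpp>
#include <vector>

int main()
{
    dnnl::engine eng(dnnl::engine::kind::cpu, 0);
    dnnl::stream strm(eng);

    // The same logical 1x2x2x2 f32 tensor described in two layouts
    dnnl::memory::dims dims = {1, 2, 2, 2};
    dnnl::memory::desc src_md(dims, dnnl::memory::data_type::f32, dnnl::memory::format_tag::nchw);
    dnnl::memory::desc dst_md(dims, dnnl::memory::data_type::f32, dnnl::memory::format_tag::nhwc);

    std::vector<float> src(8, 1.0f), dst(8);
    dnnl::memory src_m(src_md, eng, src.data());
    dnnl::memory dst_m(dst_md, eng, dst.data());

    // As in get_primitive_desc above: engine + two memory descs, no op desc
    dnnl::reorder::primitive_desc pd(eng, src_md, eng, dst_md);
    dnnl::reorder(pd).execute(strm, src_m, dst_m);
    strm.wait();
}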
 #include <migraphx/config.hpp>
 #include <migraphx/cpu/pointwise.hpp>
-#include <migraphx/op/relu.hpp>
 #include <migraphx/cpu/dnnl.hpp>
+#include <migraphx/op/softmax.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {
-struct dnnl_relu : dnnl_extend_op<dnnl_relu, dnnl::eltwise_forward, op::relu>
+struct dnnl_softmax : dnnl_extend_op<dnnl_softmax, dnnl::softmax_forward, op::softmax>
 {
-    dnnl::eltwise_forward::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
+    dnnl::softmax_forward::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
     {
-        return {dnnl::prop_kind::forward_inference,
-                dnnl::algorithm::eltwise_relu,
-                m.at(DNNL_ARG_SRC_0)};
+        int axis = this->op.axis;
+        return {dnnl::prop_kind::forward_inference, m.at(DNNL_ARG_SRC_0), axis};
     }
 };
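This file relates to the "Fix accumulator type in reference softmax implementation" item in the change list: with float inputs, the sum of exponentials should be accumulated in a wider type. A standalone sketch of that idea (illustrative only, not the MIGraphX reference implementation):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Numerically stable softmax over a non-empty vector. The running sum is
// kept in double even though inputs and outputs are float, so precision
// is not lost when many small exponentials are accumulated.
std::vector<float> softmax_ref(const std::vector<float>& x)
{
    float m = *std::max_element(x.begin(), x.end());
    std::vector<float> out(x.size());
    double sum = 0.0; // wider accumulator type
    for(std::size_t i = 0; i < x.size(); i++)
    {
        out[i] = std::exp(x[i] - m);
        sum += out[i];
    }
    for(auto& v : out)
        v = static_cast<float>(v / sum);
    return out;
}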
......
#include <migraphx/config.hpp>
#include <migraphx/cpu/pointwise.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/sub.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {

struct dnnl_add : dnnl_extend_op<dnnl_add, dnnl::binary, op::add>
{
    dnnl::binary::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        return {dnnl::algorithm::binary_add,
                m.at(DNNL_ARG_SRC_0),
                m.at(DNNL_ARG_SRC_1),
                m.at(DNNL_ARG_DST)};
    }
};

template struct cpu_binary<op::sub>;

} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
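For reference, the oneDNN binary primitive wrapped by dnnl_add can also be exercised directly; the sketch below uses the same v2.x-style desc API as the code above (standalone illustration, not MIGraphX code):

#include <dnnl.hpp>
#include <vector>

int main()
{
    dnnl::engine eng(dnnl::engine::kind::cpu, 0);
    dnnl::stream strm(eng);

    dnnl::memory::dims dims = {2, 3};
    dnnl::memory::desc md(dims, dnnl::memory::data_type::f32, dnnl::memory::format_tag::ab);

    std::vector<float> a(6, 1.0f), b(6, 2.0f), c(6);
    dnnl::memory a_m(md, eng, a.data()), b_m(md, eng, b.data()), c_m(md, eng, c.data());

    // Same desc layout as dnnl_add::get_desc: algorithm, two sources, one dest
    dnnl::binary::desc d(dnnl::algorithm::binary_add, md, md, md);
    dnnl::binary::primitive_desc pd(d, eng);
    dnnl::binary(pd).execute(strm,
                             {{DNNL_ARG_SRC_0, a_m}, {DNNL_ARG_SRC_1, b_m}, {DNNL_ARG_DST, c_m}});
    strm.wait(); // every element of c is now 3.0f
}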
......
......
@@ -22,6 +22,7 @@
 #include <migraphx/memory_coloring.hpp>
 #include <migraphx/simplify_algebra.hpp>
 #include <migraphx/simplify_reshapes.hpp>
+#include <migraphx/cpu/write_literals.hpp>
 #include <migraphx/cpu/allocation_model.hpp>
 #include <migraphx/cpu/target.hpp>
 #include <migraphx/cpu/lowering.hpp>
......
@@ -38,7 +39,6 @@ std::string target::name() const { return "cpu"; }
 std::vector<pass> target::get_passes(migraphx::context&, const compile_options&) const
 {
     std::set<shape::type_t> unsupported_types(shape::types().begin(), shape::types().end());
-    unsupported_types.erase(shape::type_t::double_type);
     unsupported_types.erase(shape::type_t::float_type);
     return {normalize_ops{},
             eliminate_data_type{unsupported_types, shape::type_t::float_type},
......
@@ -63,10 +63,12 @@ std::vector<pass> target::get_passes(migraphx::context&, const compile_options&)
             propagate_constant{},
             dead_code_elimination{},
             lowering{},
-            eliminate_contiguous{},
+            eliminate_contiguous{"dnnl::reorder"},
             dead_code_elimination{},
             adjust_allocation{cpu_allocation_model{}},
             dead_code_elimination{},
+            write_literals{},
+            dead_code_elimination{},
             memory_coloring{"cpu::allocate"},
             dead_code_elimination{}};
 }
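Note how this ties to the first hunk: since double_type is no longer erased from unsupported_types, eliminate_data_type now rewrites double computations to run in float, which is why the verify tests below move from double_type to float_type. Per the "Clamp values in convert" item in the change list, such conversions should saturate rather than overflow; a hypothetical sketch of that behavior (clamp_convert is illustrative, not the MIGraphX implementation):

#include <algorithm>
#include <limits>

// Saturating conversion: values outside the representable range of To are
// clamped to its lowest/highest finite values instead of overflowing.
template <class To, class From>
To clamp_convert(From x)
{
    const From lo = static_cast<From>(std::numeric_limits<To>::lowest());
    const From hi = static_cast<From>(std::numeric_limits<To>::max());
    return static_cast<To>(std::min(hi, std::max(lo, x)));
}

// e.g. clamp_convert<float>(1e300) yields FLT_MAX rather than +inf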
......
#include <migraphx/cpu/write_literals.hpp>
#include <migraphx/module.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {

struct cpu_literal
{
    argument data;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.data, "data"));
    }

    std::string name() const { return "cpu::literal"; }
    shape compute_shape(const std::vector<shape>&) const { return data.get_shape(); }
    argument compute(const shape&, const std::vector<argument>&) const { return data; }

    friend std::ostream& operator<<(std::ostream& os, const cpu_literal& x)
    {
        os << x.name();
        return os;
    }
};

void write_literals::apply(module& m) const
{
    for(auto ins : iterator_for(m))
    {
        if(ins->name() != "@literal")
            continue;
        m.replace_instruction(ins, cpu_literal{ins->get_literal().get_argument()});
    }
}

} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
@@ -68,7 +68,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_options& options) const
             propagate_constant{},
             dead_code_elimination{},
             lowering{&ctx, options.offload_copy},
-            eliminate_contiguous{},
+            eliminate_contiguous{"gpu::contiguous"},
             dead_code_elimination{},
             eliminate_concat{concat_gpu_optimization{}},
             dead_code_elimination{},
......
......
@@ -8,7 +8,8 @@
 void run_pass(migraphx::module& m)
 {
-    migraphx::run_passes(m, {migraphx::eliminate_contiguous{}, migraphx::dead_code_elimination{}});
+    migraphx::run_passes(
+        m, {migraphx::eliminate_contiguous{"contiguous"}, migraphx::dead_code_elimination{}});
 }

 TEST_CASE(standard_op)
......
......
@@ -23,7 +23,7 @@ void run_lowering(migraphx::program& p)
         {migraphx::auto_contiguous{},
          migraphx::gpu::lowering{&ctx, false},
          migraphx::dead_code_elimination{},
-         migraphx::eliminate_contiguous{},
+         migraphx::eliminate_contiguous{"gpu::contiguous"},
          migraphx::dead_code_elimination{}});
 }
......
......
@@ -10,7 +10,7 @@ struct test_acos : verify_program<test_acos>
     {
         migraphx::program p;
         auto* mm = p.get_main_module();
-        migraphx::shape s{migraphx::shape::double_type, {16}};
+        migraphx::shape s{migraphx::shape::float_type, {16}};
         auto x = mm->add_parameter("x", s);
         mm->add_instruction(migraphx::make_op("acos"), x);
         return p;
......
......
@@ -10,7 +10,7 @@ struct test_asin : verify_program<test_asin>
     {
         migraphx::program p;
         auto* mm = p.get_main_module();
-        migraphx::shape s{migraphx::shape::double_type, {16}};
+        migraphx::shape s{migraphx::shape::float_type, {16}};
         auto x = mm->add_parameter("x", s);
         mm->add_instruction(migraphx::make_op("asin"), x);
         return p;
......
......
@@ -10,7 +10,7 @@ struct test_asinh : verify_program<test_asinh>
     {
         migraphx::program p;
         auto* mm = p.get_main_module();
-        migraphx::shape s{migraphx::shape::double_type, {16}};
+        migraphx::shape s{migraphx::shape::float_type, {16}};
         auto x = mm->add_parameter("x", s);
         mm->add_instruction(migraphx::make_op("asinh"), x);
         return p;
......
......
@@ -10,7 +10,7 @@ struct test_atan : verify_program<test_atan>
     {
         migraphx::program p;
         auto* mm = p.get_main_module();
-        migraphx::shape s{migraphx::shape::double_type, {16}};
+        migraphx::shape s{migraphx::shape::float_type, {16}};
         auto x = mm->add_parameter("x", s);
         mm->add_instruction(migraphx::make_op("atan"), x);
         return p;
......
......
@@ -10,10 +10,10 @@ struct test_atanh : verify_program<test_atanh>
     {
         migraphx::program p;
         auto* mm = p.get_main_module();
-        migraphx::shape s{migraphx::shape::double_type, {16}};
+        migraphx::shape s{migraphx::shape::float_type, {16}};
         auto x = mm->add_parameter("x", s);
-        auto min_val = mm->add_literal(-0.95);
-        auto max_val = mm->add_literal(0.95);
+        auto min_val = mm->add_literal(-0.95f);
+        auto max_val = mm->add_literal(0.95f);
         min_val = mm->add_instruction(migraphx::make_op("multibroadcast", {{"output_lens", {16}}}),
                                       min_val);
         max_val = mm->add_instruction(migraphx::make_op("multibroadcast", {{"output_lens", {16}}}),
......
......
@@ -10,7 +10,7 @@ struct test_cos : verify_program<test_cos>
     {
         migraphx::program p;
         auto* mm = p.get_main_module();
-        migraphx::shape s{migraphx::shape::double_type, {8}};
+        migraphx::shape s{migraphx::shape::float_type, {8}};
         auto x = mm->add_parameter("x", s);
         mm->add_instruction(migraphx::make_op("cos"), x);
         return p;
......
......
@@ -10,7 +10,7 @@ struct test_cosh : verify_program<test_cosh>
     {
         migraphx::program p;
         auto* mm = p.get_main_module();
-        migraphx::shape s{migraphx::shape::double_type, {16}};
+        migraphx::shape s{migraphx::shape::float_type, {16}};
         auto x = mm->add_parameter("x", s);
         mm->add_instruction(migraphx::make_op("cosh"), x);
         return p;
......
......
@@ -23,8 +23,6 @@ template struct test_logsoftmax<0, migraphx::shape::float_type>;
 template struct test_logsoftmax<1, migraphx::shape::float_type>;
 template struct test_logsoftmax<2, migraphx::shape::float_type>;
 template struct test_logsoftmax<3, migraphx::shape::float_type>;
-template struct test_logsoftmax<1, migraphx::shape::double_type>;
-template struct test_logsoftmax<3, migraphx::shape::double_type>;
 template struct test_logsoftmax<1, migraphx::shape::half_type>;
 template struct test_logsoftmax<0, migraphx::shape::half_type>;
 template struct test_logsoftmax<2, migraphx::shape::half_type>;
......
......
@@ -10,7 +10,7 @@ struct test_recip : verify_program<test_recip>
     {
         migraphx::program p;
         auto* mm = p.get_main_module();
-        migraphx::shape s{migraphx::shape::double_type, {3}};
+        migraphx::shape s{migraphx::shape::float_type, {3}};
         auto x = mm->add_parameter("x", s);
         mm->add_instruction(migraphx::make_op("recip"), x);
         return p;
......
......
@@ -10,7 +10,7 @@ struct test_sinh : verify_program<test_sinh>
     {
         migraphx::program p;
         auto* mm = p.get_main_module();
-        migraphx::shape s{migraphx::shape::double_type, {16}};
+        migraphx::shape s{migraphx::shape::float_type, {16}};
         auto x = mm->add_parameter("x", s);
         mm->add_instruction(migraphx::make_op("sinh"), x);
         return p;
......
......
@@ -21,8 +21,6 @@ struct test_softmax : verify_program<test_softmax<Axis, T>>
 template struct test_softmax<0, migraphx::shape::float_type>;
 template struct test_softmax<2, migraphx::shape::float_type>;
-template struct test_softmax<1, migraphx::shape::double_type>;
-template struct test_softmax<3, migraphx::shape::double_type>;
 template struct test_softmax<0, migraphx::shape::half_type>;
 template struct test_softmax<1, migraphx::shape::half_type>;
 template struct test_softmax<2, migraphx::shape::half_type>;
......