Merge branch 'develop' into mlir-c

2f268bc2 · Paul · f75c5a38 · aa7ff911 · 2f268bc2 · 2f268bc2
Commit 2f268bc2 authored Jun 12, 2022 by Paul
20 changed files
--- a/src/py/migraphx_py.cpp
+++ b/src/py/migraphx_py.cpp
@@ -7,6 +7,7 @@
 #include <migraphx/operation.hpp>
 #include <migraphx/quantization.hpp>
 #include <migraphx/generate.hpp>
+#include <migraphx/instruction.hpp>
 #include <migraphx/ref/target.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/tf.hpp>
@@ -272,6 +273,14 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
            py::arg("op"),
            py::arg("args"),
            py::arg("mod_args") = std::vector<migraphx::module*>{})
+        .def(
+            "add_literal", // name under which the lambda below is registered on the bound module class
+            [](migraphx::module& mm, py::buffer data) {
+                py::buffer_info info = data.request(); // obtain the buffer_info describing the Python buffer
+                auto literal_shape   = to_shape(info); // shape of the literal is derived from that buffer_info
+                return mm.add_literal(literal_shape, reinterpret_cast<char*>(info.ptr)); // NOTE(review): presumably add_literal copies the bytes behind info.ptr, since info is a local — confirm
+            },
+            py::arg("data")) // the single argument is exposed under the name "data"
        .def(
            "add_parameter",
            [](migraphx::module& mm, const std::string& name, const migraphx::shape shape) {

--- a/src/reduce_dims.cpp
+++ b/src/reduce_dims.cpp
@@ -16,11 +16,9 @@ bool reduce_dim(std::vector<shape>& shapes, std::size_t n)
        auto bstride = s.strides()[n + 1];
        auto blen    = s.lens()[n + 1];

-        if(astride == bstride * blen)
-        {
+        if(astride == bstride * blen or alen == 1)
            new_lens.push_back(alen * blen);
    }
-    }
    if(new_lens.size() != shapes.size())
        return false;
    std::size_t i = 0;
@@ -37,10 +35,25 @@ bool reduce_dim(std::vector<shape>& shapes, std::size_t n)
    return true;
 }

+void reduce_dim1(std::vector<shape>& shapes) // Drops the last dimension from every shape, in place, when that dimension has length 1 in all of them.
+{
+    if(std::any_of(shapes.begin(), shapes.end(), [&](const auto& s) {
+           return s.lens().size() < 2 or s.lens().back() != 1; // disqualifies a shape with fewer than 2 dims or a last length other than 1
+       }))
+        return; // all-or-nothing: a single disqualified shape leaves every shape untouched
+    for(auto& s : shapes)
+    {
+        auto lens    = s.lens(); // local copies; s itself only changes at the reassignment below
+        auto strides = s.strides();
+        lens.pop_back(); // remove the trailing length ...
+        strides.pop_back(); // ... and its matching stride
+        s = shape{s.type(), lens, strides}; // rebuild with the same type and the remaining lens/strides
+    }
+}
+
 std::size_t reduce_dim_all(std::vector<shape>& shapes, std::size_t n)
 {
    while(reduce_dim(shapes, n) and n < shapes.size()) {}
-
    return n + 1;
 }
 void reduce_dim_all(std::vector<shape>& shapes)
@@ -48,6 +61,7 @@ void reduce_dim_all(std::vector<shape>& shapes)
    std::size_t n = 0;
    while(n < shapes.front().lens().size() - 1)
        n = reduce_dim_all(shapes, n);
+    reduce_dim1(shapes);
 }

 std::vector<std::size_t> base_lens(const std::vector<shape>& shapes)

--- a/src/rewrite_batchnorm.cpp
+++ b/src/rewrite_batchnorm.cpp
@@ -14,9 +14,9 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-void rewrite_batchnorm::apply(module& p) const
+void rewrite_batchnorm::apply(module& m) const
 {
-    for(auto ins : iterator_for(p))
+    for(auto ins : iterator_for(m))
    {
        if(ins->name() != "batch_norm_inference")
            continue;
@@ -46,13 +46,13 @@ void rewrite_batchnorm::apply(module& p) const
            });

        auto broadcast   = op::broadcast{1, ins->get_shape().lens()};
-        auto a_ins       = p.add_literal({a.get_shape(), a.data()});
-        auto a_broadcast = p.insert_instruction(ins, broadcast, a_ins);
-        auto mul   = p.insert_instruction(ins, make_op("mul"), ins->inputs().front(), a_broadcast);
-        auto b_ins = p.add_literal({b.get_shape(), b.data()});
-        auto b_broadcast = p.insert_instruction(ins, broadcast, b_ins);
-        auto add         = p.insert_instruction(ins, make_op("add"), mul, b_broadcast);
-        p.replace_instruction(ins, add);
+        auto a_ins       = m.add_literal({a.get_shape(), a.data()});
+        auto a_broadcast = m.insert_instruction(ins, broadcast, a_ins);
+        auto mul   = m.insert_instruction(ins, make_op("mul"), ins->inputs().front(), a_broadcast);
+        auto b_ins = m.add_literal({b.get_shape(), b.data()});
+        auto b_broadcast = m.insert_instruction(ins, broadcast, b_ins);
+        auto add         = m.insert_instruction(ins, make_op("add"), mul, b_broadcast);
+        m.replace_instruction(ins, add);
    }
 }


--- a/src/rewrite_pooling.cpp
+++ b/src/rewrite_pooling.cpp
@@ -12,9 +12,9 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-void rewrite_pooling::apply(module& prog) const
+void rewrite_pooling::apply(module& m) const
 {
-    for(auto ins : iterator_for(prog))
+    for(auto ins : iterator_for(m))
    {
        if(ins->name() != "pooling")
            continue;
@@ -33,26 +33,25 @@ void rewrite_pooling::apply(module& prog) const
            continue;
        std::int64_t n = s.lens()[0];
        std::int64_t c = s.lens()[1];
-        auto reshape   = prog.insert_instruction(
+        auto reshape   = m.insert_instruction(
            ins, make_op("reshape", {{"dims", {n * c, -1}}}), ins->inputs().front());
        instruction_ref pooling{};

        // average pooling
        if(op.mode == op::pooling_mode::average)
        {
-            pooling =
-                prog.insert_instruction(ins, make_op("reduce_mean", {{"axes", {1}}}), reshape);
+            pooling = m.insert_instruction(ins, make_op("reduce_mean", {{"axes", {1}}}), reshape);
        }
        // max pooling
        else
        {
-            pooling = prog.insert_instruction(ins, make_op("reduce_max", {{"axes", {1}}}), reshape);
+            pooling = m.insert_instruction(ins, make_op("reduce_max", {{"axes", {1}}}), reshape);
        }

        std::vector<int64_t> rsp_lens(lens.size(), 1);
        rsp_lens[0] = n;
        rsp_lens[1] = c;
-        prog.replace_instruction(ins, make_op("reshape", {{"dims", rsp_lens}}), pooling);
+        m.replace_instruction(ins, make_op("reshape", {{"dims", rsp_lens}}), pooling);
    }
 }


--- a/src/rewrite_rnn.cpp
+++ b/src/rewrite_rnn.cpp
--- a/src/schedule.cpp
+++ b/src/schedule.cpp
@@ -42,7 +42,7 @@ struct stream_info
    std::unordered_map<instruction_ref, std::size_t> iweights;
    ins_dep_map mod_implicit_deps;

-    void calc_implicit_deps(const module& p) { mod_implicit_deps = p.calc_implicit_deps(); }
+    void calc_implicit_deps(const module& m) { mod_implicit_deps = m.calc_implicit_deps(); }

    void accumulate_weights(instruction_ref last, const schedule_model& model)
    {
@@ -116,15 +116,15 @@ struct stream_info
        }
    };

-    std::size_t assign_streams(module& p, std::size_t n)
+    std::size_t assign_streams(module& m, std::size_t n)
    {
        assert(n > 0);
        partition critical;
        std::unordered_map<instruction_ref, std::deque<partition>> partitions;
        partitions.reserve(weights.size());
        fix([&](auto self, auto ins, auto& part) {
-            assert(not is_end(ins, p.end()));
-            if(not p.has_instruction(ins))
+            assert(not is_end(ins, m.end()));
+            if(not m.has_instruction(ins))
                return;
            if(contains(partitions, ins))
                return;
@@ -151,8 +151,8 @@ struct stream_info
                }
            }
            // Sort instructions
-            p.move_instruction(ins, p.end());
-        })(std::prev(p.end()), critical);
+            m.move_instruction(ins, m.end());
+        })(std::prev(m.end()), critical);

        // Set the critical partition to stream 0
        set_stream(critical, 0);
@@ -197,13 +197,13 @@ struct stream_info
        }
    };

-    void sort(module& p, std::size_t)
+    void sort(module& m, std::size_t)
    {
        std::set<weight_ins, compare_weight_ins> children;
        std::unordered_map<instruction_ref, std::size_t> visited;
-        auto last      = std::prev(p.end());
+        auto last      = std::prev(m.end());
        auto mw        = this->weights.at(last);
-        auto nw        = mw / (p.size() + 1);
+        auto nw        = mw / (m.size() + 1);
        auto add_child = [&](auto ins) {
            auto x  = 1 + (mw - this->weights.at(ins)) / (nw + 1);
            auto w  = x * this->iweights.at(ins);
@@ -222,10 +222,10 @@ struct stream_info
            // Pop the first element
            auto top = children.begin()->second;
            children.erase(children.begin());
-            p.move_instruction(top, p.begin());
+            m.move_instruction(top, m.begin());
            for(auto ins : top->inputs())
            {
-                if(not p.has_instruction(ins))
+                if(not m.has_instruction(ins))
                    continue;
                add_child(ins);
            }
@@ -234,7 +234,7 @@ struct stream_info
            {
                for(auto ins : mod_implicit_deps.at(top))
                {
-                    assert(p.has_instruction(ins));
+                    assert(m.has_instruction(ins));
                    add_child(ins);
                }
            }
@@ -242,12 +242,12 @@ struct stream_info

        // move dangling parameter to the front so as not be removed
        auto ins = std::next(last);
-        while(ins != p.end())
+        while(ins != m.end())
        {
            auto next = std::next(ins);
            if(ins->name() == "@param")
            {
-                p.move_instruction(ins, p.begin());
+                m.move_instruction(ins, m.begin());
            }
            ins = next;
        }
@@ -364,18 +364,18 @@ struct stream_info
    }

    std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>>
-    find_concurrent_instructions(module& p) const
+    find_concurrent_instructions(module& m) const
    {
        std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>> result;
        std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>> merge_from;
-        dominator_info di = compute_dominator(p);
-        result.reserve(p.size());
-        merge_from.reserve(p.size());
-        for(auto ins : reverse_iterator_for(p))
+        dominator_info di = compute_dominator(m);
+        result.reserve(m.size());
+        merge_from.reserve(m.size());
+        for(auto ins : reverse_iterator_for(m))
        {
            for(auto&& arg : ins->outputs())
            {
-                if(not p.has_instruction(arg))
+                if(not m.has_instruction(arg))
                    continue;
                if(is_merge_point(arg))
                    merge_from[ins].insert(arg);
@@ -415,18 +415,18 @@ struct stream_info
    }

    std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>>
-    get_conflicts(module& p)
+    get_conflicts(module& m)
    {

        using conflict_table_type =
            std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>>;
        conflict_table_type conflict_table;
-        auto concur_ins = this->find_concurrent_instructions(p);
+        auto concur_ins = this->find_concurrent_instructions(m);

        // Compute an index for each instruction
        std::unordered_map<instruction_ref, std::size_t> ins2index;
        std::size_t index_total = 0;
-        for(auto ins : iterator_for(p))
+        for(auto ins : iterator_for(m))
            ins2index[ins] = index_total++;

        std::vector<conflict_table_type> thread_conflict_tables(
@@ -507,21 +507,21 @@ struct stream_info
    }
 };

-void schedule::apply(module& p) const
+void schedule::apply(module& m) const
 {
    if(not enable)
        return;

    stream_info si;
-    si.calc_implicit_deps(p);
-    auto last = std::prev(p.end());
+    si.calc_implicit_deps(m);
+    auto last = std::prev(m.end());
    si.accumulate_weights(last, model);
-    auto nstreams = si.assign_streams(p, model.concurrency());
-    si.sort(p, model.concurrency());
+    auto nstreams = si.assign_streams(m, model.concurrency());
+    si.sort(m, model.concurrency());

    if(enabled(MIGRAPHX_TRACE_COMPILE{}) or enabled(MIGRAPHX_TRACE_SCHEDULE{}))
    {
-        p.annotate(std::cout, [&](auto ins) {
+        m.annotate(std::cout, [&](auto ins) {
            if(ins->name() == "@param" and not contains(si.weights, ins))
                return;

@@ -548,9 +548,9 @@ void schedule::apply(module& p) const
    std::unordered_map<instruction_ref, std::size_t> ins2wait;
    std::unordered_map<std::size_t, std::unordered_set<std::size_t>> waited_for;
    std::unordered_map<instruction_ref, std::unordered_set<std::size_t>> ins2waited;
-    ins2wait.reserve(p.size());
-    ins2waited.reserve(p.size());
-    for(auto ins : iterator_for(p))
+    ins2wait.reserve(m.size());
+    ins2waited.reserve(m.size());
+    for(auto ins : iterator_for(m))
    {
        // Only schedule instructions that have a stream
        if(not si.has_stream(ins))
@@ -559,7 +559,7 @@ void schedule::apply(module& p) const
        // Schedule instruction on the stream
        auto stream = si.get_stream(ins);
        assert(stream < model.concurrency());
-        model.sched(p, ins, stream);
+        model.sched(m, ins, stream);
        // Insert wait instructions
        if(si.is_merge_point(ins, stream))
        {
@@ -572,14 +572,14 @@ void schedule::apply(module& p) const
                if(not contains(ins2wait, i))
                {
                    ins2wait[i] = wait_id;
-                    model.record(p, i, wait_id);
+                    model.record(m, i, wait_id);
                    wait_id++;
                }
                auto w = ins2wait.at(i);
                // If we already waited for the event on this stream then dont
                // insert another wait event
                if(not contains(waited_for[stream], w))
-                    model.wait(p, ins, w);
+                    model.wait(m, ins, w);
                // Store the event as waited
                waited_for[stream].insert(w);
                // Store all wait events that have been waited on prior to the recorded instruction
@@ -594,7 +594,7 @@ void schedule::apply(module& p) const
    }

    // Add memory conflicts
-    auto conflict_table = si.get_conflicts(p);
+    auto conflict_table = si.get_conflicts(m);
    for(auto&& ip : conflict_table)
    {
        if(ip.second.empty())
@@ -602,7 +602,7 @@ void schedule::apply(module& p) const
        std::vector<instruction_ref> args;
        args.push_back(ip.first);
        args.insert(args.end(), ip.second.begin(), ip.second.end());
-        p.insert_instruction(std::next(ip.first), make_op("identity"), args);
+        m.insert_instruction(std::next(ip.first), make_op("identity"), args);
    }
 }


--- a/src/simplify_algebra.cpp
+++ b/src/simplify_algebra.cpp
--- a/src/simplify_qdq.cpp
+++ b/src/simplify_qdq.cpp
@@ -53,7 +53,7 @@ struct match_find_quantizable_ops
            match::arg(1)(dequantizelinear_op("x2", "scale2")));
    }

-    void apply(module& m, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
    {
        auto qop    = r.result;
        auto q1     = r.instructions["x1"];

--- a/src/simplify_reshapes.cpp
+++ b/src/simplify_reshapes.cpp
--- a/src/targets/cpu/copy.cpp
+++ b/src/targets/cpu/copy.cpp
@@ -20,7 +20,6 @@ struct cpu_copy : reduce_dims_base, auto_register_op<cpu_copy>
        return inputs.at(1);
    }
    argument
-    // cppcheck-suppress constParameter
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
    {
        argument result = get_arg(args, args.size() - 1);

--- a/src/targets/cpu/gather.cpp
+++ b/src/targets/cpu/gather.cpp
@@ -26,7 +26,6 @@ struct cpu_gather : auto_register_op<cpu_gather>
    }

    argument
-    // cppcheck-suppress constParameter
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
    {
        std::size_t nelements = output_shape.elements();

--- a/src/targets/cpu/include/migraphx/cpu/parallel.hpp
+++ b/src/targets/cpu/include/migraphx/cpu/parallel.hpp
@@ -7,7 +7,16 @@
 #ifdef MIGRAPHX_DISABLE_OMP
 #include <migraphx/par_for.hpp>
 #else
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#endif
 #include <omp.h>
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
 #endif

 namespace migraphx {

--- a/src/targets/cpu/include/migraphx/cpu/pointwise.hpp
+++ b/src/targets/cpu/include/migraphx/cpu/pointwise.hpp
@@ -319,11 +319,10 @@ struct cpu_unary : reduce_dims_base, auto_register_op<cpu_unary<Op>>
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(2);
-        auto s = inputs.at(0);
+        const auto& s = inputs.at(0);
        return {s.type(), s.lens()};
    }
    argument
-    // cppcheck-suppress constParameter
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
    {
        argument result = get_arg(args, args.size() - 1);
@@ -357,12 +356,11 @@ struct cpu_binary : reduce_dims_base, auto_register_op<cpu_binary<Op>>
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(3);
-        auto s = inputs.at(0);
+        const auto& s = inputs.at(0);
        return {s.type(), s.lens()};
    }

    argument
-    // cppcheck-suppress constParameter
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
    {
        argument result = get_arg(args, args.size() - 1);

--- a/src/targets/cpu/lowering.cpp
+++ b/src/targets/cpu/lowering.cpp
@@ -223,7 +223,7 @@ struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(1);
-        auto s = inputs.at(0);
+        const auto& s = inputs.at(0);
        return {s.type(), s.lens()};
    }

@@ -352,7 +352,7 @@ struct cpu_apply
            std::transform(bind_inputs.begin(),
                           bind_inputs.end(),
                           std::back_inserter(inputs),
-                           [&](const auto& s) { return r.instructions.at(s); });
+                           [&](const auto& s) { return r.instructions[s]; });
            inputs.push_back(this->insert_allocation(ins, ins->get_shape()));
            modl->replace_instruction(ins, op, inputs);
        });
@@ -460,11 +460,6 @@ struct cpu_apply
        if(has_op("dnnl::pooling") and ins->get_shape().type() == shape::type_t::float_type and
           not v["ceil_mode"].to<bool>())
            return replace(ins, make_op("dnnl::pooling", op.to_value()));
-        op::pooling_mode mode = v["mode"].to<op::pooling_mode>();
-        if(mode == op::pooling_mode::max)
-            return replace(ins, make_op("cpu::pooling_max", v));
-        else if(mode == op::pooling_mode::average)
-            return replace(ins, make_op("cpu::pooling_average", v));
        return ins;
    }


--- a/src/targets/cpu/pooling.cpp
+++ b/src/targets/cpu/pooling.cpp
--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
--- a/src/targets/gpu/analyze_streams.cpp
+++ b/src/targets/gpu/analyze_streams.cpp
--- a/src/targets/gpu/compile_gen.cpp
+++ b/src/targets/gpu/compile_gen.cpp
--- a/src/targets/gpu/compile_hip.cpp
+++ b/src/targets/gpu/compile_hip.cpp
--- a/src/targets/gpu/compile_hip_code_object.cpp
+++ b/src/targets/gpu/compile_hip_code_object.cpp