Merge branch 'develop' into simplify_1_mul_div_ops

5ec8f913 · Ted Themistokleous · Ted Themistokleous · 32d69e8e · d78bcdfb · 5ec8f913
Commit 5ec8f913 authored Sep 13, 2022 by Ted Themistokleous Committed by Ted Themistokleous Sep 13, 2022
20 changed files
--- a/src/program.cpp
+++ b/src/program.cpp
@@ -37,6 +37,7 @@
 #include <migraphx/output_iterator.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/marker.hpp>
+#include <migraphx/supported_segments.hpp>
 #include <iostream>
 #include <sstream>
 #include <algorithm>
@@ -77,11 +78,11 @@ program& program::operator=(program p)
 void program::assign(const program& p)
 {
-    if(!impl)
+    if(not impl)
    {
        impl = std::make_unique<program_impl>();
    }
-    else if(!impl->modules.empty())
+    else if(not impl->modules.empty())
    {
        impl->modules.clear();
    }
@@ -167,13 +168,37 @@ target_assignments program::get_target_assignments(const std::vector<target>& ta
    target_assignments p;
    const auto* mod = get_main_module();
-    for(auto it : iterator_for(*mod))
+    std::vector<std::pair<target, supported_segments>> target_subgraphs;
+    target_subgraphs.reserve(targets.size());
+    std::transform(targets.begin(),
+                   targets.end(),
+                   std::back_inserter(target_subgraphs),
+                   [&](const auto& t) { return std::make_pair(t, t.find_supported(mod, m)); });
+    for(const auto ins : iterator_for(*mod))
    {
-        auto t = std::max_element(
+        if(contains(p, ins))
-            targets.begin(), targets.end(), [it, m](const target& lhs, const target& rhs) {
+        {
-                return lhs.is_supported(it, m) < rhs.is_supported(it, m);
+            continue;
-            });
+        }
-        p.add_assignment(it, t->name());
+        for(const auto& [target, subgraph] : target_subgraphs)
+        {
+            // can't pass a structured binding into lambda in C++17 so create a variable for it
+            const auto& t = target;
+            for(const auto& segment : subgraph)
+            {
+                const auto& instructions = segment.instructions;
+                if(not contains(instructions, ins))
+                {
+                    continue;
+                }
+                std::transform(instructions.begin(),
+                               instructions.end(),
+                               std::inserter(p, p.end()),
+                               [&](auto instr) { return std::make_pair(instr, t.name()); });
+            }
+        }
    }
    return p;
 }

--- a/src/py/migraphx_py.cpp
+++ b/src/py/migraphx_py.cpp
@@ -40,6 +40,7 @@
 #include <migraphx/register_target.hpp>
 #include <migraphx/json.hpp>
 #include <migraphx/make_op.hpp>
+#include <migraphx/op/common.hpp>
 #ifdef HAVE_GPU
 #include <migraphx/gpu/hip.hpp>
@@ -82,7 +83,7 @@ void visit_py(T x, F f)
    {
        f(x.template cast<bool>());
    }
-    else if(py::isinstance<py::int_>(x))
+    else if(py::isinstance<py::int_>(x) or py::hasattr(x, "__index__"))
    {
        f(x.template cast<int>());
    }
@@ -324,6 +325,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
        .def("get_parameter_names", &migraphx::program::get_parameter_names)
        .def("get_parameter_shapes", &migraphx::program::get_parameter_shapes)
        .def("get_output_shapes", &migraphx::program::get_output_shapes)
+        .def("is_compiled", &migraphx::program::is_compiled)
        .def(
            "compile",
            [](migraphx::program& p, const migraphx::target& t, bool offload_copy, bool fast_math) {
@@ -358,18 +360,35 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
        .def("__ne__", std::not_equal_to<migraphx::program>{})
        .def("__repr__", [](const migraphx::program& p) { return migraphx::to_string(p); });
-    py::class_<migraphx::operation>(m, "op")
+    py::class_<migraphx::operation> op(m, "op");
-        .def(py::init([](const std::string& name, py::kwargs kwargs) {
+    op.def(py::init([](const std::string& name, py::kwargs kwargs) {
-            migraphx::value v = migraphx::value::object{};
+          migraphx::value v = migraphx::value::object{};
-            if(kwargs)
+          if(kwargs)
-            {
+          {
-                v = migraphx::to_value(kwargs);
+              v = migraphx::to_value(kwargs);
-            }
+          }
-            return migraphx::make_op(name, v);
+          return migraphx::make_op(name, v);
-        }))
+      }))
        .def("name", &migraphx::operation::name);
+    py::enum_<migraphx::op::pooling_mode>(op, "pooling_mode")
+        .value("average", migraphx::op::pooling_mode::average)
+        .value("max", migraphx::op::pooling_mode::max)
+        .value("lpnorm", migraphx::op::pooling_mode::lpnorm);
+    py::enum_<migraphx::op::rnn_direction>(op, "rnn_direction")
+        .value("forward", migraphx::op::rnn_direction::forward)
+        .value("reverse", migraphx::op::rnn_direction::reverse)
+        .value("bidirectional", migraphx::op::rnn_direction::bidirectional);
+    m.def(
+        "argument_from_pointer",
+        [](const migraphx::shape shape, const int64_t address) {
+            return migraphx::argument(shape, reinterpret_cast<void*>(address));
+        },
+        py::arg("shape"),
+        py::arg("address"));
    m.def(
        "parse_tf",
        [](const std::string& filename,

--- a/src/quantization.cpp
+++ b/src/quantization.cpp
@@ -70,7 +70,7 @@ void quantize_int8(program& prog,
 {
    std::set<std::string> op_names = {"convolution", "dot"};
    std::set<std::string> input_ins_names(ins_names.begin(), ins_names.end());
-    if(!std::includes(
+    if(not std::includes(
           op_names.begin(), op_names.end(), input_ins_names.begin(), input_ins_names.end()))
    {
        MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");

--- a/src/rewrite_gelu.cpp
+++ b/src/rewrite_gelu.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/rewrite_gelu.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/matcher.hpp>
+#include <migraphx/match/gelu_erf.hpp>
+#include <migraphx/common.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+struct find_gelu_erf // rewrites a match::gelu_erf() match into x / (1 + exp(-1.702 * x))
+{
+    auto matcher() const { return match::gelu_erf(); }
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins = r.result; // instruction that is replaced at the end of apply()
+        auto x   = r.instructions["x"]; // instruction the matcher bound to the name "x"
+        if(x->get_shape().type() != migraphx::shape::half_type)
+            return; // only half_type inputs are rewritten; any other type is left untouched
+        auto lit = m.add_literal(literal{shape{x->get_shape().type()}, {1.702f}});
+        auto mul = insert_common_op(m, ins, make_op("mul"), {x, lit}); // x * 1.702
+        auto sig = m.insert_instruction(ins, make_op("neg"), mul); // -(x * 1.702)
+        sig      = m.insert_instruction(ins, make_op("exp"), sig); // exp(-1.702 * x)
+        auto one = m.add_literal(literal{shape{x->get_shape().type()}, {1.0f}});
+        sig      = insert_common_op(m, ins, make_op("add"), {sig, one}); // exp(-1.702 * x) + 1
+        sig      = m.insert_instruction(ins, make_op("div"), x, sig); // x / (1 + exp(-1.702 * x))
+        m.replace_instruction(ins, sig);
+    }
+};
+void rewrite_gelu::apply(module& m) const { match::find_matches(m, find_gelu_erf{}); } // hands find_gelu_erf to match::find_matches for module m
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/rewrite_pooling.cpp
+++ b/src/rewrite_pooling.cpp
@@ -47,12 +47,12 @@ void rewrite_pooling::apply(module& m) const
        if(not s.standard())
            continue;
        auto&& op = any_cast<op::pooling>(ins->get_operator());
-        if(!std::all_of(op.padding.begin(), op.padding.end(), [](auto i) { return i == 0; }))
+        if(not std::all_of(op.padding.begin(), op.padding.end(), [](auto i) { return i == 0; }))
            continue;
-        if(!std::all_of(op.stride.begin(), op.stride.end(), [](auto i) { return i == 1; }))
+        if(not std::all_of(op.stride.begin(), op.stride.end(), [](auto i) { return i == 1; }))
            continue;
        auto lens = s.lens();
-        if(!std::equal(lens.begin() + 2, lens.end(), op.lengths.begin(), op.lengths.end()))
+        if(not std::equal(lens.begin() + 2, lens.end(), op.lengths.begin(), op.lengths.end()))
            continue;
        std::int64_t n = s.lens()[0];
        std::int64_t c = s.lens()[1];

--- a/src/rewrite_rnn.cpp
+++ b/src/rewrite_rnn.cpp
@@ -214,7 +214,7 @@ void rewrite_rnn::apply_vanilla_rnn(module& m, instruction_ref ins) const
            ih = m.add_literal(migraphx::literal{ih_shape, data});
        }
-        if(!is_forward and variable_seq_len)
+        if(not is_forward and variable_seq_len)
        {
            args[0] =
                m.insert_instruction(ins, make_op("rnn_var_sl_shift_sequence"), args[0], seq_lens);
@@ -520,7 +520,7 @@ void rewrite_rnn::apply_gru(module& m, instruction_ref ins) const
            ih = m.add_literal(migraphx::literal{ih_shape, data});
        }
-        if(!is_forward and variable_seq_len)
+        if(not is_forward and variable_seq_len)
        {
            args[0] =
                m.insert_instruction(ins, make_op("rnn_var_sl_shift_sequence"), args[0], seq_lens);
@@ -977,7 +977,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
            pph = args[7];
        }
-        if(!is_forward and variable_seq_len)
+        if(not is_forward and variable_seq_len)
        {
            args[0] =
                m.insert_instruction(ins, make_op("rnn_var_sl_shift_sequence"), args[0], seq_lens);
@@ -1294,11 +1294,11 @@ bool rewrite_rnn::is_variable_seq_lens(const module& m, instruction_ref seq_lens
            std::vector<int64_t> vec_lens;
            arg_lens.visit([&](auto l) { vec_lens.assign(l.begin(), l.end()); });
            int64_t l = 0;
-            if(!vec_lens.empty())
+            if(not vec_lens.empty())
            {
                l = vec_lens[0];
            }
-            if(!std::all_of(vec_lens.begin(), vec_lens.end(), [&](auto v) { return v == l; }))
+            if(not std::all_of(vec_lens.begin(), vec_lens.end(), [&](auto v) { return v == l; }))
            {
                is_var_lens = true;
            }
@@ -1318,7 +1318,7 @@ rewrite_rnn::get_seq_len(const module& m, instruction_ref input, instruction_ref
    bool is_var_lens = is_variable_seq_lens(m, seq_lens);
    auto input_shape = input->get_shape();
    auto length      = input_shape.lens()[0];
-    if(!is_var_lens and seq_lens != m.end())
+    if(not is_var_lens and seq_lens != m.end())
    {
        auto arg_len = seq_lens->eval();
        std::vector<std::size_t> vec_lens;
@@ -1387,7 +1387,7 @@ void rewrite_rnn::replace_last_cell_output(module& m,
    if(variable_seq_len)
    {
-        if(!ins_outputs.empty())
+        if(not ins_outputs.empty())
        {
            cell_outputs = m.insert_instruction(
                std::next(ins),

--- a/src/shape.cpp
+++ b/src/shape.cpp
@@ -477,7 +477,7 @@ bool operator==(const shape::dynamic_dimension& x, const shape::dynamic_dimensio
 bool operator!=(const shape::dynamic_dimension& x, const shape::dynamic_dimension& y)
 {
-    return !(x == y);
+    return not(x == y);
 }
 std::ostream& operator<<(std::ostream& os, const shape::dynamic_dimension& x)
 {
@@ -497,7 +497,7 @@ bool operator==(const shape& x, const shape& y)
            x.strides() == y.strides() and x.sub_shapes() == y.sub_shapes());
 }
-bool operator!=(const shape& x, const shape& y) { return !(x == y); }
+bool operator!=(const shape& x, const shape& y) { return not(x == y); }
 std::ostream& operator<<(std::ostream& os, const shape& x)
 {

--- a/src/simplify_algebra.cpp
+++ b/src/simplify_algebra.cpp
--- a/src/simplify_reshapes.cpp
+++ b/src/simplify_reshapes.cpp
--- a/src/targets/cpu/binary.cpp
+++ b/src/targets/cpu/binary.cpp
@@ -49,7 +49,7 @@ struct dnnl_binary : dnnl_op<dnnl_binary, dnnl::binary>
        auto s0 = inputs.at(0);
        auto s1 = inputs.at(1);
        auto r  = s0;
-        if(s0 != s1 or !s0.packed())
+        if(s0 != s1 or not s0.packed())
        {
            r = shape{s0.type(), s0.lens()};
        }

--- a/src/targets/fpga/include/migraphx/fpga/target.hpp
+++ b/src/targets/fpga/include/migraphx/fpga/target.hpp
@@ -30,6 +30,7 @@
 #include <migraphx/compile_options.hpp>
 #include <migraphx/fpga/context.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/supported_segments.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -41,7 +42,7 @@ struct target
    std::string name() const;
    std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const;
    migraphx::context get_context() const { return context{}; }
-    float is_supported(instruction_ref ins, support_metric m);
+    supported_segments find_supported(const_module_ref mod, support_metric m) const;
    argument copy_to(const argument& arg) const { return arg; }
    argument copy_from(const argument& arg) const { return arg; }

--- a/src/targets/fpga/subgraph.cpp
+++ b/src/targets/fpga/subgraph.cpp
@@ -95,7 +95,7 @@ void subgraph::apply(module_pass_manager& mpm) const
    for(auto it : iterator_for(mod))
    {
        // assuming we want all the params/literals as inputs to the FPGA submodule
-        if(migraphx::starts_with(it->name(), "@param") ||
+        if(migraphx::starts_with(it->name(), "@param") or
           migraphx::starts_with(it->name(), "@literal"))
        {
            literal_inputs.push_back(it);

--- a/src/targets/fpga/target.cpp
+++ b/src/targets/fpga/target.cpp
@@ -34,6 +34,7 @@
 #include <migraphx/dead_code_elimination.hpp>
 #include <migraphx/generate.hpp>
 #include <migraphx/normalize_ops.hpp>
+#include <migraphx/iterator_for.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -62,12 +63,17 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
 argument target::allocate(const shape& s) const { return fill_argument(s, 0); }
-float is_supported(instruction_ref ins, support_metric m)
+supported_segments target::find_supported(const_module_ref mod, support_metric m) const
 {
-    // for now, not using the ins and metric to return a value
-    (void)ins;
     (void)m;
-    return 1.0;
+    supported_segment instrs; // one segment that collects every instruction of mod
+    for(const auto ins : iterator_for(*mod))
+    {
+        instrs.instructions.insert(ins);
+    }
+    instrs.metric = 1; // arbitrary value
+    return {instrs}; // result holds just this one segment, so nothing in mod is reported as unsupported
 }
 MIGRAPHX_REGISTER_TARGET(target);

--- a/src/targets/gpu/code_object_op.cpp
+++ b/src/targets/gpu/code_object_op.cpp
@@ -51,7 +51,8 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>&
    std::vector<void*> kargs(args.size());
    std::transform(
        args.begin(), args.end(), kargs.begin(), [](const argument& a) { return a.data(); });
-    k.launch(ctx.get_stream().get(), global, local, std::move(kargs));
+    auto [start, stop] = ctx.get_perf_events();
+    k.launch(ctx.get_stream().get(), global, local, std::move(kargs), start, stop);
    return args[get_output_arg(args.size())];
 }
 void code_object_op::finalize(context&, const shape&, const std::vector<shape>&)

--- a/src/targets/gpu/compile_gen.cpp
+++ b/src/targets/gpu/compile_gen.cpp
--- a/src/targets/gpu/device/include/migraphx/gpu/device/array.hpp
+++ b/src/targets/gpu/device/include/migraphx/gpu/device/array.hpp
@@ -131,7 +131,7 @@ struct hip_array
    friend MIGRAPHX_DEVICE_CONSTEXPR bool operator!=(const hip_array& x, const hip_array& y)
    {
-        return !(x == y);
+        return not(x == y);
    }
    // This uses the product order rather than lexical order
    friend MIGRAPHX_DEVICE_CONSTEXPR bool operator<(const hip_array& x, const hip_array& y)

--- a/src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp
+++ b/src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp
@@ -117,12 +117,13 @@ template <class V, class F, class... Ts>
 void hip_visit_all_impl(const shape& s, F f, V&& v, Ts&&... xs)
 {
    std::initializer_list<migraphx::shape::type_t> types = {get_shape(xs).type()...};
-    if(!std::all_of(
+    if(not std::all_of(
           types.begin(), types.end(), [&](migraphx::shape::type_t t) { return t == s.type(); }))
        MIGRAPHX_THROW("Types must be the same");
    std::initializer_list<index_int> ranks = {
        static_cast<index_int>(get_shape(xs).lens().size())...};
-    if(!std::all_of(ranks.begin(), ranks.end(), [&](index_int r) { return r == s.lens().size(); }))
+    if(not std::all_of(
+           ranks.begin(), ranks.end(), [&](index_int r) { return r == s.lens().size(); }))
        MIGRAPHX_THROW("Ranks must be the same");
    visit_tensor_size(s.lens().size(), [&](auto ndim) {
        s.visit_type(hip_visitor([&](auto as) { v(f(xs, ndim, as)...); }));
@@ -134,7 +135,8 @@ void hip_visit_views_impl(const shape& s, F f, V&& v, Ts&&... xs)
 {
    std::initializer_list<index_int> ranks = {
        static_cast<index_int>(get_shape(xs).lens().size())...};
-    if(!std::all_of(ranks.begin(), ranks.end(), [&](index_int r) { return r == s.lens().size(); }))
+    if(not std::all_of(
+           ranks.begin(), ranks.end(), [&](index_int r) { return r == s.lens().size(); }))
        MIGRAPHX_THROW("Ranks must be the same");
    visit_tensor_size(s.lens().size(), [&](auto ndim) { v(f(xs, ndim)...); });
 }

--- a/src/targets/gpu/device/multinomial.cpp
+++ b/src/targets/gpu/device/multinomial.cpp
@@ -47,7 +47,7 @@ constexpr Iterator upper_bound(Iterator first, Iterator last, const T& value)
        it   = first;
        step = count / 2;
        std::advance(it, step);
-        if(!(value < *it))
+        if(not(value < *it))
        {
            first = ++it;
            count -= step + 1;

--- a/src/targets/gpu/driver/compile_op.cpp
+++ b/src/targets/gpu/driver/compile_op.cpp
--- a/src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp
+++ b/src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp
@@ -33,7 +33,8 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace driver {
-double time_op(context& ctx, operation op, const std::vector<shape>& inputs, int n = 100);
+std::pair<double, double>
+time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);
 } // namespace driver
 } // namespace gpu