Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into...

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into test_runner_match_input_output

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into...
Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into test_runner_match_input_output
2d9e620b · Shucai Xiao · 2a73d9a9 · 19f65e7e · 2d9e620b · 2d9e620b
Commit 2d9e620b authored Nov 16, 2021 by Shucai Xiao
20 changed files
--- a/src/cpp_generator.cpp
+++ b/src/cpp_generator.cpp
@@ -26,17 +26,19 @@ cpp_generator::function::set_body(const module& m, const cpp_generator::generate
        {
            names[ins] =
                migraphx::any_cast<migraphx::builtin::param>(ins->get_operator()).parameter;
-            continue;
        }
-        if(ins->name() == "@return")
+        else if(ins->name() == "@return")
        {
            assert(ins->inputs().size() == 1);
            return_ins = ins->inputs().front();
        }
+        else
+        {
            std::string n = "z" + std::to_string(names.size());
            names[ins]    = n;
            ss << "auto " << n << " = " << g(ins, names) << ";\n";
        }
+    }
    ss << "return " << names.at(return_ins) << ";\n";
    body = ss.str();
    return *this;
@@ -85,7 +87,10 @@ std::string cpp_generator::generate_point_op(const operation& op,
                                             const std::vector<std::string>& args)
 {
    auto v          = op.to_value();
-    return interpolate_string(op.attributes()["point_op"].to<std::string>(),
+    auto attributes = op.attributes();
+    if(not attributes.contains("point_op"))
+        MIGRAPHX_THROW("op is missing point_op attribute: " + op.name());
+    return interpolate_string(attributes["point_op"].to<std::string>(),
                              [&](auto start, auto last) -> std::string {
                                  auto key = trim({start, last});
                                  if(key.empty())
@@ -120,7 +125,12 @@ std::string cpp_generator::str() const { return impl->fs.str(); }
 cpp_generator::function cpp_generator::generate_module(const module& m)
 {
    function f;
-    f.set_name(m.name()).set_types(m).set_body(
+    auto name = transform_string(m.name(), [](char c) {
+        if(with_char(::isalnum)(c) or c == '_')
+            return c;
+        return '_';
+    });
+    f.set_name(name).set_types(m).set_body(
        m, [&](instruction_ref ins, const auto& names) -> std::string {
            if(ins->name() == "@literal")
                return shape::cpp_type(ins->get_shape().type()) + "(" +
@@ -130,7 +140,6 @@ cpp_generator::function cpp_generator::generate_module(const module& m)
                           ins->inputs().end(),
                           std::back_inserter(args),
                           [&](auto i) { return names.at(i); });
-            auto s = this->generate_point_op(ins->get_operator(), args);
            return this->generate_point_op(ins->get_operator(), args);
        });
    return f;

--- a/src/driver/main.cpp
+++ b/src/driver/main.cpp
@@ -480,7 +480,7 @@ struct perf : command<perf>
        std::cout << "Allocating params ... " << std::endl;
        auto m = c.params(p);
        std::cout << "Running performance report ... " << std::endl;
-        p.perf_report(std::cout, n, m);
+        p.perf_report(std::cout, n, m, c.l.batch);
    }
 };

--- a/src/fuse_pointwise.cpp
+++ b/src/fuse_pointwise.cpp
@@ -13,6 +13,8 @@ inline namespace MIGRAPHX_INLINE_NS {
 static literal get_scalar(instruction_ref ins)
 {
+    if(ins->name() == "contiguous")
+        return get_scalar(ins->inputs().front());
    const auto& s = ins->get_shape();
    if(not(s.elements() == 1 or s.scalar()))
        return {};
@@ -31,11 +33,16 @@ static void create_pointwise_modules(module_pass_manager& mpm)
    {
        if(not ins->get_operator().attributes().get("pointwise", false))
            continue;
-        auto* pm = mpm.create_module("pointwise" + std::to_string(n++));
+        // Skip convert op for now
+        if(ins->name() == "convert")
+            continue;
+        assert(ins->get_operator().attributes().contains("point_op"));
+        auto* pm = mpm.create_module(mpm.get_module().name() + ":pointwise" + std::to_string(n++));
        pm->set_bypass();
        std::unordered_map<instruction_ref, instruction_ref> param_map;
        std::vector<instruction_ref> pointwise_inputs;
+        std::size_t i = 0;
        for(auto input : ins->inputs())
        {
            if(contains(param_map, input))
@@ -44,8 +51,9 @@ static void create_pointwise_modules(module_pass_manager& mpm)
            if(scalar.empty())
            {
                pointwise_inputs.push_back(input);
-                param_map[input] = pm->add_parameter("x" + std::to_string(param_map.size()),
+                param_map[input] =
-                                                     shape{input->get_shape().type()});
+                    pm->add_parameter("x" + std::to_string(i), shape{input->get_shape().type()});
+                i++;
            }
            else
            {
@@ -68,6 +76,7 @@ static void create_pointwise_modules(module_pass_manager& mpm)
 static std::vector<instruction_ref> append_pointwise_module(instruction_ref ins,
                                                            instruction_ref output)
 {
+    assert(contains(output->inputs(), ins));
    module_ref pm = ins->module_inputs().at(0);
    module_ref xm = output->module_inputs().at(0);
@@ -75,6 +84,9 @@ static std::vector<instruction_ref> append_pointwise_module(instruction_ref ins,
    assert(last->name() == "@return");
    assert(last->inputs().size() == 1);
+    assert(pm->get_parameter_names().size() == ins->inputs().size());
+    assert(xm->get_parameter_names().size() == output->inputs().size());
    std::vector<instruction_ref> inputs = ins->inputs();
    std::unordered_map<instruction_ref, instruction_ref> map_ins;
    std::unordered_map<instruction_ref, instruction_ref> input_map;
@@ -83,6 +95,7 @@ static std::vector<instruction_ref> append_pointwise_module(instruction_ref ins,
    {
        auto input = inputs[i];
        auto param = pm->get_parameter("x" + std::to_string(i));
+        assert(param != pm->end());
        input_map[input] = param;
    }
    // Add the new parameter and additional inputs
@@ -90,6 +103,7 @@ static std::vector<instruction_ref> append_pointwise_module(instruction_ref ins,
    {
        auto input = output->inputs()[i];
        auto param = xm->get_parameter("x" + std::to_string(i));
+        assert(param != xm->end());
        if(input == ins)
        {
            map_ins[param]   = last->inputs().front();

--- a/src/include/migraphx/op/pointwise.hpp
+++ b/src/include/migraphx/op/pointwise.hpp
@@ -26,19 +26,17 @@ struct pointwise
        auto pnames = pm->get_parameter_names();
        std::sort(pnames.begin(), pnames.end());
        check_shapes{inputs, *this}.has(pnames.size()).same_dims();
-        for(auto i : range(pnames.size()))
-        {
-            auto s1 = pm->get_parameter(pnames[i])->get_shape();
-            auto s2 = inputs[i];
-            if(s1.type() != s2.type())
-                MIGRAPHX_THROW("Mismatch type");
-        }
        if(pm->get_output_shapes().size() != 1)
            MIGRAPHX_THROW("submodule should have only one output.");
        auto type = pm->get_output_shapes().front().type();
+        // Scalar output if all inputs are scalar
+        if(inputs.front().elements() == 1 and
+           all_of(inputs, [](const auto& s) { return s.scalar(); }))
+            return shape{type};
        return shape::from_permutation(type, inputs.front().lens(), find_permutation(inputs));
    }

--- a/src/include/migraphx/op/prelu.hpp
+++ b/src/include/migraphx/op/prelu.hpp
@@ -9,6 +9,7 @@ namespace op {
 struct prelu : binary<prelu>
 {
+    std::string point_op() const { return "(${0} < 0) ? (${0} * ${1}) : ${0}"; }
    auto apply() const
    {
        return [](auto x, auto slope) { return ((x < 0) ? (x * slope) : x); };

--- a/src/include/migraphx/op/recip.hpp
+++ b/src/include/migraphx/op/recip.hpp
@@ -9,6 +9,7 @@ namespace op {
 struct recip : unary<recip>
 {
+    std::string point_op() const { return "1 / ${0}"; }
    auto apply() const
    {
        return [](auto x) { return 1 / x; };

--- a/src/include/migraphx/op/sigmoid.hpp
+++ b/src/include/migraphx/op/sigmoid.hpp
@@ -18,6 +18,7 @@ namespace op {
 struct sigmoid : unary<sigmoid>
 {
+    std::string point_op() const { return "1.f / (1.f + ${function:exp}(-${0}))"; }
    auto apply() const
    {
        return [](auto x) { return 1.f / (1.f + std::exp(-x)); };

--- a/src/include/migraphx/op/sign.hpp
+++ b/src/include/migraphx/op/sign.hpp
@@ -18,6 +18,7 @@ namespace op {
 struct sign : unary<sign>
 {
+    std::string point_op() const { return "(${0} > 0 ? 1 : ((${0} < 0) ? -1 : 0))"; }
    auto apply() const
    {
        return [](auto x) { return (x > 0 ? 1 : ((x < 0) ? -1 : 0)); };

--- a/src/include/migraphx/operation.hpp
+++ b/src/include/migraphx/operation.hpp
@@ -103,7 +103,7 @@ auto operator==(const T& x, const U& y) -> decltype(x.name() == y.name())
 } // namespace operation_operators
 template <class T>
-auto normalize_compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs)
+auto normalize_compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs)
    -> decltype(x.normalize_compute_shape(inputs))
 {
    dependent_type<operation, T> y = x;
@@ -111,6 +111,13 @@ auto normalize_compute_shape_op(rank<1>, const T& x, const std::vector<shape>& i
    return any_cast<T>(y).normalize_compute_shape(inputs);
 }
+template <class T> // Overload tagged rank<1>; participates only when the decltype below is well-formed
+auto normalize_compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs)
+    -> decltype(x.compute_shape(inputs, {})) // requires T to accept compute_shape(inputs, {})
+{
+    return x.compute_shape(inputs, {}); // second argument is passed as an empty braced list
+}
 template <class T>
 shape normalize_compute_shape_op(rank<0>, const T& x, const std::vector<shape>&)
 {
@@ -121,7 +128,7 @@ shape normalize_compute_shape_op(rank<0>, const T& x, const std::vector<shape>&)
 template <class T>
 shape normalize_compute_shape_op(const T& x, const std::vector<shape>& inputs)
 {
-    return normalize_compute_shape_op(rank<1>{}, x, inputs);
+    return normalize_compute_shape_op(rank<2>{}, x, inputs);
 }
 template <class T>

--- a/src/include/migraphx/program.hpp
+++ b/src/include/migraphx/program.hpp
@@ -67,7 +67,8 @@ struct program
    void finalize();
-    void perf_report(std::ostream& os, std::size_t n, parameter_map params) const;
+    void
+    perf_report(std::ostream& os, std::size_t n, parameter_map params, std::size_t batch = 1) const;
    void mark(const parameter_map& params, marker&& m);

--- a/src/include/migraphx/stringutils.hpp
+++ b/src/include/migraphx/stringutils.hpp
@@ -18,7 +18,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 template <class F>
 auto with_char(F f)
 {
-    return [=](unsigned char c) { return f(c); };
+    return [=](unsigned char c) -> bool { return f(c); };
 }
 inline std::string
@@ -120,22 +120,27 @@ interpolate_string(const std::string& input, F f, std::string start = "${", std:
        result.append(it, next_start);
        if(next_start == input.end())
            break;
-        auto r = f(next_start + start.size(), next_end - end.size() + 1);
+        auto r = f(next_start + start.size(), next_end);
        result.append(r.begin(), r.end());
-        it = next_end + 1;
+        it = next_end + end.size();
    }
    return result;
 }
 inline std::string interpolate_string(const std::string& input,
-                                      const std::unordered_map<std::string, std::string>& vars)
+                                      const std::unordered_map<std::string, std::string>& vars,
+                                      std::string start = "${",
+                                      std::string end   = "}")
 {
-    return interpolate_string(input, [&](auto start, auto last) {
+    return interpolate_string(input,
-        auto key = trim({start, last});
+                              [&](auto start_it, auto last_it) {
+                                  auto key = trim({start_it, last_it});
                                  auto it  = vars.find(key);
                                  if(it == vars.end())
                                      throw std::runtime_error("Unknown key: " + key);
                                  return it->second;
-    });
+                              },
+                              std::move(start),
+                              std::move(end));
 }
 template <class Iterator>

--- a/src/program.cpp
+++ b/src/program.cpp
@@ -526,7 +526,10 @@ void program::mark(const parameter_map& params, marker&& m)
    m.mark_stop(*this);
 }
-void program::perf_report(std::ostream& os, std::size_t n, parameter_map params) const
+void program::perf_report(std::ostream& os,
+                          std::size_t n,
+                          parameter_map params,
+                          std::size_t batch) const
 {
    auto& ctx = this->impl->ctx;
    // Run once by itself
@@ -619,7 +622,8 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
    os << std::endl;
-    os << "Rate: " << rate << "/sec" << std::endl;
+    os << "Batch size: " << batch << std::endl;
+    os << "Rate: " << rate * batch << "/sec" << std::endl;
    os << "Total time: " << total_time << "ms" << std::endl;
    os << "Total instructions time: " << total_instruction_time << "ms" << std::endl;
    os << "Overhead time: " << overhead_time << "ms"

--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
@@ -122,6 +122,7 @@ add_library(migraphx_gpu
    batch_norm_inference.cpp
    clip.cpp
    code_object_op.cpp
+    compile_ops.cpp
    compile_hip.cpp
    compile_hip_code_object.cpp
    compile_pointwise.cpp

--- a/src/targets/gpu/allocation_model.cpp
+++ b/src/targets/gpu/allocation_model.cpp
 #include <migraphx/gpu/allocation_model.hpp>
 #include <migraphx/make_op.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/module.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/gpu/compile_ops.cpp
+++ b/src/targets/gpu/compile_ops.cpp
+#include <migraphx/gpu/compile_ops.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/module.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/register_op.hpp>
+#include <migraphx/op/identity.hpp>
+#include <migraphx/gpu/compile_pointwise.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+// Placeholder operator that wraps another operation until compile_ops::apply
+// replaces the instruction with the operation produced by a registered compiler.
+struct precompile_op
+{
+    // Wrapped operation; its name() selects the compiler in compile_ops::apply.
+    operation op = op::identity{};
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.op, "op"));
+    }
+    std::string name() const { return "gpu::precompile_op"; }
+    // Computes the wrapped op's shape from every input except the trailing one,
+    // which output_alias() reports as the argument the result aliases.
+    shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
+    {
+        // pop_back() on an empty vector is undefined behaviour, so make the
+        // "at least one input" precondition explicit.
+        assert(not inputs.empty());
+        inputs.pop_back();
+        return op.compute_shape(inputs, mods);
+    }
+    // Index of the last input. The size is converted to a signed type before
+    // subtracting so an empty list yields -1 instead of an unsigned wrap-around.
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return static_cast<std::ptrdiff_t>(shapes.size()) - 1;
+    }
+};
+MIGRAPHX_REGISTER_OP(precompile_op);
+// Compiler registered under the name "pointwise": builds the compiled operation
+// from the instruction's first module input and the shapes of its inputs.
+struct pointwise_compiler
+{
+    std::string name() const { return "pointwise"; }
+    // The wrapped operation (third argument) is not needed; everything comes from `ins`.
+    operation apply(context& ctx, instruction_ref ins, const operation&) const
+    {
+        const auto& submodules = ins->module_inputs();
+        assert(not submodules.empty());
+        return compile_pointwise(ctx, to_shapes(ins->inputs()), *submodules.front());
+    }
+};
+// Type-erased compiler callback: (context, instruction, wrapped op) -> operation.
+using compiler_function = std::function<operation(context&, instruction_ref, operation)>;
+// Wraps a copy of `x` so that invoking the result forwards its arguments to x.apply().
+template <class T>
+compiler_function make_compiler_function(T x)
+{
+    auto call_apply = [x](auto&&... args) { return x.apply(args...); };
+    return {call_apply};
+}
+// Builds the lookup table of compilers, keyed by the string each one returns from name().
+template <class... Ts>
+std::unordered_map<std::string, compiler_function> make_compilers(Ts... xs)
+{
+    using registry = std::unordered_map<std::string, compiler_function>;
+    return registry{{xs.name(), make_compiler_function(xs)}...};
+}
+// Replaces every "gpu::precompile_op" instruction in `m` with the operation
+// produced by the compiler registered under the wrapped op's name, keeping the
+// instruction's inputs unchanged.
+void compile_ops::apply(module& m) const
+{
+    const auto compilers = make_compilers(pointwise_compiler{});
+    for(auto ins : iterator_for(m))
+    {
+        if(ins->name() != "gpu::precompile_op")
+            continue;
+        operation preop = any_cast<precompile_op>(ins->get_operator()).op;
+        // ctx defaults to nullptr in the struct definition; it is dereferenced below.
+        assert(ctx != nullptr);
+        assert(contains(compilers, preop.name()));
+        // at() rather than operator[]: when asserts are compiled out, an unknown
+        // name must not default-insert an empty std::function and then call it.
+        auto op = compilers.at(preop.name())(*ctx, ins, preop);
+        m.replace_instruction(ins, op, ins->inputs());
+    }
+}
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/gpu/compile_pointwise.cpp
+++ b/src/targets/gpu/compile_pointwise.cpp
@@ -2,9 +2,14 @@
 #include <migraphx/gpu/compile_hip_code_object.hpp>
 #include <migraphx/gpu/compile_hip.hpp>
 #include <migraphx/gpu/context.hpp>
+#include <migraphx/cpp_generator.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/reduce_dims.hpp>
 #include <migraphx/stringutils.hpp>
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/eliminate_common_subexpression.hpp>
+#include <migraphx/module.hpp>
+#include <migraphx/pass_manager.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -17,6 +22,8 @@ static const char* const pointwise_kernel = R"__migraphx__(
 using namespace migraphx;
+${preamble}
 extern "C" {
 __global__ void kernel(${params}) 
 {
@@ -29,7 +36,10 @@ int main() {}
 )__migraphx__";
-operation compile_pointwise(context&, const std::vector<shape>& inputs, const std::string& lambda)
+operation compile_pointwise(context&,
+                            const std::vector<shape>& inputs,
+                            const std::string& lambda,
+                            const std::string& preamble)
 {
    hip_compile_options options;
    options.global         = compute_global(inputs.front().elements());
@@ -37,13 +47,23 @@ operation compile_pointwise(context&, const std::vector<shape>& inputs, const st
    options.inputs         = inputs;
    options.output         = inputs.back();
    options.reduced_inputs = reduce_dims(inputs);
+    options.params         = "-Wno-float-equal";
    auto src               = interpolate_string(pointwise_kernel,
                                  {{"params", enum_params(inputs.size(), "void * private_p")},
                                   {"args", enum_params(inputs.size(), "private_p")},
-                                   {"lambda", lambda}});
+                                   {"lambda", lambda},
+                                   {"preamble", preamble}});
    return compile_hip_code_object(src, options);
 }
+// Overload taking a module: runs common-subexpression and dead-code elimination
+// on the local copy `m`, generates a function for it, and forwards to the
+// string-based overload with "&<function name>" as the lambda and the
+// generator's accumulated source as the preamble.
+operation compile_pointwise(context& ctx, const std::vector<shape>& inputs, module m)
+{
+    run_passes(m, {eliminate_common_subexpression{}, dead_code_elimination{}});
+    cpp_generator gen;
+    // The function must be created before gen.str() is read below.
+    const auto fname = gen.create_function(gen.generate_module(m).set_attributes({"__device__"}));
+    const auto lambda = "&" + fname;
+    return compile_pointwise(ctx, inputs, lambda, gen.str());
+}
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
--- a/src/targets/gpu/fuse_ops.cpp
+++ b/src/targets/gpu/fuse_ops.cpp
@@ -5,6 +5,7 @@
 #include <migraphx/gpu/miopen.hpp>
 #include <migraphx/gpu/clip.hpp>
 #include <migraphx/gpu/convolution.hpp>
+#include <migraphx/gpu/device_name.hpp>
 #include <migraphx/gpu/oper.hpp>
 #include <migraphx/gpu/add.hpp>
 #include <migraphx/gpu/mul.hpp>
@@ -26,6 +27,7 @@
 #include <migraphx/array.hpp>
 #include <migraphx/op/clip.hpp>
 #include <cmath>
+#include <set>
+#include <unordered_set>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -152,6 +154,12 @@ struct fusion
    }
 };
+// Device architecture names accepted by the fusable_conv matcher; any other
+// device name makes that matcher return false.
+const std::unordered_set<std::string>& get_supported_archs()
+{
+    // const: the set is only ever handed out by const reference and never modified.
+    static const std::unordered_set<std::string> supported_archs{
+        "gfx900", "gfx906", "gfx908", "gfx1030"};
+    return supported_archs;
+}
 MIGRAPHX_PRED_MATCHER(bias_shape, instruction_ref ins)
 {
    auto&& s = ins->get_shape();
@@ -161,6 +169,9 @@ MIGRAPHX_PRED_MATCHER(bias_shape, instruction_ref ins)
 MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
 {
+    const auto device_name = split_string(get_device_name(), ':').front();
+    if(not contains(get_supported_archs(), device_name))
+        return false;
    if(enabled(MIGRAPHX_DISABLE_MIOPEN_FUSION{}))
        return false;
    if(ins->name() != "gpu::convolution")

--- a/src/targets/gpu/include/migraphx/gpu/allocation_model.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/allocation_model.hpp
@@ -3,6 +3,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/operation.hpp>
+#include <migraphx/instruction_ref.hpp>
 #include <string>
 namespace migraphx {

--- a/src/targets/gpu/include/migraphx/gpu/compile_ops.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/compile_ops.hpp
+#ifndef MIGRAPHX_GUARD_GPU_COMPILE_OPS_HPP
+#define MIGRAPHX_GUARD_GPU_COMPILE_OPS_HPP
+#include <migraphx/config.hpp>
+#include <string>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+struct module;
+namespace gpu {
+struct context;
+struct compile_ops // apply() swaps each "gpu::precompile_op" instruction for a compiled operation
+{
+    context* ctx = nullptr; // dereferenced by apply() for every instruction it compiles
+    std::string name() const { return "gpu::compile_ops"; }
+    void apply(module& m) const; // defined in compile_ops.cpp
+};
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif // MIGRAPHX_GUARD_GPU_COMPILE_OPS_HPP
--- a/src/targets/gpu/include/migraphx/gpu/compile_pointwise.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/compile_pointwise.hpp
@@ -6,11 +6,17 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
+struct module;
 namespace gpu {
 struct context;
-operation
+operation compile_pointwise(context& ctx,
-compile_pointwise(context& ctx, const std::vector<shape>& inputs, const std::string& lambda);
+                            const std::vector<shape>& inputs,
+                            const std::string& lambda,
+                            const std::string& preamble = "");
+operation compile_pointwise(context& ctx, const std::vector<shape>& inputs, module m);
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS