Commit d4ee7984 authored by turneram

Merge remote-tracking branch 'origin/jit-contiguous' into transformer-opts

parents 1bc16951 835cc1e2
@@ -25,6 +25,7 @@
 #include <migraphx/instruction.hpp>
 #include <migraphx/register_op.hpp>
 #include <migraphx/array.hpp>
+#include <migraphx/make_op.hpp>
 #include <migraphx/op/clip.hpp>
 #include <cmath>
 #include <set>
@@ -948,9 +949,44 @@ struct find_commutative_broadcast
     }
 };
 
+struct find_contiguous
+{
+    auto matcher() const { return match::name("gpu::contiguous"); }
+
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins  = r.result;
+        auto args = ins->inputs();
+        m.replace_instruction(
+            ins,
+            make_op("gpu::precompile_op", {{"op", to_value(make_op("contiguous"))}}),
+            args);
+    }
+};
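For context on what is being JIT-compiled here: contiguous materializes a strided or broadcast view into packed, standard-layout memory. A minimal host-side sketch of that semantics for a 2-D view (plain C++ for illustration, not MIGraphX code; the function name and the float/2-D restriction are assumptions):

    #include <cstddef>
    #include <vector>

    // Sketch of what "contiguous" computes: copy a strided 2-D view into
    // a packed row-major buffer. The JIT'd kernel does the same on the GPU.
    std::vector<float> make_contiguous(const float* data,
                                       std::size_t rows, std::size_t cols,
                                       std::size_t row_stride, std::size_t col_stride)
    {
        std::vector<float> out(rows * cols);
        for(std::size_t r = 0; r < rows; ++r)
            for(std::size_t c = 0; c < cols; ++c)
                out[r * cols + c] = data[r * row_stride + c * col_stride];
        return out;
    }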
+struct find_contiguous_pointwise
+{
+    auto matcher() const
+    {
+        return match::name("gpu::contiguous")(match::arg(0)(precompile_name("pointwise")));
+    }
+
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins    = r.result;
+        auto pw     = ins->inputs().front();
+        auto alloc  = ins->inputs().back();
+        auto args   = pw->inputs();
+        args.back() = alloc;
+        m.replace_instruction(ins, pw->get_operator(), args, pw->module_inputs());
+    }
+};
+
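What this rewrite does: when a gpu::contiguous consumes a pointwise kernel's output, the copy is elided by retargeting the pointwise op's trailing output allocation at the contiguous op's buffer. Schematically, with hypothetical instruction names:

    before:  @1 = gpu::pointwise(x, y, @pw_alloc)
             @2 = gpu::contiguous(@1, @cont_alloc)
    after:   @2 = gpu::pointwise(x, y, @cont_alloc)

The fused kernel then writes its result directly in standard layout, and the now-unused copy is cleaned up by the dead_code_elimination pass that follows.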
 void fuse_ops::apply(module& m) const
 {
-    match::find_matches(m, find_gelu{}, find_gelu_new{fast_math});
+    match::find_matches(m, find_contiguous_pointwise{}, find_gelu{}, find_gelu_new{fast_math});
     run_passes(m, {dead_code_elimination{}});
     match::find_matches(m, find_triadd{});
     match::find_matches(m,
@@ -968,6 +1004,7 @@ void fuse_ops::apply(module& m) const
                         find_add_clip{});
     run_passes(m, {dead_code_elimination{}});
     match::find_matches(m, find_triadd_layernorm{}, find_gemm_add{}, find_commutative_broadcast{});
+    match::find_matches(m, find_contiguous{});
 }
 
 } // namespace gpu
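Ordering note: find_contiguous_pointwise runs in the first find_matches pass, so copies feeding off pointwise kernels are folded into them before the other fusions run, while find_contiguous runs last, so any gpu::contiguous instructions that survive the other rewrites are still lowered to JIT-compiled precompile_ops.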
...
@@ -41,7 +41,7 @@ __global__ void kernel(${params})
 struct pointwise_compiler : compiler<pointwise_compiler>
 {
-    std::vector<std::string> names() const { return {"pointwise"}; }
+    std::vector<std::string> names() const { return {"pointwise", "contiguous"}; }
 
     static std::size_t oversubscribe_if(bool b)
     {
@@ -146,29 +146,38 @@ struct pointwise_compiler : compiler<pointwise_compiler>
         return compile_hip_code_object(src, options);
     }
 
-    compiler_replace compile(context& ctx, instruction_ref ins, const operation&) const
+    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
     {
-        assert(not ins->module_inputs().empty());
-        auto* pm = ins->module_inputs().front();
-        run_passes(*pm, {eliminate_common_subexpression{}, dead_code_elimination{}});
-        cpp_generator g;
-        g.fmap([](const std::string& fname) { return "migraphx::" + fname; });
-        g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");
-        g.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})");
-        g.add_point_op("sign",
-                       "${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))");
-        g.add_point_op("equal", "migraphx::abs(${0} == ${1})");
-        g.add_point_op("less", "migraphx::abs(${0} < ${1})");
-        g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
-        g.add_point_op("not", "migraphx::abs(not ${0})");
-        // Add explict conversions
-        g.fresult(
-            [](const shape& s) { return "migraphx::convert<" + shape::cpp_type(s.type()) + ">"; });
-        auto name = g.create_function(
-            g.generate_module(*pm).set_attributes({"__device__"}).set_generic_types(*pm));
-        std::string lambda = "MIGRAPHX_LIFT(" + name + ")";
-        return replace(
-            compile_op(ctx, to_shapes(ins->inputs()), {{"lambda", lambda}, {"preamble", g.str()}}));
+        if(op.name() == "contiguous")
+        {
+            return replace(compile_op(
+                ctx, to_shapes(ins->inputs()), {{"lambda", "[](auto x) { return x; }"}}));
+        }
+        else
+        {
+            assert(not ins->module_inputs().empty());
+            auto* pm = ins->module_inputs().front();
+            run_passes(*pm, {eliminate_common_subexpression{}, dead_code_elimination{}});
+            cpp_generator g;
+            g.fmap([](const std::string& fname) { return "migraphx::" + fname; });
+            g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");
+            g.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})");
+            g.add_point_op("sign",
+                           "${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))");
+            g.add_point_op("equal", "migraphx::abs(${0} == ${1})");
+            g.add_point_op("less", "migraphx::abs(${0} < ${1})");
+            g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
+            g.add_point_op("not", "migraphx::abs(not ${0})");
+            // Add explicit conversions
+            g.fresult([](const shape& s) {
+                return "migraphx::convert<" + shape::cpp_type(s.type()) + ">";
+            });
+            auto name = g.create_function(
+                g.generate_module(*pm).set_attributes({"__device__"}).set_generic_types(*pm));
+            std::string lambda = "MIGRAPHX_LIFT(" + name + ")";
+            return replace(compile_op(
+                ctx, to_shapes(ins->inputs()), {{"lambda", lambda}, {"preamble", g.str()}}));
+        }
     }
 };
 
 } // namespace gpu
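The new contiguous branch reuses the entire pointwise codegen path with an identity lambda: reading each element through the (possibly strided) input view and writing it at the packed output index is exactly a copy into standard shape. Roughly what the resulting kernel amounts to for a 2-D input (an illustrative sketch, not the actual generated code; names and the float/2-D case are assumptions):

    #include <hip/hip_runtime.h>

    // Each thread applies the identity lambda to one element read through
    // the strided view and stores it at the packed (row-major) offset.
    __global__ void contiguous_2d(const float* in, float* out,
                                  int rows, int cols,
                                  int row_stride, int col_stride)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if(i >= rows * cols)
            return;
        auto id = [](auto x) { return x; }; // the "lambda" handed to compile_op above
        int r   = i / cols;
        int c   = i % cols;
        out[i]  = id(in[r * row_stride + c * col_stride]);
    }

Launched with one thread per output element, this is the whole op; the identity lambda is what lets the generic pointwise machinery stand in for a dedicated copy kernel.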
...
@@ -18,8 +18,15 @@ struct implicit_conversion_op
     template <index_int N, class U>
     constexpr operator vec<U, N>() const
     {
-        static_assert(vec_size<T>() == N, "Vector mismatch size");
-        return __builtin_convertvector(x, vec<U, N>);
+        if constexpr(vec_size<T>() == 0)
+        {
+            return x;
+        }
+        else
+        {
+            static_assert(vec_size<T>() == N, "Vector mismatch size");
+            return __builtin_convertvector(x, vec<U, N>);
+        }
     }
 
     template <class U>
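The if constexpr guard matters because the discarded branch of a constexpr-if inside a template is never instantiated: for scalar T, where vec_size<T>() is 0, neither the static_assert nor the vector-only __builtin_convertvector is ever seen. A standalone toy version of the same dispatch, using std::array as a stand-in for migraphx::vec (illustrative only, not the library's code):

    #include <array>
    #include <cstddef>
    #include <type_traits>

    template <class T>
    struct vec_size : std::integral_constant<std::size_t, 0> {}; // scalars report 0

    template <class T, std::size_t N>
    struct vec_size<std::array<T, N>> : std::integral_constant<std::size_t, N> {};

    template <std::size_t N, class U, class T>
    constexpr auto convert_to(const T& x)
    {
        if constexpr(vec_size<T>::value == 0)
        {
            return static_cast<U>(x); // scalar: no vector machinery instantiated
        }
        else
        {
            static_assert(vec_size<T>::value == N, "Vector mismatch size");
            std::array<U, N> out{};
            for(std::size_t i = 0; i < N; ++i)
                out[i] = static_cast<U>(x[i]);
            return out;
        }
    }

    int main()
    {
        auto s = convert_to<4, float>(3);                              // scalar path
        auto v = convert_to<4, float>(std::array<int, 4>{1, 2, 3, 4}); // vector path
        (void)s;
        (void)v;
    }

Without the guard, instantiating the conversion operator for a scalar T would hard-fail on the vector-only builtin even when that branch could never be taken at runtime.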
...