Commit d4ee7984 authored by turneram

Merge remote-tracking branch 'origin/jit-contiguous' into transformer-opts

parents 1bc16951 835cc1e2
@@ -25,6 +25,7 @@
 #include <migraphx/instruction.hpp>
 #include <migraphx/register_op.hpp>
 #include <migraphx/array.hpp>
+#include <migraphx/make_op.hpp>
 #include <migraphx/op/clip.hpp>
 #include <cmath>
 #include <set>
@@ -948,9 +949,44 @@ struct find_commutative_broadcast
     }
 };
 
+struct find_contiguous
+{
+    auto matcher() const { return match::name("gpu::contiguous"); }
+
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins  = r.result;
+        auto args = ins->inputs();
+        m.replace_instruction(
+            ins,
+            make_op("gpu::precompile_op", {{"op", to_value(make_op("contiguous"))}}),
+            args);
+    }
+};
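For context on what is being JIT-compiled here: contiguous materializes a strided or broadcast view into packed, standard-layout memory. A minimal host-side sketch of that semantics for a 2-D view (plain C++ for illustration, not MIGraphX code; the function name and the float/2-D restriction are assumptions):

    #include <cstddef>
    #include <vector>

    // Sketch of what "contiguous" computes: copy a strided 2-D view into
    // a packed row-major buffer. The JIT'd kernel does the same on the GPU.
    std::vector<float> make_contiguous(const float* data,
                                       std::size_t rows, std::size_t cols,
                                       std::size_t row_stride, std::size_t col_stride)
    {
        std::vector<float> out(rows * cols);
        for(std::size_t r = 0; r < rows; ++r)
            for(std::size_t c = 0; c < cols; ++c)
                out[r * cols + c] = data[r * row_stride + c * col_stride];
        return out;
    }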
+struct find_contiguous_pointwise
+{
+    auto matcher() const
+    {
+        return match::name("gpu::contiguous")(match::arg(0)(precompile_name("pointwise")));
+    }
+
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins    = r.result;
+        auto pw     = ins->inputs().front();
+        auto alloc  = ins->inputs().back();
+        auto args   = pw->inputs();
+        args.back() = alloc;
+        m.replace_instruction(ins, pw->get_operator(), args, pw->module_inputs());
+    }
+};
+
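What this rewrite does: when a gpu::contiguous consumes a pointwise kernel's output, the copy is elided by retargeting the pointwise op's trailing output allocation at the contiguous op's buffer. Schematically, with hypothetical instruction names:

    before:  @1 = gpu::pointwise(x, y, @pw_alloc)
             @2 = gpu::contiguous(@1, @cont_alloc)
    after:   @2 = gpu::pointwise(x, y, @cont_alloc)

The fused kernel then writes its result directly in standard layout, and the now-unused copy is cleaned up by the dead_code_elimination pass that follows.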
 void fuse_ops::apply(module& m) const
 {
-    match::find_matches(m, find_gelu{}, find_gelu_new{fast_math});
+    match::find_matches(m, find_contiguous_pointwise{}, find_gelu{}, find_gelu_new{fast_math});
     run_passes(m, {dead_code_elimination{}});
     match::find_matches(m, find_triadd{});
     match::find_matches(m,
@@ -968,6 +1004,7 @@ void fuse_ops::apply(module& m) const
                         find_add_clip{});
     run_passes(m, {dead_code_elimination{}});
     match::find_matches(m, find_triadd_layernorm{}, find_gemm_add{}, find_commutative_broadcast{});
+    match::find_matches(m, find_contiguous{});
 }
 
 } // namespace gpu
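Ordering note: find_contiguous_pointwise runs in the first find_matches pass, so copies feeding off pointwise kernels are folded into them before the other fusions run, while find_contiguous runs last, so any gpu::contiguous instructions that survive the other rewrites are still lowered to JIT-compiled precompile_ops.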
...
@@ -41,7 +41,7 @@ __global__ void kernel(${params})
 struct pointwise_compiler : compiler<pointwise_compiler>
 {
-    std::vector<std::string> names() const { return {"pointwise"}; }
+    std::vector<std::string> names() const { return {"pointwise", "contiguous"}; }
 
     static std::size_t oversubscribe_if(bool b)
     {
@@ -146,29 +146,38 @@ struct pointwise_compiler : compiler<pointwise_compiler>
         return compile_hip_code_object(src, options);
     }
 
-    compiler_replace compile(context& ctx, instruction_ref ins, const operation&) const
+    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
     {
-        assert(not ins->module_inputs().empty());
-        auto* pm = ins->module_inputs().front();
-        run_passes(*pm, {eliminate_common_subexpression{}, dead_code_elimination{}});
-        cpp_generator g;
-        g.fmap([](const std::string& fname) { return "migraphx::" + fname; });
-        g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");
-        g.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})");
-        g.add_point_op("sign",
-                       "${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))");
-        g.add_point_op("equal", "migraphx::abs(${0} == ${1})");
-        g.add_point_op("less", "migraphx::abs(${0} < ${1})");
-        g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
-        g.add_point_op("not", "migraphx::abs(not ${0})");
-        // Add explict conversions
-        g.fresult(
-            [](const shape& s) { return "migraphx::convert<" + shape::cpp_type(s.type()) + ">"; });
-        auto name = g.create_function(
-            g.generate_module(*pm).set_attributes({"__device__"}).set_generic_types(*pm));
-        std::string lambda = "MIGRAPHX_LIFT(" + name + ")";
-        return replace(
-            compile_op(ctx, to_shapes(ins->inputs()), {{"lambda", lambda}, {"preamble", g.str()}}));
+        if(op.name() == "contiguous")
+        {
+            return replace(compile_op(
+                ctx, to_shapes(ins->inputs()), {{"lambda", "[](auto x) { return x; }"}}));
+        }
+        else
+        {
+            assert(not ins->module_inputs().empty());
+            auto* pm = ins->module_inputs().front();
+            run_passes(*pm, {eliminate_common_subexpression{}, dead_code_elimination{}});
+            cpp_generator g;
+            g.fmap([](const std::string& fname) { return "migraphx::" + fname; });
+            g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");
+            g.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})");
+            g.add_point_op("sign",
+                           "${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))");
+            g.add_point_op("equal", "migraphx::abs(${0} == ${1})");
+            g.add_point_op("less", "migraphx::abs(${0} < ${1})");
+            g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
+            g.add_point_op("not", "migraphx::abs(not ${0})");
+            // Add explicit conversions
+            g.fresult([](const shape& s) {
+                return "migraphx::convert<" + shape::cpp_type(s.type()) + ">";
+            });
+            auto name = g.create_function(
+                g.generate_module(*pm).set_attributes({"__device__"}).set_generic_types(*pm));
+            std::string lambda = "MIGRAPHX_LIFT(" + name + ")";
+            return replace(compile_op(
+                ctx, to_shapes(ins->inputs()), {{"lambda", lambda}, {"preamble", g.str()}}));
+        }
     }
 };
 
 } // namespace gpu
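The new contiguous branch reuses the entire pointwise codegen path with an identity lambda: reading each element through the (possibly strided) input view and writing it at the packed output index is exactly a copy into standard shape. Roughly what the resulting kernel amounts to for a 2-D input (an illustrative sketch, not the actual generated code; names and the float/2-D case are assumptions):

    #include <hip/hip_runtime.h>

    // Each thread applies the identity lambda to one element read through
    // the strided view and stores it at the packed (row-major) offset.
    __global__ void contiguous_2d(const float* in, float* out,
                                  int rows, int cols,
                                  int row_stride, int col_stride)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if(i >= rows * cols)
            return;
        auto id = [](auto x) { return x; }; // the "lambda" handed to compile_op above
        int r   = i / cols;
        int c   = i % cols;
        out[i]  = id(in[r * row_stride + c * col_stride]);
    }

Launched with one thread per output element, this is the whole op; the identity lambda is what lets the generic pointwise machinery stand in for a dedicated copy kernel.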
...
@@ -18,8 +18,15 @@ struct implicit_conversion_op
     template <index_int N, class U>
     constexpr operator vec<U, N>() const
     {
-        static_assert(vec_size<T>() == N, "Vector mismatch size");
-        return __builtin_convertvector(x, vec<U, N>);
+        if constexpr(vec_size<T>() == 0)
+        {
+            return x;
+        }
+        else
+        {
+            static_assert(vec_size<T>() == N, "Vector mismatch size");
+            return __builtin_convertvector(x, vec<U, N>);
+        }
     }
 
     template <class U>
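The if constexpr guard matters because the discarded branch of a constexpr-if inside a template is never instantiated: for scalar T, where vec_size<T>() is 0, neither the static_assert nor the vector-only __builtin_convertvector is ever seen. A standalone toy version of the same dispatch, using std::array as a stand-in for migraphx::vec (illustrative only, not the library's code):

    #include <array>
    #include <cstddef>
    #include <type_traits>

    template <class T>
    struct vec_size : std::integral_constant<std::size_t, 0> {}; // scalars report 0

    template <class T, std::size_t N>
    struct vec_size<std::array<T, N>> : std::integral_constant<std::size_t, N> {};

    template <std::size_t N, class U, class T>
    constexpr auto convert_to(const T& x)
    {
        if constexpr(vec_size<T>::value == 0)
        {
            return static_cast<U>(x); // scalar: no vector machinery instantiated
        }
        else
        {
            static_assert(vec_size<T>::value == N, "Vector mismatch size");
            std::array<U, N> out{};
            for(std::size_t i = 0; i < N; ++i)
                out[i] = static_cast<U>(x[i]);
            return out;
        }
    }

    int main()
    {
        auto s = convert_to<4, float>(3);                              // scalar path
        auto v = convert_to<4, float>(std::array<int, 4>{1, 2, 3, 4}); // vector path
        (void)s;
        (void)v;
    }

Without the guard, instantiating the conversion operator for a scalar T would hard-fail on the vector-only builtin even when that branch could never be taken at runtime.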
...