Commit d4ee7984 authored by turneram

Merge remote-tracking branch 'origin/jit-contiguous' into transformer-opts

parents 1bc16951 835cc1e2
@@ -25,6 +25,7 @@
 #include <migraphx/instruction.hpp>
 #include <migraphx/register_op.hpp>
 #include <migraphx/array.hpp>
+#include <migraphx/make_op.hpp>
 #include <migraphx/op/clip.hpp>
 #include <cmath>
 #include <set>
@@ -948,9 +949,44 @@ struct find_commutative_broadcast
     }
 };
 
+struct find_contiguous
+{
+    auto matcher() const { return match::name("gpu::contiguous"); }
+
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins  = r.result;
+        auto args = ins->inputs();
+
+        m.replace_instruction(
+            ins,
+            make_op("gpu::precompile_op", {{"op", to_value(make_op("contiguous"))}}),
+            ins->inputs());
+    }
+};
+
+struct find_contiguous_pointwise
+{
+    auto matcher() const
+    {
+        return match::name("gpu::contiguous")(match::arg(0)(precompile_name("pointwise")));
+    }
+
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins    = r.result;
+        auto pw     = ins->inputs().front();
+        auto alloc  = ins->inputs().back();
+        auto args   = pw->inputs();
+        args.back() = alloc;
+
+        m.replace_instruction(ins, pw->get_operator(), args, pw->module_inputs());
+    }
+};
+
 void fuse_ops::apply(module& m) const
 {
-    match::find_matches(m, find_gelu{}, find_gelu_new{fast_math});
+    match::find_matches(m, find_contiguous_pointwise{}, find_gelu{}, find_gelu_new{fast_math});
     run_passes(m, {dead_code_elimination{}});
     match::find_matches(m, find_triadd{});
     match::find_matches(m,
@@ -968,6 +1004,7 @@ void fuse_ops::apply(module& m) const
                        find_add_clip{});
     run_passes(m, {dead_code_elimination{}});
     match::find_matches(m, find_triadd_layernorm{}, find_gemm_add{}, find_commutative_broadcast{});
+    match::find_matches(m, find_contiguous{});
 }
 
 } // namespace gpu
...
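The two new matchers act as a pair: find_contiguous_pointwise runs in the first find_matches call and removes a gpu::contiguous that directly follows a JIT pointwise op by rewiring the pointwise op to write into the buffer the contiguous op would have filled, while find_contiguous runs at the end and wraps any gpu::contiguous still left in a gpu::precompile_op so it is also JIT-compiled. Below is a rough, standalone sketch of the buffer-swap rewrite using a toy instruction struct; it only illustrates the idea and none of it is the real MIGraphX module/instruction API.

// Toy model of find_contiguous_pointwise: as in MIGraphX's GPU lowering, the last
// input of a lowered op is assumed to be its output allocation.
#include <iostream>
#include <string>
#include <vector>

struct toy_ins
{
    std::string name;
    std::vector<toy_ins*> inputs; // last element = output buffer (assumption for this sketch)
};

int main()
{
    toy_ins buf_a{"hip::allocate", {}};
    toy_ins buf_b{"hip::allocate", {}};
    toy_ins x{"@param:x", {}};

    // pointwise writes into buf_a; contiguous copies buf_a into buf_b
    toy_ins pw{"gpu::precompile_op[pointwise]", {&x, &buf_a}};
    toy_ins contiguous{"gpu::contiguous", {&pw, &buf_b}};

    // the rewrite: point the pointwise op at the contiguous op's allocation,
    // then replace the contiguous instruction with the rewired pointwise op
    auto args   = pw.inputs;
    args.back() = contiguous.inputs.back();
    toy_ins fused{pw.name, args};

    std::cout << fused.name << " now writes directly into " << fused.inputs.back()->name
              << ", so the gpu::contiguous copy can be dropped\n";
}

The reason args.back() is the slot that gets swapped is the GPU-lowering convention visible in the diff above: an op's final input is its output allocation (alloc = ins->inputs().back()).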
@@ -41,7 +41,7 @@ __global__ void kernel(${params})
 
 struct pointwise_compiler : compiler<pointwise_compiler>
 {
-    std::vector<std::string> names() const { return {"pointwise"}; }
+    std::vector<std::string> names() const { return {"pointwise", "contiguous"}; }
 
     static std::size_t oversubscribe_if(bool b)
     {
@@ -146,7 +146,14 @@ struct pointwise_compiler : compiler<pointwise_compiler>
         return compile_hip_code_object(src, options);
     }
 
-    compiler_replace compile(context& ctx, instruction_ref ins, const operation&) const
+    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
+    {
+        if(op.name() == "contiguous")
+        {
+            return replace(compile_op(
+                ctx, to_shapes(ins->inputs()), {{"lambda", "[](auto x) { return x; }"}}));
+        }
+        else
     {
         assert(not ins->module_inputs().empty());
         auto* pm = ins->module_inputs().front();
@@ -162,13 +169,15 @@ struct pointwise_compiler : compiler<pointwise_compiler>
         g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
         g.add_point_op("not", "migraphx::abs(not ${0})");
         // Add explict conversions
-        g.fresult(
-            [](const shape& s) { return "migraphx::convert<" + shape::cpp_type(s.type()) + ">"; });
+        g.fresult([](const shape& s) {
+            return "migraphx::convert<" + shape::cpp_type(s.type()) + ">";
+        });
         auto name = g.create_function(
             g.generate_module(*pm).set_attributes({"__device__"}).set_generic_types(*pm));
         std::string lambda = "MIGRAPHX_LIFT(" + name + ")";
-        return replace(
-            compile_op(ctx, to_shapes(ins->inputs()), {{"lambda", lambda}, {"preamble", g.str()}}));
+        return replace(compile_op(
+            ctx, to_shapes(ins->inputs()), {{"lambda", lambda}, {"preamble", g.str()}}));
+    }
     }
 };
 
 } // namespace gpu
...
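Registering "contiguous" as a second name handled by the pointwise compiler works because the generated pointwise kernel is already indexed by shape: it reads each input element through that input's strides and writes to a densely packed output. With the identity lambda [](auto x) { return x; } the kernel body does nothing but move data, which is exactly what a contiguous copy is. A small CPU-side illustration of that idea (plain C++, not MIGraphX code):

// Applying an identity function element-wise over a strided view, writing packed
// output, turns a non-contiguous (here: transposed) tensor into a contiguous one.
#include <array>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // 2x3 tensor stored row-major: [[0,1,2],[3,4,5]]
    std::vector<float> data{0, 1, 2, 3, 4, 5};

    // its 3x2 transpose, expressed purely through strides (no data movement yet)
    std::array<std::size_t, 2> lens{3, 2};
    std::array<std::size_t, 2> strides{1, 3};

    auto identity = [](auto v) { return v; };

    // the "contiguous" kernel: read through strides, write densely packed
    std::vector<float> packed(lens[0] * lens[1]);
    for(std::size_t i = 0; i < lens[0]; i++)
        for(std::size_t j = 0; j < lens[1]; j++)
            packed[i * lens[1] + j] = identity(data[i * strides[0] + j * strides[1]]);

    for(float v : packed)
        std::cout << v << ' '; // prints 0 3 1 4 2 5
    std::cout << '\n';
}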
@@ -17,10 +17,17 @@ struct implicit_conversion_op
     template <index_int N, class U>
     constexpr operator vec<U, N>() const
+    {
+        if constexpr(vec_size<T>() == 0)
+        {
+            return x;
+        }
+        else
     {
         static_assert(vec_size<T>() == N, "Vector mismatch size");
         return __builtin_convertvector(x, vec<U, N>);
     }
+    }
 
     template <class U>
     constexpr operator U() const
...
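The if constexpr added to implicit_conversion_op lets the conversion operator accept a scalar payload: when vec_size<T>() is 0 it returns the value as-is, and only genuine vector payloads reach the size check and __builtin_convertvector. Because the untaken branch of if constexpr is discarded at instantiation time, the static_assert can no longer fire for scalars. A self-contained sketch of the same pattern, with std::array standing in for the Clang vector types and an explicit broadcast in place of the scalar pass-through (all names here are illustrative, not the kernel headers' API):

#include <array>
#include <cstddef>
#include <iostream>
#include <type_traits>

// number of lanes: 0 for scalars, N for our stand-in vector type
template <class T>
struct lanes : std::integral_constant<std::size_t, 0>
{
};

template <class T, std::size_t N>
struct lanes<std::array<T, N>> : std::integral_constant<std::size_t, N>
{
};

template <class T>
struct conversion_wrapper
{
    T x;

    template <std::size_t N, class U>
    constexpr operator std::array<U, N>() const
    {
        if constexpr(lanes<T>::value == 0)
        {
            // scalar payload: broadcast instead of converting lane by lane
            std::array<U, N> out{};
            for(auto& e : out)
                e = static_cast<U>(x);
            return out;
        }
        else
        {
            // vector payload: sizes must match, convert each lane
            static_assert(lanes<T>::value == N, "Vector size mismatch");
            std::array<U, N> out{};
            for(std::size_t i = 0; i < N; i++)
                out[i] = static_cast<U>(x[i]);
            return out;
        }
    }
};

int main()
{
    std::array<float, 4> from_scalar = conversion_wrapper<double>{2.5};
    std::array<float, 2> from_vector = conversion_wrapper<std::array<int, 2>>{{{7, 9}}};
    std::cout << from_scalar[3] << ' ' << from_vector[1] << '\n'; // prints 2.5 9
}

The real operator simply returns x in the scalar branch and lets the vector type's own scalar handling take over; the broadcast above is just the closest equivalent with std::array.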