"benchmark/git@developer.sourcefind.cn:change/sglang.git" did not exist on "417b44eba8b94f50b209b35dd6dcfa04a6609551"
Commit acc58cfe authored by Paul

Merge branch 'jit-concat-pointwise' into dense-opt

parents c822c48d e60a7d5e
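
This merge brings the concat+pointwise JIT fusion into dense-opt: gpu::precompile_op gains additional_args and ignore_modules fields, a new find_concat_pointwise matcher folds a pointwise consumer into a precompiled concat, the concat compiler and device kernel apply a generated post_concat op as elements are written, and the instruction-moving helpers (module::move_instructions and the new move_instructions_back) are made dependency-aware so fused arguments can be scheduled ahead of the fusion point. The mlir_handle/mlir_program changes are incidental tidy-ups (a NOLINT and a const qualifier).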
@@ -385,9 +385,13 @@ instruction_ref module::move_instruction(instruction_ref src, instruction_ref ds

 instruction_ref module::move_instructions(instruction_ref src, instruction_ref dst)
 {
-    this->move_instruction(src, dst);
     for(auto ins : src->inputs())
-        this->move_instruction(ins, src);
+    {
+        if(not contains(this->impl->instructions, ins))
+            continue;
+        this->move_instructions(ins, dst);
+    }
+    this->move_instruction(src, dst);
     return src;
 }
...
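The fix above makes move_instructions dependency-aware: each input's transitive producers are moved ahead of dst first (skipping instructions owned by another module via the contains guard), and only then is src itself moved. A minimal standalone sketch of the same reordering idea, using a hypothetical node/std::list toy graph rather than MIGraphX types:

// Toy graph: nodes with inputs; 'order' models the module's instruction list.
// node, move_nodes, and main are illustrative stand-ins, not MIGraphX code.
#include <algorithm>
#include <iostream>
#include <list>
#include <string>
#include <vector>

struct node
{
    std::string name;
    std::vector<node*> inputs;
};

// Dependencies first, then src itself -- mirroring the new move_instructions.
void move_nodes(std::list<node*>& order, node* src, std::list<node*>::iterator dst)
{
    for(node* in : src->inputs)
    {
        if(std::find(order.begin(), order.end(), in) == order.end())
            continue; // analogous to the contains(...) guard for foreign instructions
        move_nodes(order, in, dst);
    }
    order.splice(dst, order, std::find(order.begin(), order.end(), src));
}

int main()
{
    node a{"a", {}}, b{"b", {&a}}, d{"d", {&b}}, c{"c", {}};
    std::list<node*> order{&c, &a, &b, &d};
    move_nodes(order, &d, order.begin()); // move d and its chain before c
    for(node* n : order)
        std::cout << n->name << ' '; // prints: a b d c
    std::cout << '\n';
}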
@@ -412,6 +412,24 @@ struct find_concat_op
     }
 };

+void move_instructions_back(module& m, instruction_ref pos, std::vector<instruction_ref> inss)
+{
+    auto start = range(m.begin(), pos);
+    for(auto ins : iterator_for(start))
+    {
+        auto it = std::find(inss.begin(), inss.end(), ins);
+        if(it != inss.end())
+            inss.erase(it);
+    }
+    for(auto ins : inss)
+    {
+        if(not m.has_instruction(ins))
+            continue;
+        move_instructions_back(m, pos, ins->inputs());
+        m.move_instruction(ins, pos);
+    }
+}
+
 std::vector<instruction_ref> get_splits(instruction_ref ins)
 {
     std::vector<instruction_ref> result;
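The move_instructions_back helper added above is the batched counterpart used by the two matchers below. Its first loop scans [m.begin(), pos) and drops from inss anything already placed before pos, so correctly ordered instructions are left untouched; the second loop then pulls each remaining instruction to just before pos, recursing into its inputs first so producers always land ahead of their consumers.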
@@ -587,8 +605,7 @@ struct find_splits
                }))
             return;

-        for(auto data : data_args)
-            m.move_instructions(data, ins);
+        move_instructions_back(m, ins, data_args);

         auto slice_op = any_cast<op::slice>(splits.front()->get_operator());
         assert(not slice_op.axes.empty());
@@ -841,8 +858,7 @@ struct find_conv_dot_horiz_fusion
                 concat_axis = axis;
         }

-        for(auto arg : args)
-            m.move_instructions(arg, input);
+        move_instructions_back(m, input, args);

         // TODO: Check if axes match
         auto concat =
             m.insert_instruction(input, make_op("concat", {{"axis", concat_axis}}), args);
...
@@ -39,19 +39,26 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_COMPILE_PARALLEL);

 struct precompile_op
 {
     operation op = op::identity{};
+    std::size_t additional_args = 1;
+    bool ignore_modules = false;

     template <class Self, class F>
     static auto reflect(Self& self, F f)
     {
-        return pack(f(self.op, "op"));
+        return pack(f(self.op, "op"),
+                    f(self.additional_args, "additional_args"),
+                    f(self.ignore_modules, "ignore_modules"));
     }

     std::string name() const { return "gpu::precompile_op"; }

     shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
     {
-        inputs.pop_back(); // Pop off additional args
+        inputs.resize(inputs.size() - additional_args);
+        if(ignore_modules)
+            return op.compute_shape(inputs);
         return op.compute_shape(inputs, mods);
     }
...
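precompile_op previously always popped a single trailing allocation; it can now trim a configurable tail of inputs and bypass module-aware shape computation. Worked example (hypothetical names): a fused concat carrying inputs {x0, x1, z, alloc}, where z is an extra pointwise argument and alloc the output allocation, is created with additional_args = 2; compute_shape then resizes the input list down to {x0, x1} and, because ignore_modules is set, returns op.compute_shape({x0, x1}) without consulting the attached pointwise module.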
@@ -1183,6 +1183,34 @@ struct find_layernorm_pointwise
     }
 };

+struct find_concat_pointwise
+{
+    auto matcher() const
+    {
+        return precompile_name("pointwise")(
+            match::arg(0)(precompile_name("concat").bind("concat")));
+    }
+
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins    = r.result;
+        auto concat = r.instructions["concat"];
+        auto* pm    = ins->module_inputs().front();
+        if(not concat->module_inputs().empty())
+            return;
+        auto inputs = concat->inputs();
+        inputs.pop_back();
+        inputs.insert(inputs.end(), ins->inputs().begin() + 1, ins->inputs().end());
+        auto op = concat->get_operator();
+        op.from_value({{"additional_args", ins->inputs().size() - 1}, {"ignore_modules", true}});
+        m.replace_instruction(ins, op, inputs, {pm});
+    }
+};
+
 void fuse_ops::apply(module& m) const
 {
     match::find_matches(m, find_contiguous_pointwise{}, find_gelu{}, find_gelu_new{fast_math});
@@ -1206,6 +1234,7 @@ void fuse_ops::apply(module& m) const
                        find_triadd_layernorm{},
                        find_gemm_add{},
                        find_layernorm_pointwise{},
+                       find_concat_pointwise{},
                        find_gemm_pointwise{},
                        find_contiguous_tranpose_gemm{},
                        find_commutative_broadcast{});
...
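The new find_concat_pointwise matcher folds a pointwise consumer into the preceding precompiled concat: the concat's own allocation is dropped, the pointwise instruction's extra inputs (everything after the concat result) are appended, and the pointwise module rides along as a module input. Roughly, with hypothetical value names:

before: c = gpu::precompile_op[op=concat](x0, x1, alloc1)
        p = gpu::precompile_op[op=pointwise](c, z, alloc2), module: pm
after:  p = gpu::precompile_op[op=concat, additional_args=2,
            ignore_modules=true](x0, x1, z, alloc2), module: pm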
@@ -38,16 +38,19 @@ using namespace migraphx::gpu::gen; // NOLINT

 static const char* const concat_kernel = R"__migraphx__(
 #include <migraphx/kernels/concat.hpp>
 #include <migraphx/kernels/vectorize.hpp>
+#include <migraphx/kernels/ops.hpp>
 #include <args.hpp>

 namespace migraphx {

+${preamble}
+
 extern "C" {
 __global__ void ${kernel}(${params})
 {
-    transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, auto... xs) {
-        concat<${axis}>(y, xs...);
+    transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, ${concat_params}, auto... xs) {
+        concat<${axis}>(${concat_args})(${post}, y, xs...);
     });
 }
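For the fused case the placeholders expand to a generated pointwise preamble plus per-input parameters. A hedged sketch of the interpolated source, assuming axis 1, two concat inputs, and one extra pointwise argument; this is illustrative, not literal compiler output, and the generated names are assumptions:

// ${preamble}: generate_pointwise(*pm, "post_concat") emits a device function
// along the lines of: auto post_concat(auto x, auto z) { ... }
__global__ void concat_add_kernel(void* private_p0, void* private_p1, void* private_p2, void* private_p3)
{
    transform_args(make_tensors(), rotate_last(), /* ${transformers} */)(
        private_p0, private_p1, private_p2, private_p3)(
        [](auto y, auto concat_x0, auto concat_x1, auto... xs) {
            // rotate_last() puts the output tensor first; xs... carries the
            // extra pointwise inputs (here, one tensor).
            concat<1>(concat_x0, concat_x1)(MIGRAPHX_LIFT(post_concat), y, xs...);
        });
}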
@@ -68,28 +71,42 @@ struct concat_compiler : compiler<concat_compiler>

     operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
     {
-        // TODO: Use reduce_dims
+        auto num_of_concat_inputs = v.get("concat_inputs", inputs.size() - 1);
         hip_compile_options options;
         options.inputs      = inputs;
         options.output      = inputs.back();
         options.params      = "-Wno-float-equal";
-        options.kernel_name = v.get("kernel", "concat_kernel");
         auto axis           = find_fast_axis(options.inputs);
         auto vec            = vectorize::elements(axis, options.inputs);
+        options.kernel_name = v.get("kernel", "concat_kernel");
         options.set_launch_params(
             v, compute_global_for(ctx, get_concat_elements(options.inputs) / vec.size, 256));
-        auto src = interpolate_string(concat_kernel,
-                                      {{"kernel", options.kernel_name},
-                                       {"params", enum_params(inputs.size(), "void * private_p")},
-                                       {"args", enum_params(inputs.size(), "private_p")},
-                                       {"transformers", make_transformer_args(vec)},
-                                       {"axis", v.at("axis").to<std::string>()}});
+        auto src = interpolate_string(
+            concat_kernel,
+            {{"kernel", options.kernel_name},
+             {"params", enum_params(inputs.size(), "void * private_p")},
+             {"args", enum_params(inputs.size(), "private_p")},
+             {"concat_params", enum_params(num_of_concat_inputs, "auto concat_x")},
+             {"concat_args", enum_params(num_of_concat_inputs, "concat_x")},
+             {"post", v.get("post", std::string{"op::id{}"})},
+             {"transformers", make_transformer_args(vec)},
+             {"preamble", v.get("preamble", std::string{})},
+             {"axis", v.at("axis").to<std::string>()}});
         return compile_hip_code_object(src, options);
     }

     compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
     {
-        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
+        auto v = op.to_value();
+        if(not ins->module_inputs().empty())
+        {
+            auto* pm           = ins->module_inputs().front();
+            v["concat_inputs"] = ins->inputs().size() - pm->get_parameter_names().size();
+            v["preamble"]      = generate_pointwise(*pm, "post_concat");
+            v["post"]          = "MIGRAPHX_LIFT(post_concat)";
+            v["kernel"]        = "concat_" + generate_name_from_ops(*pm) + "_kernel";
+        }
+        return replace(compile_op(ctx, to_shapes(ins->inputs()), v));
     }
 };
...
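When the instruction carries a fused pointwise module, compile() now seeds the compilation value itself: subtracting the module's parameter count (the concat result plus each extra input) from the instruction's input count (which also includes the output allocation) leaves exactly the number of true concat inputs; the post_concat preamble is generated from the module and lifted in as the post op, and the kernel is named after the fused ops. Without a module, the defaults reproduce the old plain-concat behaviour: every input but the allocation is a concat input, the preamble is empty, and the post op is op::id{}.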
@@ -44,6 +44,12 @@ constexpr auto concat_slice(Output out, Input, Start)
     return make_tensor_view(&out[offset], s);
 }

+template <index_int Axis, class Input, class Start, class... Ts>
+constexpr auto concat_slices(Input input, Start start, Ts... xs)
+{
+    return [=](auto f) { f(concat_slice<Axis>(xs, input, start)...); };
+}
+
 template <index_int Axis, class Input>
 constexpr auto concat_ends(Input)
 {
@@ -51,15 +57,19 @@ constexpr auto concat_ends(Input)
     return _c<lens[Axis]>;
 }

-template <index_int Axis, class Output, class... Inputs>
-__device__ void concat(Output output, Inputs... inputs)
+template <index_int Axis, class... Inputs>
+__device__ auto concat(Inputs... inputs)
 {
-    auto idx = make_index();
-    fold([&](auto start, auto input) {
-        auto y = concat_slice<Axis>(output, input, start);
-        idx.global_stride(input.get_shape().elements(), [&](auto i) { y[i] = input[i]; });
-        return start + concat_ends<Axis>(input);
-    })(_c<0>, inputs...);
+    return [=](auto f, auto... ts) {
+        auto idx = make_index();
+        fold([&](auto start, auto input) {
+            concat_slices<Axis>(input, start, ts...)([&](auto y, auto... xs) {
+                idx.global_stride(input.get_shape().elements(),
+                                  [&](auto i) { y[i] = f(input[i], xs[i]...); });
+            });
+            return start + concat_ends<Axis>(input);
+        })(_c<0>, inputs...);
+    };
 }

 } // namespace migraphx
...
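concat is now a higher-order device function: it captures the concat inputs and returns a callable taking the post-op f plus the trailing tensors (the output first, then any extra pointwise inputs), each sliced to the current input's window so f sees element-wise-aligned values. A minimal host-side analogue of that control flow, using 1-D std::vector stand-ins (hypothetical, not MIGraphX code; the GPU version strides the loop across threads instead):

#include <cstddef>
#include <iostream>
#include <vector>

using vec = std::vector<int>;

// Capture the concat inputs; return a callable taking the post-op and the
// trailing tensors (output y, extra input z), like the device version.
template <class... Inputs>
auto concat(const Inputs&... inputs)
{
    return [=](auto f, vec& y, const vec& z) {
        std::size_t start = 0; // running offset along the concat axis
        for(const vec* in : {&inputs...})
        {
            // y and z are "sliced" at the same window as this input
            for(std::size_t i = 0; i < in->size(); ++i)
                y[start + i] = f((*in)[i], z[start + i]);
            start += in->size();
        }
    };
}

int main()
{
    vec x0{1, 2}, x1{3, 4, 5};
    vec z{10, 20, 30, 40, 50}, y(5);
    // Equivalent in spirit to concat<Axis>(x0, x1)(post_concat, y, z)
    concat(x0, x1)([](int a, int b) { return a + b; }, y, z);
    for(int v : y)
        std::cout << v << ' '; // prints: 11 22 33 44 55
    std::cout << '\n';
}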
@@ -99,7 +99,10 @@ struct mlir_handle

     mlir_handle(T p) : handle(ptr{p}) {}

-    T get() const { return handle.get().get(); }
+    T get() const
+    {
+        return handle.get().get(); // NOLINT(readability-redundant-smartptr-get)
+    }

     T release() { return handle.release().get(); }

@@ -571,7 +574,7 @@ struct mlir_program
         MIGRAPHX_THROW("Failed to compile mlir program");
     }

-    std::string get_tune_params() { return get_mlir_perf_for_conv(pp); }
+    std::string get_tune_params() const { return get_mlir_perf_for_conv(pp); }

     mlir_context ctx;
     MlirLocation location;
...