"doc/git@developer.sourcefind.cn:wqshmzh/ktransformers.git" did not exist on "de3faaf55d2125b02660ce83e625ec0851c304f1"
Unverified commit 661046c6, authored by Paul Fultz II, committed by GitHub

Refactor runtime-compiled kernels to use the same compile_ops pipeline (#1125)

This adds the infrastructure to compile all runtime-compiled kernels in parallel; previously only pointwise kernels were compiled in parallel. It also integrates directly with lowering and the gpu-driver. The pointwise and roialign kernels are using this infrastructure. Scatternd is not, since it does require a standard shape.

This also makes it easier to add new runtime-compiled kernels in the future.
parent 024b4abc
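For orientation, adding a new runtime-compiled kernel now amounts to defining one self-registering compiler class under jit/. The sketch below is illustrative only: the operator name my_kernel, its file name, and its device source are hypothetical, while compiler, names(), compile(), compile_op(), replace(), set_launch_params, compute_global_for, and compile_hip_code_object are the pieces introduced or reused by this commit.

// Hypothetical src/targets/gpu/jit/my_kernel.cpp -- a sketch, not part of this commit.
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/context.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

// Made-up device source; a real kernel body would live here.
static const char* const my_kernel_src = R"__migraphx__(
#include <args.hpp>
extern "C" {
__global__ void my_kernel(void* x, void* y) {}
}
)__migraphx__";

struct my_kernel_compiler : compiler<my_kernel_compiler>
{
    // Operator names handled by this compiler; lowering and compile_ops dispatch on them.
    std::vector<std::string> names() const { return {"my_kernel"}; }

    // Value-driven entry point, also reachable from the gpu-driver via compile_op().
    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        hip_compile_options options;
        // local defaults to 1024; "local"/"global" keys in v override the computed values.
        options.set_launch_params(v, compute_global_for(ctx, inputs.front().elements()));
        options.inputs      = inputs;
        options.output      = inputs.back();
        options.kernel_name = "my_kernel";
        return compile_hip_code_object(my_kernel_src, options);
    }

    // Instruction-driven entry point used by the parallel compile_ops pass.
    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
    {
        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

Because compiler<Derived> derives from auto_register_compiler, dropping such a file under jit/ (picked up by the new JIT_GPU_SRCS glob in the CMake change below) registers the operator for both lowering and the gpu-driver.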
@@ -43,7 +43,7 @@ struct roialign
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(3).standard();
+        check_shapes{inputs, *this}.has(3);
         auto x_lens   = inputs.at(0).lens();
         auto roi_lens = inputs.at(1).lens();
         auto bi_lens  = inputs.at(2).lens();
......
@@ -119,6 +119,7 @@ target_link_libraries(kernel_file_check compile_for_gpu)
 rocm_clang_tidy_check(kernel_file_check)
 
+file(GLOB JIT_GPU_SRCS ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/jit/*.cpp)
 add_library(migraphx_gpu
     abs.cpp
     analyze_streams.cpp
@@ -131,9 +132,7 @@ add_library(migraphx_gpu
     compile_ops.cpp
     compile_hip.cpp
     compile_hip_code_object.cpp
-    compile_pointwise.cpp
-    compile_roialign.cpp
-    compile_scatternd.cpp
+    compiler.cpp
     concat.cpp
     convert.cpp
     convolution.cpp
@@ -171,6 +170,7 @@ add_library(migraphx_gpu
     target.cpp
     topk.cpp
     write_literals.cpp
+    ${JIT_GPU_SRCS}
 )
 set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
......
@@ -248,14 +248,6 @@ std::string enum_params(std::size_t count, std::string param)
     return join_strings(items, ",");
 }
 
-std::size_t compute_global(std::size_t n, std::size_t local)
-{
-    std::size_t groups = (n + local - 1) / local;
-    // max possible number of blocks is set to 1B (1,073,741,824)
-    std::size_t nglobal = std::min<std::size_t>(1073741824, groups) * local;
-    return nglobal;
-}
-
 #endif // MIGRAPHX_USE_HIPRTC
 
 } // namespace gpu
......
@@ -93,6 +93,32 @@ const std::vector<std::string>& compiler_warnings()
     return warnings;
 }
 
+void hip_compile_options::set_launch_params(
+    const value& v,
+    const std::function<std::size_t(std::size_t local)>& compute_global,
+    std::size_t default_local)
+{
+    local = v.get("local", default_local);
+    if(v.contains("global"))
+        global = v.at("global").to<std::size_t>();
+    else
+        global = compute_global(local);
+}
+
+std::function<std::size_t(std::size_t local)>
+compute_global_for(context& ctx, std::size_t n, std::size_t over)
+{
+    assert(over > 0);
+    std::size_t max_global = ctx.get_current_device().get_cu_count() *
+                             ctx.get_current_device().get_max_workitems_per_cu();
+    return [n, over, max_global](std::size_t local) {
+        std::size_t groups     = (n + local - 1) / local;
+        std::size_t max_blocks = max_global / local;
+        std::size_t nglobal    = std::min(max_blocks * over, groups) * local;
+        return nglobal;
+    };
+}
+
 operation compile_hip_code_object(const std::string& content, hip_compile_options options)
 {
     std::vector<src_file> srcs;
......
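The compute_global_for added above replaces the old fixed cap of 1,073,741,824 blocks with a device-derived ceiling. As a rough worked example (the 120 CUs, 2048 work-items per CU, and the element count are made-up numbers, not from this commit), the returned lambda sizes a launch like this:

// Standalone sketch of the arithmetic only; not MIGraphX code.
#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    std::size_t n          = 10'000'000; // elements to cover (made up)
    std::size_t local      = 1024;       // default workgroup size
    std::size_t over       = 4;          // oversubscription factor
    std::size_t max_global = 120 * 2048; // cu_count * max_workitems_per_cu = 245,760

    std::size_t groups     = (n + local - 1) / local;                     // 9,766 groups needed
    std::size_t max_blocks = max_global / local;                          // 240 blocks fill the device once
    std::size_t nglobal    = std::min(max_blocks * over, groups) * local; // capped at 960 blocks

    std::cout << nglobal << "\n"; // 983,040 work-items instead of ~10,000,000
    return 0;
}

The point of the change is that the cap now tracks the actual device rather than a hard-coded constant.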
@@ -6,7 +6,7 @@
 #include <migraphx/par_for.hpp>
 #include <migraphx/register_op.hpp>
 #include <migraphx/op/identity.hpp>
-#include <migraphx/gpu/compile_pointwise.hpp>
+#include <migraphx/gpu/compiler.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -40,35 +40,9 @@ struct precompile_op
 MIGRAPHX_REGISTER_OP(precompile_op);
 
-struct pointwise_compiler
-{
-    std::string name() const { return "pointwise"; }
-    operation apply(context& ctx, instruction_ref ins, const operation&) const
-    {
-        assert(not ins->module_inputs().empty());
-        auto* pm = ins->module_inputs().front();
-        return compile_pointwise(ctx, to_shapes(ins->inputs()), *pm);
-    }
-};
-
-using compiler_function = std::function<operation(context&, instruction_ref, operation)>;
-
-template <class T>
-compiler_function make_compiler_function(T x)
-{
-    return {[=](auto&&... xs) { return x.apply(xs...); }};
-}
-
-template <class... Ts>
-std::unordered_map<std::string, compiler_function> make_compilers(Ts... xs)
-{
-    return {{xs.name(), make_compiler_function(xs)}...};
-}
-
 struct compiled_result
 {
-    operation op;
+    compiler_replace replace;
     instruction_ref ins;
 };
@@ -82,7 +56,6 @@ void par_compile(std::size_t n, F f)
 void compile_ops::apply(module& m) const
 {
-    auto compilers = make_compilers(pointwise_compiler{});
     std::vector<std::function<compiled_result()>> compiles;
     for(auto ins : iterator_for(m))
@@ -90,15 +63,15 @@ void compile_ops::apply(module& m) const
         if(ins->name() != "gpu::precompile_op")
             continue;
         operation preop = any_cast<precompile_op>(ins->get_operator()).op;
-        assert(contains(compilers, preop.name()));
-        auto c = compilers[preop.name()];
-        compiles.emplace_back([=]() -> compiled_result { return {c(*ctx, ins, preop), ins}; });
+        compiles.emplace_back([=]() -> compiled_result {
+            return {compile(*ctx, ins, preop), ins};
+        });
     }
     std::vector<compiled_result> results(compiles.size());
     par_compile(compiles.size(), [&](auto i) { results[i] = compiles[i](); });
     for(const auto& cr : results)
     {
-        m.replace_instruction(cr.ins, cr.op, cr.ins->inputs());
+        cr.replace(m, cr.ins);
     }
 }
......
#include <migraphx/gpu/compile_pointwise.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/cpp_generator.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/module.hpp>
#include <migraphx/pass_manager.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
static const char* const pointwise_kernel = R"__migraphx__(
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/pointwise.hpp>
#include <args.hpp>
namespace migraphx {
${preamble}
extern "C" {
__global__ void kernel(${params})
{
pointwise(${lambda}, ${args});
}
}
} // namespace migraphx
int main() {}
)__migraphx__";
operation compile_pointwise(context&,
const std::vector<shape>& inputs,
const std::string& lambda,
const std::string& preamble)
{
hip_compile_options options;
options.global = compute_global(inputs.front().elements());
options.local = 1024;
options.inputs = inputs;
options.output = inputs.back();
options.virtual_inputs = reduce_dims(inputs);
options.params = "-Wno-float-equal";
auto src = interpolate_string(pointwise_kernel,
{{"params", enum_params(inputs.size(), "void * private_p")},
{"args", enum_params(inputs.size(), "private_p")},
{"lambda", lambda},
{"preamble", preamble}});
return compile_hip_code_object(src, options);
}
operation compile_pointwise(context& ctx, const std::vector<shape>& inputs, module m)
{
run_passes(m, {eliminate_common_subexpression{}, dead_code_elimination{}});
cpp_generator g;
g.fmap([](const std::string& fname) { return "migraphx::" + fname; });
g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");
g.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})");
g.add_point_op("sign", "${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))");
g.add_point_op("equal", "migraphx::abs(${0} == ${1})");
g.add_point_op("less", "migraphx::abs(${0} < ${1})");
g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
g.add_point_op("not", "migraphx::abs(not ${0})");
// Add explicit conversions
g.fresult(
[](const shape& s) { return "migraphx::convert<" + shape::cpp_type(s.type()) + ">"; });
auto name =
g.create_function(g.generate_module(m).set_attributes({"__device__"}).set_generic_types(m));
return compile_pointwise((ctx), inputs, "MIGRAPHX_LIFT(" + name + ")", g.str());
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/compiler.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
auto& compiler_map()
{
static std::unordered_map<std::string, compiler_compile> m; // NOLINT
return m;
}
auto& compiler_op_map()
{
static std::unordered_map<std::string, compiler_compile_op> m; // NOLINT
return m;
}
void register_compiler(const std::string& name, compiler_compile c, compiler_compile_op cop)
{
compiler_map()[name] = std::move(c);
compiler_op_map()[name] = std::move(cop);
}
bool has_compiler_for(const std::string& name) { return compiler_map().count(name) > 0; }
compiler_replace compile(context& ctx, instruction_ref ins, const operation& op)
{
return compiler_map().at(op.name())(ctx, ins, op);
}
operation
compile_op(const std::string& name, context& ctx, const std::vector<shape>& inputs, const value& v)
{
return compiler_op_map().at(name)(ctx, inputs, v);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
+file(GLOB GPU_DRIVER_SRCS ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
 add_executable(gpu-driver
-    action.cpp
-    compile_pointwise.cpp
-    main.cpp
-    parser.cpp
-    perf.cpp
-    run_op.cpp
+    ${GPU_DRIVER_SRCS}
 )
 target_include_directories(gpu-driver PRIVATE include)
 target_link_libraries(gpu-driver PRIVATE migraphx_gpu)
 #include <migraphx/gpu/driver/action.hpp>
 #include <migraphx/gpu/driver/perf.hpp>
-#include <migraphx/gpu/compile_pointwise.hpp>
+#include <migraphx/gpu/compiler.hpp>
 #include <migraphx/gpu/context.hpp>
 
 namespace migraphx {
@@ -8,13 +8,13 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace driver {
 
-struct compile_pointwise : action<compile_pointwise>
+struct compile_op : action<compile_op>
 {
     static void apply(const parser& p, const value& v)
     {
         context ctx;
         auto inputs = p.parse_shapes(v.at("inputs"));
-        auto op     = gpu::compile_pointwise(ctx, inputs, v.at("lambda").to<std::string>());
+        auto op     = gpu::compile_op(v.at("name").to<std::string>(), ctx, inputs, v);
         double t    = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
         std::cout << op << ": " << t << "ms" << std::endl;
     }
......
@@ -17,8 +17,6 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
 std::string enum_params(std::size_t count, std::string param);
 
-std::size_t compute_global(std::size_t n, std::size_t local = 1024);
-
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
......
@@ -8,6 +8,8 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 
+struct context;
+
 struct hip_compile_options
 {
     std::size_t global;
@@ -17,8 +19,24 @@ struct hip_compile_options
     std::string kernel_name = "kernel";
     std::string params      = "";
     std::vector<shape> virtual_inputs = {};
+
+    /**
+     * @brief Set the launch parameters, allowing v to override the values
+     *
+     * @param v A value which may contain "global" and/or "local" keys to override the default
+     * global and local sizes
+     * @param compute_global A function used to compute the global based on the local
+     * @param default_local The default local to use if it is missing from the v parameter
+     */
+    void set_launch_params(const value& v,
+                           const std::function<std::size_t(std::size_t local)>& compute_global,
+                           std::size_t default_local = 1024);
 };
 
+/// Compute the global for n elements, but cap it at a target-specific upper limit
+std::function<std::size_t(std::size_t local)>
+compute_global_for(context& ctx, std::size_t n, std::size_t over = 1);
+
 operation compile_hip_code_object(const std::string& content, hip_compile_options options);
 
 } // namespace gpu
......
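To make the override behaviour described above concrete, here is a minimal sketch (not part of the commit) of a helper that would be called from GPU target code with a live context; the helper name make_options and the key "some_attr" are made up, while "local" and "global" are the real override keys:

// Sketch only, assuming migraphx_gpu headers are available and ctx comes from the GPU target.
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/value.hpp>
#include <vector>

migraphx::gpu::hip_compile_options make_options(migraphx::gpu::context& ctx,
                                                const std::vector<migraphx::shape>& inputs,
                                                const migraphx::value& v)
{
    migraphx::gpu::hip_compile_options options;
    auto sizer = migraphx::gpu::compute_global_for(ctx, inputs.front().elements());

    // 1. v = {{"some_attr", 3}}: neither key present, so local = 1024 and global = sizer(1024).
    // 2. v = {{"local", 256}}: local = 256 and global is recomputed as sizer(256).
    // 3. v = {{"local", 256}, {"global", 65536}}: both are taken verbatim; sizer is not called.
    options.set_launch_params(v, sizer);

    options.inputs = inputs;
    options.output = inputs.back();
    return options;
}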
#ifndef MIGRAPHX_GUARD_GPU_COMPILE_POINTWISE_HPP
#define MIGRAPHX_GUARD_GPU_COMPILE_POINTWISE_HPP
#include <migraphx/config.hpp>
#include <migraphx/operation.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct module;
namespace gpu {
struct context;
operation compile_pointwise(context& ctx,
const std::vector<shape>& inputs,
const std::string& lambda,
const std::string& preamble = "");
operation compile_pointwise(context& ctx, const std::vector<shape>& inputs, module m);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_COMPILE_POINTWISE_HPP
#ifndef MIGRAPHX_GUARD_GPU_COMPILE_ROIALIGN_HPP
#define MIGRAPHX_GUARD_GPU_COMPILE_ROIALIGN_HPP
#include <migraphx/config.hpp>
#include <migraphx/operation.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
operation compile_roialign(context& ctx, const std::vector<shape>& io_shapes, const value& val);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_COMPILE_ROIALIGN_HPP
#ifndef MIGRAPHX_GUARD_GPU_COMPILE_SCATTERND_HPP
#define MIGRAPHX_GUARD_GPU_COMPILE_SCATTERND_HPP
#include <migraphx/config.hpp>
#include <migraphx/operation.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
operation
compile_scatternd(context& ctx, const std::vector<shape>& io_shapes, const std::string& reduction);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_COMPILE_SCATTERND_HPP
#ifndef MIGRAPHX_GUARD_GPU_COMPILER_HPP
#define MIGRAPHX_GUARD_GPU_COMPILER_HPP
#include <migraphx/config.hpp>
#include <migraphx/auto_register.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/value.hpp>
#include <migraphx/module.hpp>
#include <migraphx/instruction.hpp>
#include <functional>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
using compiler_replace = std::function<void(module& m, instruction_ref ins)>;
using compiler_compile = std::function<compiler_replace(context&, instruction_ref, operation)>;
using compiler_compile_op =
std::function<operation(context&, const std::vector<shape>& inputs, const value&)>;
void register_compiler(const std::string& name, compiler_compile c, compiler_compile_op cop);
bool has_compiler_for(const std::string& name);
compiler_replace compile(context& ctx, instruction_ref ins, const operation& op);
operation
compile_op(const std::string& name, context& ctx, const std::vector<shape>& inputs, const value& v);
template <class T>
void register_compiler()
{
T c;
for(auto&& name : c.names())
{
register_compiler(
name,
[=](auto&&... xs) { return c.compile(std::forward<decltype(xs)>(xs)...); },
[=](auto&&... xs) { return c.compile_op(std::forward<decltype(xs)>(xs)...); });
}
}
struct register_compiler_action
{
template <class T>
static void apply()
{
register_compiler<T>();
}
};
template <class T>
using auto_register_compiler = auto_register<register_compiler_action, T>;
template <class Derived>
struct compiler : auto_register_compiler<Derived>
{
auto replace(const operation& op) const
{
return
[=](module& m, instruction_ref ins) { m.replace_instruction(ins, op, ins->inputs()); };
}
operation compile_op(context&, const std::vector<shape>&, const value&) const { return {}; }
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_COMPILER_HPP
@@ -154,6 +154,13 @@ struct hip_device
     std::size_t get_cu_count() const { return device_props.multiProcessorCount; }
 
+    std::size_t get_max_workitems_per_cu() const
+    {
+        return device_props.maxThreadsPerMultiProcessor;
+    }
+
+    std::size_t get_max_workitems_per_block() const { return device_props.maxThreadsPerBlock; }
+
     private:
     std::size_t device_id      = 0;
     std::size_t current_stream = 0;
......
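The two new accessors simply surface values that HIP already reports through hipGetDeviceProperties; a minimal standalone query (not MIGraphX code) would look like this:

// Standalone HIP sketch: prints the fields exposed by the new hip_device accessors.
#include <hip/hip_runtime.h>
#include <cstdio>

int main()
{
    hipDeviceProp_t props{};
    if(hipGetDeviceProperties(&props, 0) != hipSuccess)
        return 1;
    std::printf("CUs: %d, max work-items/CU: %d, max work-items/block: %d\n",
                props.multiProcessorCount,
                props.maxThreadsPerMultiProcessor,
                props.maxThreadsPerBlock);
    return 0;
}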
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/cpp_generator.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/module.hpp>
#include <migraphx/pass_manager.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
static const char* const pointwise_kernel = R"__migraphx__(
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/pointwise.hpp>
#include <args.hpp>
namespace migraphx {
${preamble}
extern "C" {
__global__ void kernel(${params})
{
pointwise(${lambda}, ${args});
}
}
} // namespace migraphx
)__migraphx__";
struct pointwise_compiler : compiler<pointwise_compiler>
{
std::vector<std::string> names() const { return {"pointwise"}; }
static std::size_t oversubscribe(const std::vector<shape>& inputs)
{
if(std::any_of(inputs.begin(), inputs.end(), [](const auto& s) { return s.broadcasted(); }))
return 1;
else
return 4;
}
static std::size_t vectorize_elements(const std::vector<shape>& inputs)
{
std::size_t n = inputs.front().elements();
if(std::all_of(inputs.begin(), inputs.end(), [](const auto& s) {
return s.packed() or s.broadcasted();
}))
{
if((n % 4) == 0)
return n / 4;
else if((n % 2) == 0)
return n / 2;
}
return n;
}
operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
{
hip_compile_options options;
options.set_launch_params(
v, compute_global_for(ctx, vectorize_elements(inputs), oversubscribe(inputs)));
options.inputs = inputs;
options.output = inputs.back();
options.virtual_inputs = reduce_dims(inputs);
options.params = "-Wno-float-equal";
auto src = interpolate_string(pointwise_kernel,
{{"params", enum_params(inputs.size(), "void * private_p")},
{"args", enum_params(inputs.size(), "private_p")},
{"lambda", v.at("lambda").to<std::string>()},
{"preamble", v.get("preamble", std::string{})}});
return compile_hip_code_object(src, options);
}
compiler_replace compile(context& ctx, instruction_ref ins, const operation&) const
{
assert(not ins->module_inputs().empty());
auto* pm = ins->module_inputs().front();
run_passes(*pm, {eliminate_common_subexpression{}, dead_code_elimination{}});
cpp_generator g;
g.fmap([](const std::string& fname) { return "migraphx::" + fname; });
g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");
g.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})");
g.add_point_op("sign",
"${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))");
g.add_point_op("equal", "migraphx::abs(${0} == ${1})");
g.add_point_op("less", "migraphx::abs(${0} < ${1})");
g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
g.add_point_op("not", "migraphx::abs(not ${0})");
// Add explicit conversions
g.fresult(
[](const shape& s) { return "migraphx::convert<" + shape::cpp_type(s.type()) + ">"; });
auto name = g.create_function(
g.generate_module(*pm).set_attributes({"__device__"}).set_generic_types(*pm));
std::string lambda = "MIGRAPHX_LIFT(" + name + ")";
return replace(
compile_op(ctx, to_shapes(ins->inputs()), {{"lambda", lambda}, {"preamble", g.str()}}));
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
-#include <migraphx/gpu/compile_roialign.hpp>
+#include <migraphx/gpu/compiler.hpp>
 #include <migraphx/gpu/compile_hip_code_object.hpp>
-#include <migraphx/gpu/compile_hip.hpp>
 #include <migraphx/gpu/context.hpp>
+#include <migraphx/gpu/compile_hip.hpp>
-#include <migraphx/cpp_generator.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/reduce_dims.hpp>
 #include <migraphx/stringutils.hpp>
-#include <migraphx/dead_code_elimination.hpp>
-#include <migraphx/eliminate_common_subexpression.hpp>
-#include <migraphx/module.hpp>
-#include <migraphx/pass_manager.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -37,46 +43,46 @@ __global__ void roialign_kernel(void* in_x, void* in_rois, void* in_ind, void* y)
 
 } // namespace migraphx
 
-int main() {}
-
 )__migraphx__";
 
-operation compile_roialign(context&, const std::vector<shape>& io_shapes, const value& val)
+struct roialign_compiler : compiler<roialign_compiler>
 {
-    hip_compile_options options;
-    auto out_s = io_shapes.back();
-    options.local          = 128;
-    options.global         = compute_global(out_s.elements(), options.local);
-    options.inputs         = io_shapes;
-    options.output         = out_s;
-    options.kernel_name    = "roialign_kernel";
-    options.virtual_inputs = io_shapes;
+    std::vector<std::string> names() const { return {"roialign"}; }
+
+    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
+    {
+        hip_compile_options options;
+        options.set_launch_params(v, compute_global_for(ctx, inputs.back().elements()), 128);
+        options.output      = inputs.back();
+        options.inputs      = inputs;
+        options.kernel_name = "roialign_kernel";
 
-    // sampling_ratio
-    assert(val.contains("sampling_ratio"));
-    auto sampling_ratio = val.at("sampling_ratio").to<int64_t>();
-    options.params += " -DSAMPLING_RATIO=" + std::to_string(sampling_ratio);
+        // sampling_ratio
+        options.params += " -DSAMPLING_RATIO=" + v.at("sampling_ratio").to<std::string>();
 
-    // pooling_mode
-    assert(val.contains("mode"));
-    auto mode           = val.at("mode").to<migraphx::op::pooling_mode>();
-    bool is_avg_pooling = (mode == migraphx::op::pooling_mode::average);
-    options.params += " -DIS_AVG_POOLING=" + std::to_string(static_cast<int>(is_avg_pooling));
+        // pooling_mode
+        auto mode = v.at("mode").to<migraphx::op::pooling_mode>();
+        std::string is_avg_pooling =
+            (mode == migraphx::op::pooling_mode::average) ? "true" : "false";
+        options.params += " -DIS_AVG_POOLING=" + is_avg_pooling;
 
-    // coord_trans_mode
-    assert(val.contains("coordinate_transformation_mode"));
-    auto ctm          = val.at("coordinate_transformation_mode").to<std::string>();
-    float rois_offset = (ctm == "output_half_pixel") ? -0.5f : 0.0f;
-    options.params += " -DROIS_OFFSET=" + std::to_string(rois_offset);
+        // coord_trans_mode
+        auto ctm          = v.at("coordinate_transformation_mode").to<std::string>();
+        float rois_offset = (ctm == "output_half_pixel") ? -0.5f : 0.0f;
+        options.params += " -DROIS_OFFSET=" + std::to_string(rois_offset);
 
-    // spatial_scale
-    assert(val.contains("spatial_scale"));
-    float spatial_scale = val.at("spatial_scale").to<float>();
-    options.params += " -DSPATIAL_SCALE=" + std::to_string(spatial_scale);
+        // spatial_scale
+        options.params += " -DSPATIAL_SCALE=" + v.at("spatial_scale").to<std::string>();
 
-    return compile_hip_code_object(roialign_kernel, options);
-}
+        return compile_hip_code_object(roialign_kernel, options);
+    }
+
+    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
+    {
+        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
+    }
+};
 
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
-#include <migraphx/gpu/compile_scatternd.hpp>
+#include <migraphx/gpu/compiler.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/compile_hip_code_object.hpp>
 #include <migraphx/gpu/compile_hip.hpp>
-#include <migraphx/gpu/context.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/reduce_dims.hpp>
 #include <migraphx/stringutils.hpp>
-#include <migraphx/dead_code_elimination.hpp>
-#include <migraphx/eliminate_common_subexpression.hpp>
-#include <migraphx/module.hpp>
-#include <migraphx/pass_manager.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -25,7 +31,7 @@ extern "C" {
 __global__ void scatternd_kernel(void* in_indices, void* in_updates, void* output)
 {
     make_tensors()(in_indices, in_updates, output)([](auto&&... xs) {
-        scatternd(xs..., REDUCTION);
+        scatternd(xs..., ${reduction}{});
     });
 }
@@ -33,28 +39,50 @@ __global__ void scatternd_kernel(void* in_indices, void* in_updates, void* output)
 
 } // namespace migraphx
 
-int main() {}
-
 )__migraphx__";
 
-operation
-compile_scatternd(context&, const std::vector<shape>& io_shapes, const std::string& reduction)
-{
-    hip_compile_options options;
-    auto out_s = io_shapes.back();
-    options.local          = 1024;
-    options.global         = compute_global(io_shapes.at(1).elements(), options.local);
-    options.inputs         = io_shapes;
-    options.output         = out_s;
-    options.kernel_name    = "scatternd_kernel";
-    options.virtual_inputs = io_shapes;
-    options.params += " -DREDUCTION=assign_" + reduction + "{}";
-    return compile_hip_code_object(scatternd_kernel, options);
-}
+struct scatternd_compiler : compiler<scatternd_compiler>
+{
+    std::vector<std::string> names() const
+    {
+        return {"scatternd_none", "scatternd_add", "scatternd_mul"};
+    }
+
+    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
+    {
+        hip_compile_options options;
+        options.set_launch_params(v, compute_global_for(ctx, inputs.at(1).elements()));
+        auto out_s             = inputs.back();
+        options.inputs         = inputs;
+        options.output         = out_s;
+        options.kernel_name    = "scatternd_kernel";
+        options.virtual_inputs = inputs;
+        auto reduction         = "assign_" + v.get("reduction", std::string{"none"});
+        auto src               = interpolate_string(scatternd_kernel, {{"reduction", reduction}});
+        return compile_hip_code_object(src, options);
+    }
+
+    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
+    {
+        assert(starts_with(op.name(), "scatternd_"));
+        auto reduction = op.name().substr(10);
+        return insert(compile_op(ctx,
+                                 to_shapes({ins->inputs().begin() + 1, ins->inputs().end()}),
+                                 {{"reduction", reduction}}));
+    }
+
+    compiler_replace insert(const operation& op) const
+    {
+        return [=](module& m, instruction_ref ins) {
+            auto args = ins->inputs();
+            args.back() =
+                m.insert_instruction(ins, make_op("hip::copy"), args.front(), args.back());
+            args.erase(args.begin());
+            return m.replace_instruction(ins, op, args);
+        };
+    }
+};
 
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
@@ -20,8 +20,6 @@
 #include <migraphx/gpu/abs.hpp>
 #include <migraphx/gpu/batch_norm_inference.hpp>
-#include <migraphx/gpu/compile_roialign.hpp>
-#include <migraphx/gpu/compile_scatternd.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/convolution.hpp>
 #include <migraphx/gpu/deconvolution.hpp>
@@ -42,6 +40,7 @@
 #include <migraphx/gpu/rocblas.hpp>
 #include <migraphx/gpu/unary_not.hpp>
 #include <migraphx/gpu/where.hpp>
+#include <migraphx/gpu/compiler.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/program.hpp>
 #include <utility>
@@ -195,8 +194,6 @@ struct miopen_apply
         add_extend_op("softmax");
         add_extend_op("topk");
 
-        add_precompile_op("pointwise");
-
         add_batch_norm_inference_op();
         add_convolution_op();
         add_deconvolution_op();
@@ -207,8 +204,6 @@ struct miopen_apply
         add_neg_op();
         add_nms_op();
        add_quant_convolution_op();
-        add_roialign();
-        add_scatternd();
     }
 
     void copy_params()
@@ -262,11 +257,28 @@ struct miopen_apply
             {
                check_shape(s, apply_map.at(it->name())(it));
             }
+            else if(has_compiler_for(it->name()))
+            {
+                check_shape(s, insert_precompile_op(it));
+            }
         }
         copy_params();
     }
 
+    instruction_ref insert_precompile_op(instruction_ref ins)
+    {
+        auto output                       = insert_allocation(ins, ins->get_shape());
+        std::vector<instruction_ref> refs = ins->inputs();
+        refs.push_back(output);
+
+        return mod->replace_instruction(
+            ins,
+            make_op("gpu::precompile_op", {{"op", to_value(ins->get_operator())}}),
+            refs,
+            ins->module_inputs());
+    }
+
     instruction_ref insert_allocation(instruction_ref ins, const shape& s, std::string tag = "")
     {
         // Instruction's output is an input of the ret instruction
@@ -396,21 +408,6 @@ struct miopen_apply
         });
     }
 
-    void add_precompile_op(const std::string& name)
-    {
-        apply_map.emplace(name, [=](instruction_ref ins) {
-            auto output                       = insert_allocation(ins, ins->get_shape());
-            std::vector<instruction_ref> refs = ins->inputs();
-            refs.push_back(output);
-
-            return mod->replace_instruction(
-                ins,
-                make_op("gpu::precompile_op", {{"op", to_value(ins->get_operator())}}),
-                refs,
-                ins->module_inputs());
-        });
-    }
-
     void add_batch_norm_inference_op()
     {
         apply_map.emplace("batch_norm_inference", [=](instruction_ref ins) {
@@ -501,75 +498,6 @@ struct miopen_apply
         });
     }
 
-    void add_roialign()
-    {
-        apply_map.emplace("roialign", [=](instruction_ref ins) {
-            auto s      = ins->get_shape();
-            auto op_val = ins->get_operator().to_value();
-            auto output = insert_allocation(ins, s);
-            auto args   = ins->inputs();
-            args.push_back(output);
-            auto io_shapes = to_shapes(args);
-            auto co        = compile_roialign(get_context(), io_shapes, op_val);
-            return mod->replace_instruction(ins, co, args);
-        });
-    }
-
-    void add_scatternd()
-    {
-        apply_map.emplace("scatternd_none", [=](instruction_ref ins) {
-            auto s      = ins->get_shape();
-            auto op_val = ins->get_operator().to_value();
-            auto output = insert_allocation(ins, s);
-            auto args   = ins->inputs();
-            args.push_back(output);
-            auto io_shapes = to_shapes(args);
-            io_shapes.erase(io_shapes.begin());
-            const std::string reduction = "none";
-            auto co   = compile_scatternd(get_context(), io_shapes, reduction);
-            auto copy = mod->insert_instruction(ins, make_op("hip::copy"), args.front(), output);
-            args.back() = copy;
-            args.erase(args.begin());
-            return mod->replace_instruction(ins, co, args);
-        });
-
-        apply_map.emplace("scatternd_add", [=](instruction_ref ins) {
-            auto s      = ins->get_shape();
-            auto op_val = ins->get_operator().to_value();
-            auto output = insert_allocation(ins, s);
-            auto args   = ins->inputs();
-            args.push_back(output);
-            auto io_shapes = to_shapes(args);
-            io_shapes.erase(io_shapes.begin());
-            const std::string reduction = "add";
-            auto co   = compile_scatternd(get_context(), io_shapes, reduction);
-            auto copy = mod->insert_instruction(ins, make_op("hip::copy"), args.front(), output);
-            args.back() = copy;
-            args.erase(args.begin());
-            return mod->replace_instruction(ins, co, args);
-        });
-
-        apply_map.emplace("scatternd_mul", [=](instruction_ref ins) {
-            auto s      = ins->get_shape();
-            auto op_val = ins->get_operator().to_value();
-            auto output = insert_allocation(ins, s);
-            auto args   = ins->inputs();
-            args.push_back(output);
-            auto io_shapes = to_shapes(args);
-            io_shapes.erase(io_shapes.begin());
-            const std::string reduction = "mul";
-            auto co   = compile_scatternd(get_context(), io_shapes, reduction);
-            auto copy = mod->insert_instruction(ins, make_op("hip::copy"), args.front(), output);
-            args.back() = copy;
-            args.erase(args.begin());
-            return mod->replace_instruction(ins, co, args);
-        });
-    }
-
     // replace the loop operator with gpu_loop operator
     void add_loop_op()
     {
......