Commit d3267bb3 authored by Shucai Xiao
Browse files

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into...

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into test_runner_match_input_output
parents 2d9e620b a33d6fa2
...@@ -218,7 +218,7 @@ jobs: ...@@ -218,7 +218,7 @@ jobs:
run: | run: |
echo "leak:dnnl::impl::malloc" > suppressions.txt echo "leak:dnnl::impl::malloc" > suppressions.txt
export LSAN_OPTIONS="suppressions=$(pwd)/suppressions.txt" export LSAN_OPTIONS="suppressions=$(pwd)/suppressions.txt"
rbuild build -d cget -s gh -t check \ rbuild build -d cget -s gh -T check \
-DCMAKE_BUILD_TYPE=${{matrix.configuration}} \ -DCMAKE_BUILD_TYPE=${{matrix.configuration}} \
-DMIGRAPHX_ENABLE_PYTHON=${{matrix.configuration == 'release' && 'On' || 'Off'}} \ -DMIGRAPHX_ENABLE_PYTHON=${{matrix.configuration == 'release' && 'On' || 'Off'}} \
-DCMAKE_CXX_FLAGS_DEBUG="-g1 -Os -fdebug-prefix-map=$PWD=. -fdebug-types-section -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=undefined" \ -DCMAKE_CXX_FLAGS_DEBUG="-g1 -Os -fdebug-prefix-map=$PWD=. -fdebug-types-section -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=undefined" \
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <migraphx/type_name.hpp> #include <migraphx/type_name.hpp>
#include <migraphx/functional.hpp> #include <migraphx/functional.hpp>
#include <migraphx/stringutils.hpp> #include <migraphx/stringutils.hpp>
#include <migraphx/rank.hpp>
namespace migraphx { namespace migraphx {
namespace driver { namespace driver {
...@@ -106,10 +107,22 @@ struct argument_parser ...@@ -106,10 +107,22 @@ struct argument_parser
return to_string_range(x); return to_string_range(x);
} }
// Preferred overload for turning a single value into its string form.
// The trailing return type makes this overload participate only when the
// expression to_string(x) is well-formed for T; otherwise it is dropped by
// SFINAE and overload resolution falls through to the rank<0> overload.
// Returns whatever to_string(x) returns.
template <class T>
auto as_string_value(rank<1>, const T& x) -> decltype(to_string(x))
{
return to_string(x);
}
// Fallback overload for values that cannot be converted to a string.
// Always throws std::runtime_error; the value argument is ignored.
// NOTE(review): presumably rank<1> converts to rank<0>, so this overload is
// only selected when the rank<1> overload is not viable - confirm against
// migraphx/rank.hpp.
template <class T>
std::string as_string_value(rank<0>, const T&)
{
throw std::runtime_error("Can't convert to string");
}
template <class T, MIGRAPHX_REQUIRES(not is_multi_value<T>{})> template <class T, MIGRAPHX_REQUIRES(not is_multi_value<T>{})>
std::string as_string_value(const T& x) std::string as_string_value(const T& x)
{ {
return to_string(x); return as_string_value(rank<1>{}, x);
} }
template <class T, class... Fs> template <class T, class... Fs>
...@@ -122,10 +135,11 @@ struct argument_parser ...@@ -122,10 +135,11 @@ struct argument_parser
return false; return false;
}}); }});
argument& arg = arguments.back(); argument& arg = arguments.back();
arg.type = migraphx::get_type_name<T>(); arg.type = migraphx::get_type_name<T>();
arg.default_value = as_string_value(x);
migraphx::each_args([&](auto f) { f(x, arg); }, fs...); migraphx::each_args([&](auto f) { f(x, arg); }, fs...);
if(not arg.default_value.empty() and arg.nargs > 0)
arg.default_value = as_string_value(x);
} }
template <class... Fs> template <class... Fs>
......
#include "verify.hpp"
#include "argument_parser.hpp" #include "argument_parser.hpp"
#include "command.hpp" #include "command.hpp"
#include "verify.hpp" #include "precision.hpp"
#include "perf.hpp" #include "perf.hpp"
#include "models.hpp" #include "models.hpp"
#include "marker_roctx.hpp" #include "marker_roctx.hpp"
...@@ -288,14 +289,12 @@ struct compiler_target ...@@ -288,14 +289,12 @@ struct compiler_target
struct compiler struct compiler
{ {
static const int q_fp16 = 1;
static const int q_int8 = 2;
loader l; loader l;
program_params parameters; program_params parameters;
compiler_target ct; compiler_target ct;
bool offload_copy = false; bool offload_copy = false;
bool fast_math = true; bool fast_math = true;
int quantize = 0; precision quantize = precision::fp32;
std::vector<std::string> fill0; std::vector<std::string> fill0;
std::vector<std::string> fill1; std::vector<std::string> fill1;
...@@ -312,8 +311,8 @@ struct compiler ...@@ -312,8 +311,8 @@ struct compiler
{"--disable-fast-math"}, {"--disable-fast-math"},
ap.help("Disable fast math optimization"), ap.help("Disable fast math optimization"),
ap.set_value(false)); ap.set_value(false));
ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(q_fp16)); ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(precision::fp16));
ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(q_int8)); ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(precision::int8));
} }
auto params(const program& p) { return parameters.generate(p, ct.get_target(), offload_copy); } auto params(const program& p) { return parameters.generate(p, ct.get_target(), offload_copy); }
...@@ -325,11 +324,11 @@ struct compiler ...@@ -325,11 +324,11 @@ struct compiler
if(p.is_compiled()) if(p.is_compiled())
return p; return p;
auto t = ct.get_target(); auto t = ct.get_target();
if(quantize == q_fp16) if(quantize == precision::fp16)
{ {
quantize_fp16(p); quantize_fp16(p);
} }
else if(quantize == q_int8) else if(quantize == precision::int8)
{ {
quantize_int8(p, t, {params(p)}); quantize_int8(p, t, {params(p)});
} }
...@@ -377,6 +376,7 @@ struct verify : command<verify> ...@@ -377,6 +376,7 @@ struct verify : command<verify>
bool reduce = false; bool reduce = false;
bool offload_copy = false; bool offload_copy = false;
bool fast_math = true; bool fast_math = true;
precision quantize = precision::fp32;
void parse(argument_parser& ap) void parse(argument_parser& ap)
{ {
l.parse(ap); l.parse(ap);
...@@ -396,6 +396,7 @@ struct verify : command<verify> ...@@ -396,6 +396,7 @@ struct verify : command<verify>
ap.help("Verify each instruction"), ap.help("Verify each instruction"),
ap.set_value(true)); ap.set_value(true));
ap(reduce, {"-r", "--reduce"}, ap.help("Reduce program and verify"), ap.set_value(true)); ap(reduce, {"-r", "--reduce"}, ap.help("Reduce program and verify"), ap.set_value(true));
ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(precision::fp16));
} }
void run() void run()
...@@ -412,15 +413,15 @@ struct verify : command<verify> ...@@ -412,15 +413,15 @@ struct verify : command<verify>
if(per_instruction) if(per_instruction)
{ {
verify_instructions(p, t, options, tolerance); verify_instructions(p, t, options, quantize, tolerance);
} }
else if(reduce) else if(reduce)
{ {
verify_reduced_program(p, t, options, m, tolerance); verify_reduced_program(p, t, options, quantize, m, tolerance);
} }
else else
{ {
verify_program(l.file, p, t, options, m, tolerance); verify_program(l.file, p, t, options, quantize, m, tolerance);
} }
} }
}; };
......
#ifndef MIGRAPHX_GUARD_RTGLIB_PRECISION_HPP
#define MIGRAPHX_GUARD_RTGLIB_PRECISION_HPP
namespace migraphx {
namespace driver {
// NOTE(review): MIGRAPHX_INLINE_NS is not defined in this header; presumably
// it is provided by migraphx/config.hpp through an earlier include - confirm
// the include order wherever this header is pulled in first.
inline namespace MIGRAPHX_INLINE_NS {

// Numeric precision the driver is asked to run a program in.
// The enumerator values are spelled out; they match the implicit numbering
// (0, 1, 2) so comparisons and any casts behave exactly as before.
enum class precision
{
    fp32 = 0, // value the driver's quantize members are initialised to
    fp16 = 1, // selected by the --fp16 option
    int8 = 2  // selected by the --int8 option
};

} // namespace MIGRAPHX_INLINE_NS
} // namespace driver
} // namespace migraphx
#endif
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <migraphx/verify_args.hpp> #include <migraphx/verify_args.hpp>
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/compile_options.hpp> #include <migraphx/compile_options.hpp>
#include <migraphx/quantization.hpp>
namespace migraphx { namespace migraphx {
namespace driver { namespace driver {
...@@ -19,9 +20,16 @@ std::vector<argument> run_ref(program p, const parameter_map& inputs) ...@@ -19,9 +20,16 @@ std::vector<argument> run_ref(program p, const parameter_map& inputs)
return out; return out;
} }
std::vector<argument> std::vector<argument> run_target(program p,
run_target(program p, const target& t, const compile_options& options, const parameter_map& inputs) const target& t,
const compile_options& options,
precision quantize,
const parameter_map& inputs)
{ {
if(quantize == precision::fp16)
{
quantize_fp16(p);
}
p.compile(t, options); p.compile(t, options);
parameter_map m; parameter_map m;
...@@ -43,24 +51,24 @@ void verify_program(const std::string& name, ...@@ -43,24 +51,24 @@ void verify_program(const std::string& name,
const program& p, const program& p,
const target& t, const target& t,
compile_options options, compile_options options,
precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) double tolerance)
{ {
auto x = run_ref(p, inputs); auto x = run_ref(p, inputs);
auto y = run_target(p, t, options, inputs); auto y = run_target(p, t, options, quantize, inputs);
std::size_t output_num = x.size(); std::size_t output_num = x.size();
for(std::size_t i = 0; i < output_num; ++i) for(std::size_t i = 0; i < output_num; ++i)
{ {
verify_args(name, x[i], y[i], tolerance); verify_args(name, x[i], y[i], tolerance);
} }
// std::cout << "cpu: " << x << std::endl;
// std::cout << "gpu: " << y << std::endl;
} }
void verify_instructions(const program& prog, void verify_instructions(const program& prog,
const target& t, const target& t,
compile_options options, compile_options options,
precision quantize,
double tolerance) double tolerance)
{ {
const auto* mm_prog = prog.get_main_module(); const auto* mm_prog = prog.get_main_module();
...@@ -92,7 +100,8 @@ void verify_instructions(const program& prog, ...@@ -92,7 +100,8 @@ void verify_instructions(const program& prog,
{ {
std::cout << "Verify: " << ins.name() << std::endl; std::cout << "Verify: " << ins.name() << std::endl;
std::cout << p << std::endl; std::cout << p << std::endl;
verify_program(ins.name(), p, t, options, create_param_map(p, false), tolerance); verify_program(
ins.name(), p, t, options, quantize, create_param_map(p, false), tolerance);
} }
catch(...) catch(...)
{ {
...@@ -106,6 +115,7 @@ void verify_reduced(program p, ...@@ -106,6 +115,7 @@ void verify_reduced(program p,
int n, int n,
const target& t, const target& t,
compile_options options, compile_options options,
precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) double tolerance)
{ {
...@@ -114,12 +124,13 @@ void verify_reduced(program p, ...@@ -114,12 +124,13 @@ void verify_reduced(program p,
mm->remove_instructions(last, mm->end()); mm->remove_instructions(last, mm->end());
std::cout << "Verify: " << std::endl; std::cout << "Verify: " << std::endl;
std::cout << p << std::endl; std::cout << p << std::endl;
verify_program(std::to_string(n), p, t, options, inputs, tolerance); verify_program(std::to_string(n), p, t, options, quantize, inputs, tolerance);
} }
void verify_reduced_program(const program& p, void verify_reduced_program(const program& p,
const target& t, const target& t,
compile_options options, compile_options options,
precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) double tolerance)
{ {
...@@ -127,7 +138,7 @@ void verify_reduced_program(const program& p, ...@@ -127,7 +138,7 @@ void verify_reduced_program(const program& p,
auto n = std::distance(mm->begin(), mm->end()); auto n = std::distance(mm->begin(), mm->end());
for(std::size_t i = 0; i < n; i++) for(std::size_t i = 0; i < n; i++)
{ {
verify_reduced(p, i, t, options, inputs, tolerance); verify_reduced(p, i, t, options, quantize, inputs, tolerance);
} }
} }
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DRIVER_VERIFY_HPP #ifndef MIGRAPHX_GUARD_RTGLIB_DRIVER_VERIFY_HPP
#define MIGRAPHX_GUARD_RTGLIB_DRIVER_VERIFY_HPP #define MIGRAPHX_GUARD_RTGLIB_DRIVER_VERIFY_HPP
#include "precision.hpp"
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
namespace migraphx { namespace migraphx {
...@@ -11,15 +12,18 @@ void verify_program(const std::string& name, ...@@ -11,15 +12,18 @@ void verify_program(const std::string& name,
const program& p, const program& p,
const target& t, const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32,
const parameter_map& inputs = {}, const parameter_map& inputs = {},
double tolerance = 100); double tolerance = 100);
void verify_instructions(const program& prog, void verify_instructions(const program& prog,
const target& t, const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32,
double tolerance = 80); double tolerance = 80);
void verify_reduced_program(const program& p, void verify_reduced_program(const program& p,
const target& t, const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32,
const parameter_map& inputs = {}, const parameter_map& inputs = {},
double tolerance = 80); double tolerance = 80);
......
...@@ -11,11 +11,13 @@ ...@@ -11,11 +11,13 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
static bool try_compute_shape(instruction_ref ins, const std::vector<shape>& inputs) static bool try_compute_shape(instruction_ref ins,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mods)
{ {
try try
{ {
shape new_shape = ins->get_operator().compute_shape(inputs); shape new_shape = ins->get_operator().compute_shape(inputs, mods);
// If the output shape is a standard shape, no need to try its output // If the output shape is a standard shape, no need to try its output
if(new_shape.standard()) if(new_shape.standard())
{ {
...@@ -45,7 +47,7 @@ static bool try_compute_shape(instruction_ref ins, const std::vector<shape>& inp ...@@ -45,7 +47,7 @@ static bool try_compute_shape(instruction_ref ins, const std::vector<shape>& inp
return (arg == ins) ? new_shape : arg->get_shape(); return (arg == ins) ? new_shape : arg->get_shape();
}); });
if(!try_compute_shape(output, input_shapes)) if(!try_compute_shape(output, input_shapes, mods))
{ {
return false; return false;
} }
...@@ -59,10 +61,12 @@ static bool try_compute_shape(instruction_ref ins, const std::vector<shape>& inp ...@@ -59,10 +61,12 @@ static bool try_compute_shape(instruction_ref ins, const std::vector<shape>& inp
return true; return true;
} }
static bool try_compute_shape(instruction_ref ins, const std::vector<instruction_ref>& args) static bool try_compute_shape(instruction_ref ins,
const std::vector<instruction_ref>& args,
const std::vector<module_ref>& mods)
{ {
auto inputs = to_shapes(args); auto inputs = to_shapes(args);
return try_compute_shape(ins, inputs); return try_compute_shape(ins, inputs, mods);
} }
void eliminate_contiguous::apply(module& p) const void eliminate_contiguous::apply(module& p) const
...@@ -82,7 +86,7 @@ void eliminate_contiguous::apply(module& p) const ...@@ -82,7 +86,7 @@ void eliminate_contiguous::apply(module& p) const
auto new_args = args; auto new_args = args;
auto prev = arg->inputs().front(); auto prev = arg->inputs().front();
replace(new_args, arg, prev); replace(new_args, arg, prev);
if(try_compute_shape(ins, new_args)) if(try_compute_shape(ins, new_args, ins->module_inputs()))
{ {
instruction::replace_argument(ins, arg, prev); instruction::replace_argument(ins, arg, prev);
} }
......
...@@ -103,7 +103,14 @@ auto operator==(const T& x, const U& y) -> decltype(x.name() == y.name()) ...@@ -103,7 +103,14 @@ auto operator==(const T& x, const U& y) -> decltype(x.name() == y.name())
} // namespace operation_operators } // namespace operation_operators
template <class T> template <class T>
auto normalize_compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs) auto compute_shape_op(rank<3>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.compute_shape(inputs))
{
return x.compute_shape(inputs);
}
template <class T>
auto compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.normalize_compute_shape(inputs)) -> decltype(x.normalize_compute_shape(inputs))
{ {
dependent_type<operation, T> y = x; dependent_type<operation, T> y = x;
...@@ -112,77 +119,53 @@ auto normalize_compute_shape_op(rank<2>, const T& x, const std::vector<shape>& i ...@@ -112,77 +119,53 @@ auto normalize_compute_shape_op(rank<2>, const T& x, const std::vector<shape>& i
} }
template <class T> template <class T>
auto normalize_compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs) auto compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.compute_shape(inputs, {})) -> decltype(x.compute_shape(inputs, {}))
{ {
return x.compute_shape(inputs, {}); return x.compute_shape(inputs, {});
} }
template <class T> template <class T>
shape normalize_compute_shape_op(rank<0>, const T& x, const std::vector<shape>&) shape compute_shape_op(rank<0>, const T& x, const std::vector<shape>&)
{ {
std::string name = x.name(); std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name); MIGRAPHX_THROW("Shape not computable: " + name);
} }
template <class T> template <class T>
shape normalize_compute_shape_op(const T& x, const std::vector<shape>& inputs) shape compute_shape_op(const T& x, const std::vector<shape>& inputs)
{ {
return normalize_compute_shape_op(rank<2>{}, x, inputs); return compute_shape_op(rank<3>{}, x, inputs);
} }
template <class T> template <class T>
auto compute_shape_op(rank<1>, auto mod_compute_shape_op(rank<1>,
const T& x, const T& x,
const std::vector<shape>& inputs, const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args) const std::vector<module_ref>& mod_args)
-> decltype(x.compute_shape(inputs, mod_args)) -> decltype(x.compute_shape(inputs, mod_args))
{ {
return x.compute_shape(inputs, mod_args); return x.compute_shape(inputs, mod_args);
} }
template <class T> template <class T>
shape shape mod_compute_shape_op(rank<0>,
compute_shape_op(rank<0>, const T& x, const std::vector<shape>&, const std::vector<module_ref>&) const T& x,
{ const std::vector<shape>& inputs,
std::string name = x.name(); const std::vector<module_ref>& mod_args)
MIGRAPHX_THROW("Shape not computable: " + name);
}
template <class T>
shape compute_shape_op(const T& x,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
{
return compute_shape_op(rank<1>{}, x, inputs, mod_args);
}
template <class T>
auto normalize_compute_shape_op(rank<1>,
const T& x,
const std::vector<shape>& inputs,
std::vector<module_ref>& mod_args)
-> decltype(x.normalize_compute_shape(inputs, mod_args))
{
return x.normalize_compute_shape(inputs, mod_args);
}
template <class T>
shape normalize_compute_shape_op(rank<0>,
const T& x,
const std::vector<shape>&,
const std::vector<module_ref>&)
{ {
if(mod_args.empty())
return compute_shape_op(x, inputs);
std::string name = x.name(); std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name); MIGRAPHX_THROW("Shape not computable: " + name);
} }
template <class T> template <class T>
shape normalize_compute_shape_op(const T& x, shape mod_compute_shape_op(const T& x,
const std::vector<shape>& inputs, const std::vector<shape>& inputs,
std::vector<module_ref>& mod_args) const std::vector<module_ref>& mod_args)
{ {
return normalize_compute_shape_op(rank<1>{}, x, inputs, mod_args); return mod_compute_shape_op(rank<1>{}, x, inputs, mod_args);
} }
template <class T> template <class T>
...@@ -855,7 +838,7 @@ struct operation ...@@ -855,7 +838,7 @@ struct operation
T&& private_detail_te_self, T&& private_detail_te_self,
const std::vector<shape>& input) const std::vector<shape>& input)
{ {
return detail::normalize_compute_shape_op(private_detail_te_self, input); return detail::compute_shape_op(private_detail_te_self, input);
} }
template <class T> template <class T>
...@@ -874,7 +857,7 @@ struct operation ...@@ -874,7 +857,7 @@ struct operation
const std::vector<shape>& inputs, const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args) const std::vector<module_ref>& mod_args)
{ {
return detail::compute_shape_op(private_detail_te_self, inputs, mod_args); return detail::mod_compute_shape_op(private_detail_te_self, inputs, mod_args);
} }
template <class T> template <class T>
...@@ -1276,7 +1259,7 @@ template <class T> ...@@ -1276,7 +1259,7 @@ template <class T>
inline auto compute_shape(const T& op, const std::vector<shape>& inputs) inline auto compute_shape(const T& op, const std::vector<shape>& inputs)
-> decltype(op.normalize_compute_shape(inputs)) -> decltype(op.normalize_compute_shape(inputs))
{ {
return detail::normalize_compute_shape_op(op, inputs); return detail::compute_shape_op(op, inputs);
} }
inline shape compute_shape(const operation& op, inline shape compute_shape(const operation& op,
...@@ -1301,7 +1284,7 @@ inline auto compute_shape(const T& op, ...@@ -1301,7 +1284,7 @@ inline auto compute_shape(const T& op,
const std::vector<module_ref>& mod_args) const std::vector<module_ref>& mod_args)
-> decltype(op.normalize_compute_shape(inputs, mod_args)) -> decltype(op.normalize_compute_shape(inputs, mod_args))
{ {
return detail::normalize_compute_shape_op(op, inputs, mod_args); return detail::compute_shape_op(op, inputs, mod_args);
} }
inline bool is_context_free(const operation& op) { return op.is_context_free(); } inline bool is_context_free(const operation& op) { return op.is_context_free(); }
......
...@@ -168,7 +168,8 @@ inline std::string to_string_range(const std::initializer_list<T>& r) ...@@ -168,7 +168,8 @@ inline std::string to_string_range(const std::initializer_list<T>& r)
} }
template <class T> template <class T>
inline std::string to_string(const T& x) inline auto to_string(const T& x)
-> decltype((std::declval<std::stringstream>() << x), std::string{})
{ {
std::stringstream ss; std::stringstream ss;
ss << x; ss << x;
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_PASSES);
void validate_pass(module& mod, const pass& p, tracer trace) void validate_pass(module& mod, const pass& p, tracer trace)
{ {
(void)mod; (void)mod;
...@@ -82,6 +84,8 @@ module& get_module(module_pass_manager& mpm) { return mpm.get_module(); } ...@@ -82,6 +84,8 @@ module& get_module(module_pass_manager& mpm) { return mpm.get_module(); }
void run_passes(module& mod, const std::vector<pass>& passes, tracer trace) void run_passes(module& mod, const std::vector<pass>& passes, tracer trace)
{ {
if(enabled(MIGRAPHX_TRACE_PASSES{}))
trace = tracer{std::cout};
for(const auto& p : passes) for(const auto& p : passes)
{ {
module_pm{&mod, nullptr, &trace}.run_pass(p); module_pm{&mod, nullptr, &trace}.run_pass(p);
...@@ -90,6 +94,8 @@ void run_passes(module& mod, const std::vector<pass>& passes, tracer trace) ...@@ -90,6 +94,8 @@ void run_passes(module& mod, const std::vector<pass>& passes, tracer trace)
void run_passes(program& prog, const std::vector<pass>& passes, tracer trace) void run_passes(program& prog, const std::vector<pass>& passes, tracer trace)
{ {
if(enabled(MIGRAPHX_TRACE_PASSES{}))
trace = tracer{std::cout};
for(const auto& p : passes) for(const auto& p : passes)
{ {
auto mods = prog.get_modules(); auto mods = prog.get_modules();
......
...@@ -91,28 +91,34 @@ add_library(migraphx_device ...@@ -91,28 +91,34 @@ add_library(migraphx_device
device/unary_not.cpp device/unary_not.cpp
device/where.cpp device/where.cpp
) )
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device) add_library(compile_for_gpu INTERFACE)
rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION}) target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
rocm_clang_tidy_check(migraphx_device) target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument)
target_compile_options(migraphx_device PRIVATE -std=c++17 -fno-gpu-rdc -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
target_link_libraries(migraphx_device migraphx hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument)
if(CMAKE_CXX_COMPILER MATCHES ".*hcc")
set(AMDGPU_TARGETS "gfx803;gfx900;gfx906" CACHE STRING "")
foreach(AMDGPU_TARGET ${AMDGPU_TARGETS})
target_compile_options(migraphx_device PRIVATE -amdgpu-target=${AMDGPU_TARGET})
target_link_libraries(migraphx_device -amdgpu-target=${AMDGPU_TARGET})
endforeach()
else()
target_compile_options(migraphx_device PRIVATE -Wno-cuda-compat)
endif()
check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE) check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE)
if(HAS_HIP_LAMBDA_HOST_DEVICE) if(HAS_HIP_LAMBDA_HOST_DEVICE)
message(STATUS "Enable -fhip-lambda-host-device") message(STATUS "Enable -fhip-lambda-host-device")
target_compile_options(migraphx_device PRIVATE -fhip-lambda-host-device) target_compile_options(compile_for_gpu INTERFACE -fhip-lambda-host-device)
endif() endif()
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_device)
target_link_libraries(migraphx_device PUBLIC migraphx)
target_link_libraries(migraphx_device PRIVATE compile_for_gpu)
target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>) target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
add_library(kernel_file_check EXCLUDE_FROM_ALL)
foreach(KERNEL_FILE ${KERNEL_FILES})
get_filename_component(KERNEL_BASE_FILE ${KERNEL_FILE} NAME_WE)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp "#include <migraphx/kernels/${KERNEL_BASE_FILE}.hpp>\n")
target_sources(kernel_file_check PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp)
endforeach()
target_include_directories(kernel_file_check PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/>)
target_link_libraries(kernel_file_check compile_for_gpu)
rocm_clang_tidy_check(kernel_file_check)
add_library(migraphx_gpu add_library(migraphx_gpu
abs.cpp abs.cpp
analyze_streams.cpp analyze_streams.cpp
...@@ -341,7 +347,7 @@ target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels) ...@@ -341,7 +347,7 @@ target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
add_subdirectory(driver) add_subdirectory(driver)
rocm_install_targets( rocm_install_targets(
TARGETS migraphx_gpu migraphx_device TARGETS migraphx_gpu migraphx_device compile_for_gpu
INCLUDE INCLUDE
${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include
) )
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <migraphx/module.hpp> #include <migraphx/module.hpp>
#include <migraphx/iterator_for.hpp> #include <migraphx/iterator_for.hpp>
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/register_op.hpp> #include <migraphx/register_op.hpp>
#include <migraphx/op/identity.hpp> #include <migraphx/op/identity.hpp>
#include <migraphx/gpu/compile_pointwise.hpp> #include <migraphx/gpu/compile_pointwise.hpp>
...@@ -63,17 +64,31 @@ std::unordered_map<std::string, compiler_function> make_compilers(Ts... xs) ...@@ -63,17 +64,31 @@ std::unordered_map<std::string, compiler_function> make_compilers(Ts... xs)
return {{xs.name(), make_compiler_function(xs)}...}; return {{xs.name(), make_compiler_function(xs)}...};
} }
// Pairs the operation produced by a compiler function with the instruction
// it is meant to replace, so compilation can be collected first and the
// module rewritten afterwards.
// Member order matters: compile_ops::apply builds this with a two-element
// braced initialiser (operation first, instruction second).
struct compiled_result
{
// Operation returned by the compiler function for the precompile op.
operation op;
// Instruction in the module that will be replaced with `op`.
instruction_ref ins;
};
void compile_ops::apply(module& m) const void compile_ops::apply(module& m) const
{ {
auto compilers = make_compilers(pointwise_compiler{}); auto compilers = make_compilers(pointwise_compiler{});
std::vector<std::function<compiled_result()>> compiles;
for(auto ins : iterator_for(m)) for(auto ins : iterator_for(m))
{ {
if(ins->name() != "gpu::precompile_op") if(ins->name() != "gpu::precompile_op")
continue; continue;
operation preop = any_cast<precompile_op>(ins->get_operator()).op; operation preop = any_cast<precompile_op>(ins->get_operator()).op;
assert(contains(compilers, preop.name())); assert(contains(compilers, preop.name()));
auto op = compilers[preop.name()](*ctx, ins, preop); auto c = compilers[preop.name()];
m.replace_instruction(ins, op, ins->inputs()); compiles.emplace_back([=]() -> compiled_result { return {c(*ctx, ins, preop), ins}; });
}
std::vector<compiled_result> results(compiles.size());
par_for(compiles.size(), 1, [&](auto i) { results[i] = compiles[i](); });
for(const auto& cr : results)
{
m.replace_instruction(cr.ins, cr.op, cr.ins->inputs());
} }
} }
......
...@@ -14,17 +14,29 @@ namespace gpu { ...@@ -14,17 +14,29 @@ namespace gpu {
static const char* const roialign_kernel = R"__migraphx__( static const char* const roialign_kernel = R"__migraphx__(
#include <migraphx/kernels/roialign.hpp> #include <migraphx/kernels/roialign.hpp>
#include <migraphx/kernels/basic_ops.hpp> #include <migraphx/kernels/basic_ops.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/generic_constant.hpp>
#include <args.hpp> #include <args.hpp>
using namespace migraphx; namespace migraphx {
extern "C" { extern "C" {
__global__ void roialign_kernel(void* in_x, void* in_rois, void* in_ind, void* y) __global__ void roialign_kernel(void* in_x, void* in_rois, void* in_ind, void* y)
{ {
make_tensors()(in_x, in_rois, in_ind, y)([](auto&&... xs) { roialign(xs...); }); make_tensors()(in_x, in_rois, in_ind, y)([](auto&&... xs) {
auto settings = make_roalign_settings(MIGRAPHX_MAKE_CONSTANT(float{ROIS_OFFSET}),
_c<bool{IS_AVG_POOLING}>,
_c<int64_t{SAMPLING_RATIO}>,
MIGRAPHX_MAKE_CONSTANT(float{SPATIAL_SCALE}));
roialign(xs..., settings);
});
} }
} }
} // namespace migraphx
int main() {} int main() {}
)__migraphx__"; )__migraphx__";
......
...@@ -176,23 +176,23 @@ struct array ...@@ -176,23 +176,23 @@ struct array
} }
}; };
template <class T, T... xs> template <class T, T... Xs>
struct integral_const_array : array<T, sizeof...(xs)> struct integral_const_array : array<T, sizeof...(Xs)>
{ {
using base_array = array<T, sizeof...(xs)>; using base_array = array<T, sizeof...(Xs)>;
MIGRAPHX_DEVICE_CONSTEXPR integral_const_array() : base_array({xs...}) {} MIGRAPHX_DEVICE_CONSTEXPR integral_const_array() : base_array({Xs...}) {}
}; };
template <class T, T... xs, class F> template <class T, T... Xs, class F>
constexpr auto transform(integral_const_array<T, xs...>, F f) constexpr auto transform(integral_const_array<T, Xs...>, F f)
{ {
return integral_const_array<T, f(xs)...>{}; return integral_const_array<T, f(Xs)...>{};
} }
template <class T, T... xs, class U, U... ys, class F> template <class T, T... Xs, class U, U... Ys, class F>
constexpr auto transform(integral_const_array<T, xs...>, integral_const_array<U, ys...>, F f) constexpr auto transform(integral_const_array<T, Xs...>, integral_const_array<U, Ys...>, F f)
{ {
return integral_const_array<T, f(xs, ys)...>{}; return integral_const_array<T, f(Xs, Ys)...>{};
} }
template <index_int... Ns> template <index_int... Ns>
......
#ifndef MIGRAPHX_GUARD_KERNELS_DEBUG_HPP #ifndef MIGRAPHX_GUARD_KERNELS_DEBUG_HPP
#define MIGRAPHX_GUARD_KERNELS_DEBUG_HPP #define MIGRAPHX_GUARD_KERNELS_DEBUG_HPP
#include <hip/hip_runtime.h> #include <migraphx/kernels/hip.hpp>
namespace migraphx { namespace migraphx {
inline __host__ __device__ void // Workaround hip's broken abort on device code
#ifdef __HIP_DEVICE_COMPILE__
// NOLINTNEXTLINE
#define MIGRAPHX_HIP_NORETURN
#else
// NOLINTNEXTLINE
#define MIGRAPHX_HIP_NORETURN [[noreturn]]
#endif
// noreturn is only applied for host compilation: abort in hip device code is broken, so the attribute is omitted there
MIGRAPHX_HIP_NORETURN inline __host__ __device__ void
assert_fail(const char* assertion, const char* file, unsigned int line, const char* function) assert_fail(const char* assertion, const char* file, unsigned int line, const char* function)
{ {
printf("%s:%u: %s: assertion '%s' failed.\n", file, line, function, assertion); printf("%s:%u: %s: assertion '%s' failed.\n", file, line, function, assertion);
......
...@@ -168,6 +168,7 @@ constexpr auto transform_args(F f, Fs... fs) ...@@ -168,6 +168,7 @@ constexpr auto transform_args(F f, Fs... fs)
return [=](auto... xs) { return transform_args(f)(xs...)(transform_args(fs...)); }; return [=](auto... xs) { return transform_args(f)(xs...)(transform_args(fs...)); };
} }
// Wraps the expression given as macro arguments in a generic lambda, so it can
// be passed around as a single function object. The lambda accepts any number
// of arguments and hands each one on via static_cast<decltype(xs)>(xs), which
// keeps the value category it was received with.
// NOLINTNEXTLINE
#define MIGRAPHX_LIFT(...) \ #define MIGRAPHX_LIFT(...) \
([](auto&&... xs) { return (__VA_ARGS__)(static_cast<decltype(xs)>(xs)...); }) ([](auto&&... xs) { return (__VA_ARGS__)(static_cast<decltype(xs)>(xs)...); })
......
#ifndef MIGRAPHX_GUARD_KERNELS_GENERIC_CONSTANT_HPP
#define MIGRAPHX_GUARD_KERNELS_GENERIC_CONSTANT_HPP
namespace migraphx {

/// Compile-time constant whose value is produced by a stateless callable type.
///
/// F must be default-constructible and callable with no arguments inside a
/// constant expression. The result of that call is stored in `value`, so the
/// constant is carried entirely by the type and objects of it are empty.
template <class F>
struct generic_constant
{
    // Computed once, at compile time, from a default-constructed F.
    static constexpr auto value = F{}();
    // Declared type of `value` (includes the const implied by constexpr).
    using value_type = decltype(value);
    using type       = generic_constant;
    // Implicit conversion to the wrapped value.
    constexpr operator value_type() const noexcept { return value; }
    // Call syntax returning the wrapped value.
    constexpr value_type operator()() const noexcept { return value; }
};

/// Returns the generic_constant for the type of the given callable.
/// Only the argument's type is used; the object itself is ignored.
template <class F>
constexpr generic_constant<F> make_generic_constant(F)
{
    return {};
}

// Builds a generic_constant object from a constant expression.
//
// The expression is placed in the call operator of a local struct that is
// returned from an immediately-invoked lambda. A struct is used rather than the
// lambda itself because generic_constant default-constructs its callable, which
// closure types do not support before C++20.
//
// The macro is variadic so that expressions containing top-level commas (for
// example template argument lists) are passed through intact instead of being
// split into several macro arguments.
// NOLINTNEXTLINE
#define MIGRAPHX_MAKE_CONSTANT(...)                                   \
    make_generic_constant([] {                                        \
        struct fun                                                    \
        {                                                             \
            constexpr auto operator()() const { return __VA_ARGS__; } \
        };                                                            \
        return fun{};                                                 \
    }())
} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_GENERIC_CONSTANT_HPP
#ifndef MIGRAPHX_GUARD_KERNELS_HIP_HPP
#define MIGRAPHX_GUARD_KERNELS_HIP_HPP
// Workaround for a macro redefinition issue with clang-tidy: when the build
// defines MIGRAPHX_USE_CLANG_TIDY and __HIP_PLATFORM_HCC__ is already defined,
// drop that definition before the HIP runtime header is included.
// NOTE(review): presumably hip_runtime.h defines the macro again itself, which
// is what triggers the redefinition diagnostic - confirm.
#if defined(__HIP_PLATFORM_HCC__) && defined(MIGRAPHX_USE_CLANG_TIDY)
#undef __HIP_PLATFORM_HCC__ // NOLINT
#endif
#include <hip/hip_runtime.h>
#endif // MIGRAPHX_GUARD_KERNELS_HIP_HPP
#ifndef MIGRAPHX_GUARD_KERNELS_INDEX_HPP #ifndef MIGRAPHX_GUARD_KERNELS_INDEX_HPP
#define MIGRAPHX_GUARD_KERNELS_INDEX_HPP #define MIGRAPHX_GUARD_KERNELS_INDEX_HPP
#include <hip/hip_runtime.h> #include <migraphx/kernels/hip.hpp>
#include <migraphx/kernels/types.hpp> #include <migraphx/kernels/types.hpp>
namespace migraphx { namespace migraphx {
...@@ -17,7 +17,7 @@ struct index ...@@ -17,7 +17,7 @@ struct index
#ifdef MIGRAPHX_NGLOBAL #ifdef MIGRAPHX_NGLOBAL
return MIGRAPHX_NGLOBAL; return MIGRAPHX_NGLOBAL;
#else #else
return blockDim.x * gridDim.x; return blockDim.x * gridDim.x; // NOLINT
#endif #endif
} }
...@@ -26,7 +26,7 @@ struct index ...@@ -26,7 +26,7 @@ struct index
#ifdef MIGRAPHX_NLOCAL #ifdef MIGRAPHX_NLOCAL
return MIGRAPHX_NLOCAL; return MIGRAPHX_NLOCAL;
#else #else
return blockDim.x; return blockDim.x; // NOLINT
#endif #endif
} }
...@@ -53,7 +53,7 @@ struct index ...@@ -53,7 +53,7 @@ struct index
// Device-side helper that builds the index for the calling thread. The index
// is initialised with, in order: blockIdx.x * blockDim.x + threadIdx.x (the
// thread's position across all blocks in x), threadIdx.x and blockIdx.x.
inline __device__ index make_index() inline __device__ index make_index()
{ {
return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
} }
} // namespace migraphx } // namespace migraphx
......
...@@ -5,28 +5,30 @@ ...@@ -5,28 +5,30 @@
namespace migraphx { namespace migraphx {
// Compile-time constant of type T carried in the type itself. It exposes the
// value as the static member `value`, as an implicit conversion to T, and
// through the call operator; `type` names the specialization itself.
template <class T, T v> template <class T, T V>
struct integral_constant struct integral_constant
{ {
static constexpr T value = v; static constexpr T value = V;
using value_type = T; using value_type = T;
using type = integral_constant; using type = integral_constant;
constexpr operator value_type() const noexcept { return value; } constexpr operator value_type() const noexcept { return value; }
constexpr value_type operator()() const noexcept { return value; } constexpr value_type operator()() const noexcept { return value; }
}; };
// Generates the binary operator `op` for a pair of integral_constants. The
// result is an integral_constant whose type and value are those of applying
// `op` to the two wrapped values, so the computation happens entirely in the
// return type and the function body only returns an empty object.
// NOLINTNEXTLINE
#define MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(op) \ #define MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(op) \
template <class T, T v, class U, U w> \ template <class T, T V, class U, U w> \
constexpr inline integral_constant<decltype(v op w), (v op w)> operator op( \ constexpr inline integral_constant<decltype(V op w), (V op w)> operator op( \
integral_constant<T, v>, integral_constant<U, w>) noexcept \ integral_constant<T, V>, integral_constant<U, w>) noexcept \
{ \ { \
return {}; \ return {}; \
} }
// Generates the unary operator `op` for an integral_constant. The result is an
// integral_constant whose type and value are those of applying `op` to the
// wrapped value; the function body only returns an empty object.
// NOLINTNEXTLINE
#define MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(op) \ #define MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(op) \
template <class T, T v> \ template <class T, T V> \
constexpr inline integral_constant<decltype(op v), (op v)> operator op( \ constexpr inline integral_constant<decltype(op V), (op V)> operator op( \
integral_constant<T, v>) noexcept \ integral_constant<T, V>) noexcept \
{ \ { \
return {}; \ return {}; \
} }
...@@ -64,8 +66,8 @@ using false_type = bool_constant<false>; ...@@ -64,8 +66,8 @@ using false_type = bool_constant<false>;
// Shorthand for an integral_constant that holds the index_int value N.
template <index_int N> template <index_int N>
using index_constant = integral_constant<index_int, N>; using index_constant = integral_constant<index_int, N>;
// Variable template that yields an integral_constant object for any constant
// value, deducing the type from the value: _c<3> is integral_constant<int, 3>{}.
template <auto v> template <auto V>
static constexpr auto _c = integral_constant<decltype(v), v>{}; static constexpr auto _c = integral_constant<decltype(V), V>{}; // NOLINT
} // namespace migraphx } // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP #endif // MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment