"src/vscode:/vscode.git/clone" did not exist on "36a5588957730bac6adebfc321ac91b422543278"
Commit d0202590 authored by Shucai Xiao
Browse files

Merge branch 'test_runner_match_input_output' into migraphx_for_ort

parents 2e43e30b 414ea291
......@@ -218,7 +218,7 @@ jobs:
run: |
echo "leak:dnnl::impl::malloc" > suppressions.txt
export LSAN_OPTIONS="suppressions=$(pwd)/suppressions.txt"
rbuild build -d cget -s gh -t check \
rbuild build -d cget -s gh -T check \
-DCMAKE_BUILD_TYPE=${{matrix.configuration}} \
-DMIGRAPHX_ENABLE_PYTHON=${{matrix.configuration == 'release' && 'On' || 'Off'}} \
-DCMAKE_CXX_FLAGS_DEBUG="-g1 -Os -fdebug-prefix-map=$PWD=. -fdebug-types-section -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=undefined" \
......
......@@ -26,16 +26,18 @@ cpp_generator::function::set_body(const module& m, const cpp_generator::generate
{
names[ins] =
migraphx::any_cast<migraphx::builtin::param>(ins->get_operator()).parameter;
continue;
}
if(ins->name() == "@return")
else if(ins->name() == "@return")
{
assert(ins->inputs().size() == 1);
return_ins = ins->inputs().front();
}
std::string n = "z" + std::to_string(names.size());
names[ins] = n;
ss << "auto " << n << " = " << g(ins, names) << ";\n";
else
{
std::string n = "z" + std::to_string(names.size());
names[ins] = n;
ss << "auto " << n << " = " << g(ins, names) << ";\n";
}
}
ss << "return " << names.at(return_ins) << ";\n";
body = ss.str();
......@@ -84,8 +86,11 @@ void cpp_generator::fmap(const std::function<std::string(std::string)>& f) { imp
std::string cpp_generator::generate_point_op(const operation& op,
const std::vector<std::string>& args)
{
auto v = op.to_value();
return interpolate_string(op.attributes()["point_op"].to<std::string>(),
auto v = op.to_value();
auto attributes = op.attributes();
if(not attributes.contains("point_op"))
MIGRAPHX_THROW("op is missing point_op attribute: " + op.name());
return interpolate_string(attributes["point_op"].to<std::string>(),
[&](auto start, auto last) -> std::string {
auto key = trim({start, last});
if(key.empty())
......@@ -120,7 +125,12 @@ std::string cpp_generator::str() const { return impl->fs.str(); }
cpp_generator::function cpp_generator::generate_module(const module& m)
{
function f;
f.set_name(m.name()).set_types(m).set_body(
auto name = transform_string(m.name(), [](char c) {
if(with_char(::isalnum)(c) or c == '_')
return c;
return '_';
});
f.set_name(name).set_types(m).set_body(
m, [&](instruction_ref ins, const auto& names) -> std::string {
if(ins->name() == "@literal")
return shape::cpp_type(ins->get_shape().type()) + "(" +
......@@ -130,7 +140,6 @@ cpp_generator::function cpp_generator::generate_module(const module& m)
ins->inputs().end(),
std::back_inserter(args),
[&](auto i) { return names.at(i); });
auto s = this->generate_point_op(ins->get_operator(), args);
return this->generate_point_op(ins->get_operator(), args);
});
return f;
......
......@@ -17,6 +17,7 @@
#include <migraphx/type_name.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/rank.hpp>
namespace migraphx {
namespace driver {
......@@ -106,10 +107,22 @@ struct argument_parser
return to_string_range(x);
}
// Preferred overload of the tag-dispatched string conversion. The trailing
// decltype removes it from overload resolution unless `to_string(x)` is a
// valid expression for T; when it is viable it simply returns that string.
// The untagged as_string_value(x) entry point invokes the dispatch with
// rank<1>{} (presumably rank<1> converts to rank<0>, which makes this the
// better match whenever it is viable - confirm against migraphx/rank.hpp).
template <class T>
auto as_string_value(rank<1>, const T& x) -> decltype(to_string(x))
{
return to_string(x);
}
// Fallback overload of the tag-dispatched string conversion, taking the
// rank<0> tag and accepting any T. It never produces a value: it always
// throws std::runtime_error, so a type that cannot be formatted is reported
// when the conversion is attempted rather than rejected at compile time.
template <class T>
std::string as_string_value(rank<0>, const T&)
{
throw std::runtime_error("Can't convert to string");
}
template <class T, MIGRAPHX_REQUIRES(not is_multi_value<T>{})>
std::string as_string_value(const T& x)
{
return to_string(x);
return as_string_value(rank<1>{}, x);
}
template <class T, class... Fs>
......@@ -122,10 +135,11 @@ struct argument_parser
return false;
}});
argument& arg = arguments.back();
arg.type = migraphx::get_type_name<T>();
arg.default_value = as_string_value(x);
argument& arg = arguments.back();
arg.type = migraphx::get_type_name<T>();
migraphx::each_args([&](auto f) { f(x, arg); }, fs...);
if(not arg.default_value.empty() and arg.nargs > 0)
arg.default_value = as_string_value(x);
}
template <class... Fs>
......
#include "verify.hpp"
#include "argument_parser.hpp"
#include "command.hpp"
#include "verify.hpp"
#include "precision.hpp"
#include "perf.hpp"
#include "models.hpp"
#include "marker_roctx.hpp"
......@@ -288,14 +289,12 @@ struct compiler_target
struct compiler
{
static const int q_fp16 = 1;
static const int q_int8 = 2;
loader l;
program_params parameters;
compiler_target ct;
bool offload_copy = false;
bool fast_math = true;
int quantize = 0;
bool offload_copy = false;
bool fast_math = true;
precision quantize = precision::fp32;
std::vector<std::string> fill0;
std::vector<std::string> fill1;
......@@ -312,8 +311,8 @@ struct compiler
{"--disable-fast-math"},
ap.help("Disable fast math optimization"),
ap.set_value(false));
ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(q_fp16));
ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(q_int8));
ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(precision::fp16));
ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(precision::int8));
}
// Builds the run-time parameters for program `p` by forwarding the program,
// the target chosen by `ct`, and the `offload_copy` flag to
// `parameters.generate(...)`; the return type is whatever that call yields.
auto params(const program& p) { return parameters.generate(p, ct.get_target(), offload_copy); }
......@@ -325,11 +324,11 @@ struct compiler
if(p.is_compiled())
return p;
auto t = ct.get_target();
if(quantize == q_fp16)
if(quantize == precision::fp16)
{
quantize_fp16(p);
}
else if(quantize == q_int8)
else if(quantize == precision::int8)
{
quantize_int8(p, t, {params(p)});
}
......@@ -377,6 +376,7 @@ struct verify : command<verify>
bool reduce = false;
bool offload_copy = false;
bool fast_math = true;
precision quantize = precision::fp32;
void parse(argument_parser& ap)
{
l.parse(ap);
......@@ -396,6 +396,7 @@ struct verify : command<verify>
ap.help("Verify each instruction"),
ap.set_value(true));
ap(reduce, {"-r", "--reduce"}, ap.help("Reduce program and verify"), ap.set_value(true));
ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(precision::fp16));
}
void run()
......@@ -412,15 +413,15 @@ struct verify : command<verify>
if(per_instruction)
{
verify_instructions(p, t, options, tolerance);
verify_instructions(p, t, options, quantize, tolerance);
}
else if(reduce)
{
verify_reduced_program(p, t, options, m, tolerance);
verify_reduced_program(p, t, options, quantize, m, tolerance);
}
else
{
verify_program(l.file, p, t, options, m, tolerance);
verify_program(l.file, p, t, options, quantize, m, tolerance);
}
}
};
......@@ -480,7 +481,7 @@ struct perf : command<perf>
std::cout << "Allocating params ... " << std::endl;
auto m = c.params(p);
std::cout << "Running performance report ... " << std::endl;
p.perf_report(std::cout, n, m);
p.perf_report(std::cout, n, m, c.l.batch);
}
};
......
#ifndef MIGRAPHX_GUARD_RTGLIB_PRECISION_HPP
#define MIGRAPHX_GUARD_RTGLIB_PRECISION_HPP

// NOTE(review): this header uses the MIGRAPHX_INLINE_NS macro but includes
// nothing itself, so the name of the inline namespace depends on what the
// including file pulled in beforehand - confirm the include order at every
// use site (or that the macro is always defined before this point).
namespace migraphx {
namespace driver {
inline namespace MIGRAPHX_INLINE_NS {

// Numeric precision a driver command should run the program in.
// The enumerator values are spelled out explicitly; they are the same values
// the compiler would assign implicitly (0, 1, 2 in declaration order).
enum class precision
{
    fp32 = 0, // no quantization requested
    fp16 = 1, // quantize the program to half precision
    int8 = 2  // quantize the program to 8-bit integers
};

} // namespace MIGRAPHX_INLINE_NS
} // namespace driver
} // namespace migraphx

#endif // MIGRAPHX_GUARD_RTGLIB_PRECISION_HPP
......@@ -6,6 +6,7 @@
#include <migraphx/verify_args.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/compile_options.hpp>
#include <migraphx/quantization.hpp>
namespace migraphx {
namespace driver {
......@@ -19,9 +20,16 @@ std::vector<argument> run_ref(program p, const parameter_map& inputs)
return out;
}
std::vector<argument>
run_target(program p, const target& t, const compile_options& options, const parameter_map& inputs)
std::vector<argument> run_target(program p,
const target& t,
const compile_options& options,
precision quantize,
const parameter_map& inputs)
{
if(quantize == precision::fp16)
{
quantize_fp16(p);
}
p.compile(t, options);
parameter_map m;
......@@ -43,24 +51,24 @@ void verify_program(const std::string& name,
const program& p,
const target& t,
compile_options options,
precision quantize,
const parameter_map& inputs,
double tolerance)
{
auto x = run_ref(p, inputs);
auto y = run_target(p, t, options, inputs);
auto y = run_target(p, t, options, quantize, inputs);
std::size_t output_num = x.size();
for(std::size_t i = 0; i < output_num; ++i)
{
verify_args(name, x[i], y[i], tolerance);
}
// std::cout << "cpu: " << x << std::endl;
// std::cout << "gpu: " << y << std::endl;
}
void verify_instructions(const program& prog,
const target& t,
compile_options options,
precision quantize,
double tolerance)
{
const auto* mm_prog = prog.get_main_module();
......@@ -92,7 +100,8 @@ void verify_instructions(const program& prog,
{
std::cout << "Verify: " << ins.name() << std::endl;
std::cout << p << std::endl;
verify_program(ins.name(), p, t, options, create_param_map(p, false), tolerance);
verify_program(
ins.name(), p, t, options, quantize, create_param_map(p, false), tolerance);
}
catch(...)
{
......@@ -106,6 +115,7 @@ void verify_reduced(program p,
int n,
const target& t,
compile_options options,
precision quantize,
const parameter_map& inputs,
double tolerance)
{
......@@ -114,12 +124,13 @@ void verify_reduced(program p,
mm->remove_instructions(last, mm->end());
std::cout << "Verify: " << std::endl;
std::cout << p << std::endl;
verify_program(std::to_string(n), p, t, options, inputs, tolerance);
verify_program(std::to_string(n), p, t, options, quantize, inputs, tolerance);
}
void verify_reduced_program(const program& p,
const target& t,
compile_options options,
precision quantize,
const parameter_map& inputs,
double tolerance)
{
......@@ -127,7 +138,7 @@ void verify_reduced_program(const program& p,
auto n = std::distance(mm->begin(), mm->end());
for(std::size_t i = 0; i < n; i++)
{
verify_reduced(p, i, t, options, inputs, tolerance);
verify_reduced(p, i, t, options, quantize, inputs, tolerance);
}
}
......
#ifndef MIGRAPHX_GUARD_RTGLIB_DRIVER_VERIFY_HPP
#define MIGRAPHX_GUARD_RTGLIB_DRIVER_VERIFY_HPP
#include "precision.hpp"
#include <migraphx/program.hpp>
namespace migraphx {
......@@ -11,15 +12,18 @@ void verify_program(const std::string& name,
const program& p,
const target& t,
compile_options options = compile_options{},
precision quantize = precision::fp32,
const parameter_map& inputs = {},
double tolerance = 100);
void verify_instructions(const program& prog,
const target& t,
compile_options options = compile_options{},
precision quantize = precision::fp32,
double tolerance = 80);
void verify_reduced_program(const program& p,
const target& t,
compile_options options = compile_options{},
precision quantize = precision::fp32,
const parameter_map& inputs = {},
double tolerance = 80);
......
......@@ -11,11 +11,13 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
static bool try_compute_shape(instruction_ref ins, const std::vector<shape>& inputs)
static bool try_compute_shape(instruction_ref ins,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mods)
{
try
{
shape new_shape = ins->get_operator().compute_shape(inputs);
shape new_shape = ins->get_operator().compute_shape(inputs, mods);
// If the output shape is a standard shape, no need to try its output
if(new_shape.standard())
{
......@@ -45,7 +47,7 @@ static bool try_compute_shape(instruction_ref ins, const std::vector<shape>& inp
return (arg == ins) ? new_shape : arg->get_shape();
});
if(!try_compute_shape(output, input_shapes))
if(!try_compute_shape(output, input_shapes, mods))
{
return false;
}
......@@ -59,10 +61,12 @@ static bool try_compute_shape(instruction_ref ins, const std::vector<shape>& inp
return true;
}
static bool try_compute_shape(instruction_ref ins, const std::vector<instruction_ref>& args)
static bool try_compute_shape(instruction_ref ins,
const std::vector<instruction_ref>& args,
const std::vector<module_ref>& mods)
{
auto inputs = to_shapes(args);
return try_compute_shape(ins, inputs);
return try_compute_shape(ins, inputs, mods);
}
void eliminate_contiguous::apply(module& p) const
......@@ -82,7 +86,7 @@ void eliminate_contiguous::apply(module& p) const
auto new_args = args;
auto prev = arg->inputs().front();
replace(new_args, arg, prev);
if(try_compute_shape(ins, new_args))
if(try_compute_shape(ins, new_args, ins->module_inputs()))
{
instruction::replace_argument(ins, arg, prev);
}
......
......@@ -13,6 +13,8 @@ inline namespace MIGRAPHX_INLINE_NS {
static literal get_scalar(instruction_ref ins)
{
if(ins->name() == "contiguous")
return get_scalar(ins->inputs().front());
const auto& s = ins->get_shape();
if(not(s.elements() == 1 or s.scalar()))
return {};
......@@ -31,11 +33,16 @@ static void create_pointwise_modules(module_pass_manager& mpm)
{
if(not ins->get_operator().attributes().get("pointwise", false))
continue;
auto* pm = mpm.create_module("pointwise" + std::to_string(n++));
// Skip convert op for now
if(ins->name() == "convert")
continue;
assert(ins->get_operator().attributes().contains("point_op"));
auto* pm = mpm.create_module(mpm.get_module().name() + ":pointwise" + std::to_string(n++));
pm->set_bypass();
std::unordered_map<instruction_ref, instruction_ref> param_map;
std::vector<instruction_ref> pointwise_inputs;
std::size_t i = 0;
for(auto input : ins->inputs())
{
if(contains(param_map, input))
......@@ -44,8 +51,9 @@ static void create_pointwise_modules(module_pass_manager& mpm)
if(scalar.empty())
{
pointwise_inputs.push_back(input);
param_map[input] = pm->add_parameter("x" + std::to_string(param_map.size()),
shape{input->get_shape().type()});
param_map[input] =
pm->add_parameter("x" + std::to_string(i), shape{input->get_shape().type()});
i++;
}
else
{
......@@ -68,6 +76,7 @@ static void create_pointwise_modules(module_pass_manager& mpm)
static std::vector<instruction_ref> append_pointwise_module(instruction_ref ins,
instruction_ref output)
{
assert(contains(output->inputs(), ins));
module_ref pm = ins->module_inputs().at(0);
module_ref xm = output->module_inputs().at(0);
......@@ -75,14 +84,18 @@ static std::vector<instruction_ref> append_pointwise_module(instruction_ref ins,
assert(last->name() == "@return");
assert(last->inputs().size() == 1);
assert(pm->get_parameter_names().size() == ins->inputs().size());
assert(xm->get_parameter_names().size() == output->inputs().size());
std::vector<instruction_ref> inputs = ins->inputs();
std::unordered_map<instruction_ref, instruction_ref> map_ins;
std::unordered_map<instruction_ref, instruction_ref> input_map;
// Copy inputs to input_map
for(auto i : range(inputs.size()))
{
auto input = inputs[i];
auto param = pm->get_parameter("x" + std::to_string(i));
auto input = inputs[i];
auto param = pm->get_parameter("x" + std::to_string(i));
assert(param != pm->end());
input_map[input] = param;
}
// Add the new parameter and additional inputs
......@@ -90,6 +103,7 @@ static std::vector<instruction_ref> append_pointwise_module(instruction_ref ins,
{
auto input = output->inputs()[i];
auto param = xm->get_parameter("x" + std::to_string(i));
assert(param != xm->end());
if(input == ins)
{
map_ins[param] = last->inputs().front();
......
......@@ -26,19 +26,17 @@ struct pointwise
auto pnames = pm->get_parameter_names();
std::sort(pnames.begin(), pnames.end());
check_shapes{inputs, *this}.has(pnames.size()).same_dims();
for(auto i : range(pnames.size()))
{
auto s1 = pm->get_parameter(pnames[i])->get_shape();
auto s2 = inputs[i];
if(s1.type() != s2.type())
MIGRAPHX_THROW("Mismatch type");
}
if(pm->get_output_shapes().size() != 1)
MIGRAPHX_THROW("submodule should have only one output.");
auto type = pm->get_output_shapes().front().type();
// Scalar output if all inputs are scalar
if(inputs.front().elements() == 1 and
all_of(inputs, [](const auto& s) { return s.scalar(); }))
return shape{type};
return shape::from_permutation(type, inputs.front().lens(), find_permutation(inputs));
}
......
......@@ -9,6 +9,7 @@ namespace op {
struct prelu : binary<prelu>
{
// Source-text form of this op: the same conditional as the lambda returned by
// apply(), with ${0} in the place of `x` and ${1} in the place of `slope`.
// Presumably exposed as the "point_op" attribute that
// cpp_generator::generate_point_op interpolates - confirm in the binary<> base.
std::string point_op() const { return "(${0} < 0) ? (${0} * ${1}) : ${0}"; }
auto apply() const
{
return [](auto x, auto slope) { return ((x < 0) ? (x * slope) : x); };
......
......@@ -9,6 +9,7 @@ namespace op {
struct recip : unary<recip>
{
// Source-text form of this op: the reciprocal written as an expression
// template, with ${0} standing for the single operand (same expression as the
// lambda returned by apply()). Presumably exposed as the "point_op" attribute
// used by the C++ code generator - confirm in the unary<> base.
std::string point_op() const { return "1 / ${0}"; }
auto apply() const
{
return [](auto x) { return 1 / x; };
......
......@@ -18,6 +18,7 @@ namespace op {
struct sigmoid : unary<sigmoid>
{
// Source-text form of this op: the logistic function written as an expression
// template, matching the lambda returned by apply() with ${0} as the operand.
// NOTE(review): ${function:exp} looks like a placeholder the code generator
// maps to the target's exp function (apply() uses std::exp) - confirm where
// the "function:" keys are resolved.
std::string point_op() const { return "1.f / (1.f + ${function:exp}(-${0}))"; }
auto apply() const
{
return [](auto x) { return 1.f / (1.f + std::exp(-x)); };
......
......@@ -18,6 +18,7 @@ namespace op {
struct sign : unary<sign>
{
// Source-text form of this op: yields 1, -1 or 0 for a positive, negative or
// zero operand, matching the lambda returned by apply() with ${0} as the
// operand. Presumably exposed as the "point_op" attribute used by the C++
// code generator - confirm in the unary<> base.
std::string point_op() const { return "(${0} > 0 ? 1 : ((${0} < 0) ? -1 : 0))"; }
auto apply() const
{
return [](auto x) { return (x > 0 ? 1 : ((x < 0) ? -1 : 0)); };
......
......@@ -103,79 +103,69 @@ auto operator==(const T& x, const U& y) -> decltype(x.name() == y.name())
} // namespace operation_operators
template <class T>
auto normalize_compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.normalize_compute_shape(inputs))
{
dependent_type<operation, T> y = x;
normalize_attributes(y, inputs[0].lens());
return any_cast<T>(y).normalize_compute_shape(inputs);
}
template <class T>
shape normalize_compute_shape_op(rank<0>, const T& x, const std::vector<shape>&)
auto compute_shape_op(rank<3>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.compute_shape(inputs))
{
std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name);
return x.compute_shape(inputs);
}
template <class T>
shape normalize_compute_shape_op(const T& x, const std::vector<shape>& inputs)
auto compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.normalize_compute_shape(inputs))
{
return normalize_compute_shape_op(rank<1>{}, x, inputs);
dependent_type<operation, T> y = x;
normalize_attributes(y, inputs[0].lens());
return any_cast<T>(y).normalize_compute_shape(inputs);
}
template <class T>
auto compute_shape_op(rank<1>,
const T& x,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
-> decltype(x.compute_shape(inputs, mod_args))
auto compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.compute_shape(inputs, {}))
{
return x.compute_shape(inputs, mod_args);
return x.compute_shape(inputs, {});
}
template <class T>
shape
compute_shape_op(rank<0>, const T& x, const std::vector<shape>&, const std::vector<module_ref>&)
shape compute_shape_op(rank<0>, const T& x, const std::vector<shape>&)
{
std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name);
}
template <class T>
shape compute_shape_op(const T& x,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
shape compute_shape_op(const T& x, const std::vector<shape>& inputs)
{
return compute_shape_op(rank<1>{}, x, inputs, mod_args);
return compute_shape_op(rank<3>{}, x, inputs);
}
template <class T>
auto normalize_compute_shape_op(rank<1>,
const T& x,
const std::vector<shape>& inputs,
std::vector<module_ref>& mod_args)
-> decltype(x.normalize_compute_shape(inputs, mod_args))
auto mod_compute_shape_op(rank<1>,
const T& x,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
-> decltype(x.compute_shape(inputs, mod_args))
{
return x.normalize_compute_shape(inputs, mod_args);
return x.compute_shape(inputs, mod_args);
}
template <class T>
shape normalize_compute_shape_op(rank<0>,
const T& x,
const std::vector<shape>&,
const std::vector<module_ref>&)
shape mod_compute_shape_op(rank<0>,
const T& x,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
{
if(mod_args.empty())
return compute_shape_op(x, inputs);
std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name);
}
template <class T>
shape normalize_compute_shape_op(const T& x,
const std::vector<shape>& inputs,
std::vector<module_ref>& mod_args)
shape mod_compute_shape_op(const T& x,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
{
return normalize_compute_shape_op(rank<1>{}, x, inputs, mod_args);
return mod_compute_shape_op(rank<1>{}, x, inputs, mod_args);
}
template <class T>
......@@ -848,7 +838,7 @@ struct operation
T&& private_detail_te_self,
const std::vector<shape>& input)
{
return detail::normalize_compute_shape_op(private_detail_te_self, input);
return detail::compute_shape_op(private_detail_te_self, input);
}
template <class T>
......@@ -867,7 +857,7 @@ struct operation
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
{
return detail::compute_shape_op(private_detail_te_self, inputs, mod_args);
return detail::mod_compute_shape_op(private_detail_te_self, inputs, mod_args);
}
template <class T>
......@@ -1269,7 +1259,7 @@ template <class T>
inline auto compute_shape(const T& op, const std::vector<shape>& inputs)
-> decltype(op.normalize_compute_shape(inputs))
{
return detail::normalize_compute_shape_op(op, inputs);
return detail::compute_shape_op(op, inputs);
}
inline shape compute_shape(const operation& op,
......@@ -1294,7 +1284,7 @@ inline auto compute_shape(const T& op,
const std::vector<module_ref>& mod_args)
-> decltype(op.normalize_compute_shape(inputs, mod_args))
{
return detail::normalize_compute_shape_op(op, inputs, mod_args);
return detail::compute_shape_op(op, inputs, mod_args);
}
// Free-function spelling of operation::is_context_free(): forwards to the
// member on the type-erased operation and returns its result unchanged.
inline bool is_context_free(const operation& op) { return op.is_context_free(); }
......
......@@ -67,7 +67,8 @@ struct program
void finalize();
void perf_report(std::ostream& os, std::size_t n, parameter_map params) const;
void
perf_report(std::ostream& os, std::size_t n, parameter_map params, std::size_t batch = 1) const;
void mark(const parameter_map& params, marker&& m);
......
......@@ -18,7 +18,7 @@ inline namespace MIGRAPHX_INLINE_NS {
template <class F>
auto with_char(F f)
{
return [=](unsigned char c) { return f(c); };
return [=](unsigned char c) -> bool { return f(c); };
}
inline std::string
......@@ -120,22 +120,27 @@ interpolate_string(const std::string& input, F f, std::string start = "${", std:
result.append(it, next_start);
if(next_start == input.end())
break;
auto r = f(next_start + start.size(), next_end - end.size() + 1);
auto r = f(next_start + start.size(), next_end);
result.append(r.begin(), r.end());
it = next_end + 1;
it = next_end + end.size();
}
return result;
}
inline std::string interpolate_string(const std::string& input,
const std::unordered_map<std::string, std::string>& vars)
{
return interpolate_string(input, [&](auto start, auto last) {
auto key = trim({start, last});
auto it = vars.find(key);
if(it == vars.end())
throw std::runtime_error("Unknown key: " + key);
return it->second;
});
const std::unordered_map<std::string, std::string>& vars,
std::string start = "${",
std::string end = "}")
{
return interpolate_string(input,
[&](auto start_it, auto last_it) {
auto key = trim({start_it, last_it});
auto it = vars.find(key);
if(it == vars.end())
throw std::runtime_error("Unknown key: " + key);
return it->second;
},
std::move(start),
std::move(end));
}
template <class Iterator>
......@@ -163,7 +168,8 @@ inline std::string to_string_range(const std::initializer_list<T>& r)
}
template <class T>
inline std::string to_string(const T& x)
inline auto to_string(const T& x)
-> decltype((std::declval<std::stringstream>() << x), std::string{})
{
std::stringstream ss;
ss << x;
......
......@@ -15,6 +15,8 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_PASSES);
void validate_pass(module& mod, const pass& p, tracer trace)
{
(void)mod;
......@@ -82,6 +84,8 @@ module& get_module(module_pass_manager& mpm) { return mpm.get_module(); }
void run_passes(module& mod, const std::vector<pass>& passes, tracer trace)
{
if(enabled(MIGRAPHX_TRACE_PASSES{}))
trace = tracer{std::cout};
for(const auto& p : passes)
{
module_pm{&mod, nullptr, &trace}.run_pass(p);
......@@ -90,6 +94,8 @@ void run_passes(module& mod, const std::vector<pass>& passes, tracer trace)
void run_passes(program& prog, const std::vector<pass>& passes, tracer trace)
{
if(enabled(MIGRAPHX_TRACE_PASSES{}))
trace = tracer{std::cout};
for(const auto& p : passes)
{
auto mods = prog.get_modules();
......
......@@ -526,7 +526,10 @@ void program::mark(const parameter_map& params, marker&& m)
m.mark_stop(*this);
}
void program::perf_report(std::ostream& os, std::size_t n, parameter_map params) const
void program::perf_report(std::ostream& os,
std::size_t n,
parameter_map params,
std::size_t batch) const
{
auto& ctx = this->impl->ctx;
// Run once by itself
......@@ -619,7 +622,8 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
os << std::endl;
os << "Rate: " << rate << "/sec" << std::endl;
os << "Batch size: " << batch << std::endl;
os << "Rate: " << rate * batch << "/sec" << std::endl;
os << "Total time: " << total_time << "ms" << std::endl;
os << "Total instructions time: " << total_instruction_time << "ms" << std::endl;
os << "Overhead time: " << overhead_time << "ms"
......
......@@ -91,28 +91,34 @@ add_library(migraphx_device
device/unary_not.cpp
device/where.cpp
)
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_device)
target_compile_options(migraphx_device PRIVATE -std=c++17 -fno-gpu-rdc -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
target_link_libraries(migraphx_device migraphx hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument)
if(CMAKE_CXX_COMPILER MATCHES ".*hcc")
set(AMDGPU_TARGETS "gfx803;gfx900;gfx906" CACHE STRING "")
foreach(AMDGPU_TARGET ${AMDGPU_TARGETS})
target_compile_options(migraphx_device PRIVATE -amdgpu-target=${AMDGPU_TARGET})
target_link_libraries(migraphx_device -amdgpu-target=${AMDGPU_TARGET})
endforeach()
else()
target_compile_options(migraphx_device PRIVATE -Wno-cuda-compat)
endif()
# Interface-only target that bundles the flags needed to build HIP device code.
# Both commands use INTERFACE scope, so nothing is compiled here; any target
# that links compile_for_gpu inherits these compile options, the hip::device
# dependency and the extra link flags.
add_library(compile_for_gpu INTERFACE)
target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument)
check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE)
if(HAS_HIP_LAMBDA_HOST_DEVICE)
message(STATUS "Enable -fhip-lambda-host-device")
target_compile_options(migraphx_device PRIVATE -fhip-lambda-host-device)
target_compile_options(compile_for_gpu INTERFACE -fhip-lambda-host-device)
endif()
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_device)
target_link_libraries(migraphx_device PUBLIC migraphx)
target_link_libraries(migraphx_device PRIVATE compile_for_gpu)
target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
# Library used only to check the kernel headers; EXCLUDE_FROM_ALL keeps it out
# of the default build, so it is built only when requested explicitly.
add_library(kernel_file_check EXCLUDE_FROM_ALL)
# KERNEL_FILES is expected to be set earlier in this file (not shown here).
# For every entry, generate a one-line .cpp in the build tree that includes the
# matching <migraphx/kernels/NAME.hpp>, so each header is compiled as its own
# translation unit.
foreach(KERNEL_FILE ${KERNEL_FILES})
# NAME_WE: file name without directory and without its extension.
get_filename_component(KERNEL_BASE_FILE ${KERNEL_FILE} NAME_WE)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp "#include <migraphx/kernels/${KERNEL_BASE_FILE}.hpp>\n")
target_sources(kernel_file_check PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp)
endforeach()
# The generated stubs resolve their include against the source-tree kernels.
target_include_directories(kernel_file_check PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/>)
# Pull in the device compile/link flags and run clang-tidy over the stubs.
target_link_libraries(kernel_file_check compile_for_gpu)
rocm_clang_tidy_check(kernel_file_check)
add_library(migraphx_gpu
abs.cpp
analyze_streams.cpp
......@@ -122,6 +128,7 @@ add_library(migraphx_gpu
batch_norm_inference.cpp
clip.cpp
code_object_op.cpp
compile_ops.cpp
compile_hip.cpp
compile_hip_code_object.cpp
compile_pointwise.cpp
......@@ -340,7 +347,7 @@ target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
add_subdirectory(driver)
rocm_install_targets(
TARGETS migraphx_gpu migraphx_device
TARGETS migraphx_gpu migraphx_device compile_for_gpu
INCLUDE
${CMAKE_CURRENT_SOURCE_DIR}/include
)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment