Commit dd033c75 authored by Paul

Merge branch 'develop' into mlir-c

parents 50f87a87 8829d6ab
#ifndef MIGRAPHX_GUARD_OPERATORS_NONZERO_HPP
#define MIGRAPHX_GUARD_OPERATORS_NONZERO_HPP

#include <migraphx/shape_for_each.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/config.hpp>
#include <migraphx/float_equal.hpp>
#include <migraphx/par_for.hpp>
#include <cmath>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {

struct nonzero
{
    std::string name() const { return "nonzero"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(1).standard();
        auto elem_num = inputs[0].elements();
        auto dim_num  = inputs[0].lens().size();
        // The nonzero count is unknown until runtime, so size the output for the
        // worst case: one column per input element, one row per input dimension.
        std::vector<std::size_t> out_lens = {dim_num, elem_num};

        return {shape::int64_type, out_lens};
    }

    argument compute(const shape& output_shape, std::vector<argument> args) const
    {
        // Collect the multi-index of every element that is not (ULP-)equal to 0
        std::vector<std::vector<std::size_t>> vec_idx;
        auto s = args.front().get_shape();
        args.front().visit([&](auto v) {
            shape_for_each(s, [&](auto idx) {
                if(not float_equal(v[s.index(idx)], 0))
                {
                    vec_idx.push_back(idx);
                }
            });
        });

        // Zero-fill the result, then write one coordinate column per nonzero element
        argument result{output_shape};
        result.visit([&](auto output) {
            std::fill(output.begin(), output.end(), 0);
            par_for(vec_idx.size(), [&](auto i) {
                for(std::size_t j = 0; j < vec_idx.front().size(); ++j)
                {
                    output[output_shape.index({j, i})] = vec_idx[i][j];
                }
            });
        });

        return result;
    }
};

} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
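A minimal usage sketch of the operator above (illustrative only; it assumes the reference target is registered as "ref" and uses the program/module API seen elsewhere in this repo):

#include <migraphx/program.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_target.hpp>
#include <vector>

int main()
{
    migraphx::program p;
    auto* mm = p.get_main_module();
    migraphx::shape s{migraphx::shape::float_type, {2, 2}};
    std::vector<float> data{1.0f, 0.0f, 1.0f, 1.0f};
    auto input = mm->add_literal(migraphx::literal{s, data});
    mm->add_instruction(migraphx::make_op("nonzero"), input);
    p.compile(migraphx::make_target("ref"));
    auto result = p.eval({}).back();
    // result has shape {2, 4}: row 0 holds the first coordinate and row 1 the
    // second coordinate of the nonzero elements (0,0), (1,0), (1,1); the unused
    // fourth column stays zero-filled.
    return 0;
}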
...@@ -18,21 +18,12 @@ namespace op {
struct quant_dot
{
-    int32_t alpha = 1;
-    int32_t beta  = 1;
-    template <class Self, class F>
-    static auto reflect(Self& self, F f)
-    {
-        return pack(f(self.alpha, "alpha"), f(self.beta, "beta"));
-    }
    value attributes() const { return {{"general_data_type", "dot"}}; }
    std::string name() const { return "quant_dot"; }
    shape compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{{inputs.at(0), inputs.at(1)}, *this}.same_type();
+        check_shapes{{inputs.at(0), inputs.at(1)}, *this}.same_type().has(2);
        const shape& a = inputs.at(0);
        const shape& b = inputs.at(1);
        auto t         = a.type();
...@@ -64,18 +55,6 @@ struct quant_dot
        auto out_lens   = a.lens();
        out_lens[dim_1] = b.lens()[dim_1];
-        if(inputs.size() == 3 && out_lens != inputs.at(2).lens())
-        {
-            MIGRAPHX_THROW("QUANT_DOT: dimension mismatch, operand C: {" +
-                           to_string_range(inputs.at(2).lens()) +
-                           "}, cannot add to operand A * B: {" + to_string_range(out_lens) + "}");
-        }
-        if(inputs.size() == 3 && inputs.at(2).type() != shape::int32_type)
-        {
-            MIGRAPHX_THROW("QUANT_DOT: operand C type must be int32");
-        }
        return {shape::int32_type, out_lens};
    }
};
...
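With alpha, beta, and the optional C operand gone, a quantized accumulate now has to be spelled as an explicit add; a hedged sketch (mm, a_int8, b_int8, and c_int32 are hypothetical names for illustration):

// int8 x int8 -> int32 product; exactly two arguments after this change
auto ab  = mm->add_instruction(migraphx::make_op("quant_dot"), a_int8, b_int8);
// the former beta * C accumulation becomes a separate elementwise add
auto out = mm->add_instruction(migraphx::make_op("add"), ab, c_int32);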
...@@ -57,6 +57,7 @@
#include <migraphx/op/mul.hpp>
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/op/neg.hpp>
+#include <migraphx/op/nonzero.hpp>
#include <migraphx/op/outline.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/op/pooling.hpp>
...
#ifndef MIGRAPHX_GUARD_RTGLIB_REMAP_HPP
#define MIGRAPHX_GUARD_RTGLIB_REMAP_HPP

#include <string>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/config.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

struct module;

/**
 * Remap operators, e.g. fuse an add that follows a dot into a single
 * three-argument dot (see remap.cpp below).
 */
struct remap
{
    std::string name() const { return "remap"; }
    void apply(module& p) const;
};

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <cmath>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

struct parse_depthtospace : op_parser<parse_depthtospace>
{
    std::vector<op_desc> operators() const { return {{"DepthToSpace"}}; }

    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
        auto s = args[0]->get_shape();

        // mode attribute of DepthToSpace: "DCR" (default) or "CRD"
        auto mode = std::string("DCR");
        if(contains(info.attributes, "mode"))
        {
            mode = info.attributes.at("mode").s();
        }

        // blocksize attribute of DepthToSpace
        int blocksize = 0;
        if(contains(info.attributes, "blocksize"))
        {
            blocksize = info.attributes.at("blocksize").i();
        }
        if(blocksize < 1)
        {
            MIGRAPHX_THROW("DepthToSpace: blocksize is less than 1");
        }

        // calculate dimensions
        auto lens1 = s.lens();
        auto lens2 = s.lens();

        unsigned long divisor = std::pow(blocksize, 2);
        if((lens2[1] % divisor) == 0)
            lens2[1] = lens2[1] / divisor;
        else
            MIGRAPHX_THROW(
                "DepthToSpace: channel dimension must be divisible by blocksize squared");
        lens1.push_back(lens1[2]);
        lens1.push_back(lens1[3]);
        lens2[2] = lens2[2] * blocksize;
        lens2[3] = lens2[3] * blocksize;
        lens1[2] = blocksize;
        std::vector<int64_t> perm;

        if(mode == "DCR")
        {
            lens1[3] = lens1[1] / divisor;
            lens1[1] = blocksize;
            perm     = {0, 3, 4, 1, 5, 2};
        }
        else if(mode == "CRD")
        {
            lens1[1] = lens1[1] / divisor;
            lens1[3] = blocksize;
            perm     = {0, 1, 4, 2, 5, 3};
        }
        else
            MIGRAPHX_THROW("DepthToSpace: mode must be either DCR or CRD");

        auto temp1 = info.add_instruction(make_op("reshape", {{"dims", lens1}}), args[0]);
        auto temp2 = info.add_instruction(make_op("transpose", {{"permutation", perm}}), temp1);
        return info.add_instruction(make_op("reshape", {{"dims", lens2}}),
                                    info.make_contiguous(temp2));
    }
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
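To make the index gymnastics above concrete, a worked example with illustrative values: input NCHW shape (1, 8, 3, 2) and blocksize b = 2 in DCR mode.

// reshape:   (1, 8, 3, 2) -> lens1 = (1, 2, 2, 2, 3, 2)    split C into (b, b, C/b^2)
// transpose: perm {0, 3, 4, 1, 5, 2} -> (1, 2, 3, 2, 2, 2) interleave the block axes with H and W
// reshape:   -> lens2 = (1, 2, 6, 4)                        i.e. (N, C/b^2, H*b, W*b)
// CRD mode splits C as (C/b^2, b, b) instead, and perm {0, 1, 4, 2, 5, 3}
// produces the same (N, C/b^2, H, b, W, b) ordering before the final reshape.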
...@@ -61,7 +61,7 @@ struct parse_gemm : op_parser<parse_gemm>
                 ? info.add_instruction(make_op("transpose", {{"permutation", perm}}), args[1])
                 : args[1];
-        auto ret = info.add_instruction(make_op("dot", {{"alpha", 1.0f}, {"beta", 0.0f}}), l1, l2);
+        auto ret = info.add_instruction(make_op("dot"), l1, l2);
        if(args.size() == 3)
        {
...
...@@ -66,9 +66,7 @@ struct parse_matmul : op_parser<parse_matmul>
                    make_op("multibroadcast", {{"out_lens", l1_broadcasted_lens}}), l1);
            }
        }
-        auto dot_res =
-            info.add_instruction(make_op(opd.op_name, {{"alpha", 1}, {"beta", 0}}), bl0, bl1);
+        instruction_ref dot_res = info.add_instruction(make_op(opd.op_name), bl0, bl1);
        int64_t num_axis = static_cast<int64_t>(dot_res->get_shape().lens().size());
        if(is_a_prepended)
        {
...
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <algorithm>
#include <chrono>
#include <random>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

struct parse_multinomial : op_parser<parse_multinomial>
{
    std::vector<op_desc> operators() const { return {{"Multinomial"}}; }

    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
        int dtype = 6;
        if(contains(info.attributes, "dtype"))
            dtype = info.attributes.at("dtype").i();
        shape::type_t output_type = get_type(dtype);

        size_t sample_size = 1;
        if(contains(info.attributes, "sample_size"))
            sample_size = info.attributes.at("sample_size").i();

        // Use a time-based seed unless one is given explicitly
        float seed = static_cast<float>(
            std::chrono::high_resolution_clock::now().time_since_epoch().count());
        if(contains(info.attributes, "seed"))
            seed = info.attributes.at("seed").f();

        // Subtract the per-batch maximum log-probability, making the per-batch max 0
        auto maxes =
            info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), args[0]);
        auto mb_maxes = info.add_instruction(
            migraphx::make_op("multibroadcast", {{"out_lens", args[0]->get_shape().lens()}}),
            maxes);
        auto cdf = info.add_instruction(migraphx::make_op("sub"), args[0], mb_maxes);
        // Take the element-wise exponent to get probabilities in the range (0, 1]
        cdf = info.add_instruction(migraphx::make_op("exp"), cdf);
        // Compute the cumulative distribution function
        cdf = info.add_instruction(
            migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);

        // Pre-compute random distribution
        std::mt19937 gen(seed);
        std::uniform_real_distribution<> dis(0.0, 1.0);
        size_t batch_size = args[0]->get_shape().lens().front();
        migraphx::shape dist_shape{migraphx::shape::float_type, {batch_size, sample_size}};

        std::vector<float> random_dist(batch_size * sample_size);
        std::generate(random_dist.begin(), random_dist.end(), [&]() { return dis(gen); });
        auto dist_lit = info.add_literal(migraphx::literal{dist_shape, random_dist});

        return info.add_instruction(
            migraphx::make_op("multinomial", {{"dtype", output_type}}), cdf, dist_lit);
    }
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
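The parser hands the op an unnormalized CDF plus uniform samples; a minimal host-side sketch of the per-row selection the "multinomial" op is presumably doing (an assumption for illustration, not this repo's kernel):

#include <algorithm>
#include <cstddef>
#include <vector>

// Pick the first class whose cumulative weight exceeds r scaled by the row
// total, so the CDF does not need to be normalized to 1.
std::size_t sample_from_cdf(const std::vector<float>& cdf, float r)
{
    float target = r * cdf.back();
    return std::upper_bound(cdf.begin(), cdf.end(), target) - cdf.begin();
}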
...@@ -9,7 +9,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

template <class T>
-std::vector<std::size_t> nonzero_indices(const std::vector<T>& data)
+static std::vector<std::size_t> nonzero_indices(const std::vector<T>& data)
{
    std::vector<std::size_t> indices;
    for(std::size_t i = 0; i < data.size(); ++i)
...@@ -31,8 +31,12 @@ struct parse_nonzero : op_parser<parse_nonzero>
                          std::vector<instruction_ref> args) const
    {
        migraphx::argument data_arg = args.back()->eval();
-        check_arg_empty(data_arg, "PARSE_NONZERO: cannot support non-constant input!");
+        if(data_arg.empty())
+        {
+            return info.add_instruction(make_op("nonzero"), args);
+        }
+        else
+        {
            std::vector<std::size_t> indices;
            data_arg.visit([&](auto val) {
                using val_type = std::remove_cv_t<typename decltype(val)::value_type>;
...@@ -56,6 +60,7 @@ struct parse_nonzero : op_parser<parse_nonzero>
            return info.add_literal(literal(out_s, out_data));
        }
+    }
};

} // namespace onnx
...
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
#include <algorithm>
#include <chrono>
#include <random>
#include <set>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

struct parse_randomnormal_ops : op_parser<parse_randomnormal_ops>
{
    const std::set<shape::type_t> valid_types = {
        shape::float_type, shape::half_type, shape::double_type};

    std::vector<op_desc> operators() const { return {{"RandomNormal"}, {"RandomNormalLike"}}; }

    instruction_ref parse(const op_desc& opd,
                          const onnx_parser& parser,
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
        int dtype      = 1;
        bool use_dtype = false;
        if(contains(info.attributes, "dtype"))
        {
            dtype     = info.attributes.at("dtype").i();
            use_dtype = true;
        }
        shape::type_t out_type = get_type(dtype);
        if(not contains(valid_types, out_type))
            MIGRAPHX_THROW(opd.op_name + ": invalid output type: " + std::to_string(dtype) +
                           ". Valid types are 1 (float), 10 (half), and 11 (double).");

        float mean = 0.0;
        if(contains(info.attributes, "mean"))
            mean = info.attributes.at("mean").f();

        float scale = 1.0;
        if(contains(info.attributes, "scale"))
            scale = info.attributes.at("scale").f();

        float seed = static_cast<float>(
            std::chrono::high_resolution_clock::now().time_since_epoch().count());
        if(contains(info.attributes, "seed"))
            seed = info.attributes.at("seed").f();

        shape out_shape;
        if(contains(info.attributes, "shape"))
        {
            // RandomNormal:
            // output type and shape must come from attributes
            std::vector<int> out_lens;
            literal ls = parser.parse_value(info.attributes.at("shape"));
            ls.visit([&](auto s) { out_lens.assign(s.begin(), s.end()); });
            out_shape = shape{out_type, out_lens};
        }
        else if(args.size() == 1)
        {
            // RandomNormalLike:
            // output type and shape are the same as the input's by default;
            // dtype is used instead when the attribute is set
            if(not contains(valid_types, args[0]->get_shape().type()))
                MIGRAPHX_THROW(opd.op_name + ": invalid output type: " +
                               std::to_string(args[0]->get_shape().type()) +
                               ". Valid types are float, half, and double.");
            out_shape =
                use_dtype ? shape{out_type, args[0]->get_shape().lens()} : args[0]->get_shape();
        }
        else
        {
            MIGRAPHX_THROW(opd.op_name +
                           ": cannot deduce shape without shape attribute or argument.");
        }

        std::mt19937 gen(seed);
        std::normal_distribution<> d(mean, scale);
        std::vector<double> rand_vals(out_shape.elements());
        std::generate(rand_vals.begin(), rand_vals.end(), [&]() { return d(gen); });

        return info.add_literal(literal{out_shape, rand_vals});
    }
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
#include <algorithm>
#include <chrono>
#include <random>
#include <set>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

struct parse_randomuniform_ops : op_parser<parse_randomuniform_ops>
{
    const std::set<shape::type_t> valid_types = {
        shape::float_type, shape::half_type, shape::double_type};

    std::vector<op_desc> operators() const { return {{"RandomUniform"}, {"RandomUniformLike"}}; }

    instruction_ref parse(const op_desc& opd,
                          const onnx_parser& parser,
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
        int dtype      = 1;
        bool use_dtype = false;
        if(contains(info.attributes, "dtype"))
        {
            dtype     = info.attributes.at("dtype").i();
            use_dtype = true;
        }
        shape::type_t out_type = get_type(dtype);
        if(not contains(valid_types, out_type))
            MIGRAPHX_THROW(opd.op_name + ": invalid output type: " + std::to_string(dtype) +
                           ". Valid types are 1 (float), 10 (half), and 11 (double).");

        float high = 1.0;
        if(contains(info.attributes, "high"))
            high = info.attributes.at("high").f();

        float low = 0.0;
        if(contains(info.attributes, "low"))
            low = info.attributes.at("low").f();

        float seed = static_cast<float>(
            std::chrono::high_resolution_clock::now().time_since_epoch().count());
        if(contains(info.attributes, "seed"))
            seed = info.attributes.at("seed").f();

        shape out_shape;
        if(contains(info.attributes, "shape"))
        {
            // RandomUniform:
            // output type and shape must come from attributes
            std::vector<int> out_lens;
            literal ls = parser.parse_value(info.attributes.at("shape"));
            ls.visit([&](auto s) { out_lens.assign(s.begin(), s.end()); });
            out_shape = shape{out_type, out_lens};
        }
        else if(args.size() == 1)
        {
            // RandomUniformLike:
            // output type and shape are the same as the input's by default;
            // dtype is used instead when the attribute is set
            if(not contains(valid_types, args[0]->get_shape().type()))
                MIGRAPHX_THROW(opd.op_name + ": invalid output type: " +
                               std::to_string(args[0]->get_shape().type()) +
                               ". Valid types are float, half, and double.");
            out_shape =
                use_dtype ? shape{out_type, args[0]->get_shape().lens()} : args[0]->get_shape();
        }
        else
        {
            MIGRAPHX_THROW(opd.op_name +
                           ": cannot deduce shape without shape attribute or argument.");
        }

        std::mt19937 gen(seed);
        // std::uniform_real_distribution's parameters are (min, max)
        std::uniform_real_distribution<> d(low, high);
        std::vector<double> rand_vals(out_shape.elements());
        std::generate(rand_vals.begin(), rand_vals.end(), [&]() { return d(gen); });

        return info.add_literal(literal{out_shape, rand_vals});
    }
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
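Note that both random parsers above materialize the randomness as a literal at parse time: the values are drawn once with std::mt19937 while the graph is built, so every evaluation of the compiled program returns the same tensor. Setting the seed attribute makes that tensor reproducible across builds; otherwise the high-resolution-clock seed makes it differ from parse to parse.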
...@@ -309,8 +309,11 @@ std::vector<argument> program::eval(parameter_map params) const
                       double t2 = t.record<milliseconds>();
                       std::cout << "Time: " << t1 << "ms, " << t2 << "ms" << std::endl;
                       if(trace_level > 1 and ins->name().front() != '@' and
-                          ins->name() != "load")
-                           std::cout << "Output: " << result << std::endl;
+                          ins->name() != "load" and not result.empty())
+                       {
+                           target tgt = make_target(this->impl->target_name);
+                           std::cout << "Output: " << tgt.copy_from(result) << std::endl;
+                       }
                       return result;
                   }));
}
...
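The guard and copy here matter because with an offloading target the instruction's result may live in device memory: result.empty() skips instructions that produce no data, and tgt.copy_from(result) brings the buffer back to the host so it can be streamed to std::cout.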
#include <migraphx/remap.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/float_equal.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/add.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

namespace {
struct find_dot_add
{
    auto matcher() const
    {
        return match::name("add")(match::any_of(
            match::args(match::name("dot")(match::nargs(2)).bind("dot"), match::any().bind("a")),
            match::args(match::used_once().bind("a"),
                        match::name("dot")(match::nargs(2)).bind("dot"))));
    }

    void apply(module& p, match::matcher_result r) const
    {
        auto ins     = r.result;
        auto dot_ins = r.instructions["dot"];
        auto a_ins   = r.instructions["a"];

        auto dot = any_cast<op::dot>(dot_ins->get_operator());
        dot.beta = 1;
        p.replace_instruction(ins, dot, dot_ins->inputs()[0], dot_ins->inputs()[1], a_ins);
    }
};
} // namespace

void remap::apply(module& p) const { match::find_matches(p, find_dot_add{}); }

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
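In effect, find_dot_add turns add(dot(x, w), c) — or the mirrored add(c, dot(x, w)) — into one three-argument dot, roughly:

// before: d   = dot(x, w); out = add(d, c)
// after:  out = dot(x, w, c)   // with beta = 1, folding the addition into the dot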
...@@ -84,13 +84,7 @@ struct match_find_quantizable_ops
        }
        else if(qop->name() == "dot")
        {
-            auto dot_op = any_cast<op::dot>(qop->get_operator());
-            if(!(float_equal(dot_op.alpha, 1.0f) and float_equal(dot_op.beta, 0.0f)))
-                return;
-            if(qop_args.size() == 3)
-                qop_args.pop_back();
-            dq = m.insert_instruction(
-                qop, migraphx::make_op("quant_dot", {{"alpha", 1}, {"beta", 0}}), qop_args);
+            dq = m.insert_instruction(qop, migraphx::make_op("quant_dot"), qop_args);
        }
        auto ins_type = qop->get_shape().type();
        dq_scale      = m.add_literal(literal({ins_type}, {scale}));
...
...@@ -3,7 +3,6 @@
#include <migraphx/check_context.hpp>
#include <migraphx/adjust_allocation.hpp>
#include <migraphx/dead_code_elimination.hpp>
-#include <migraphx/decompose.hpp>
#include <migraphx/eliminate_allocation.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/eliminate_concat.hpp>
...@@ -14,7 +13,6 @@
#include <migraphx/memory_coloring.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
-#include <migraphx/remap.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
...@@ -52,8 +50,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
        dead_code_elimination{},
        eliminate_data_type{unsupported_types, shape::type_t::float_type},
        dead_code_elimination{},
-        decompose{},
-        dead_code_elimination{},
        simplify_reshapes{},
        eliminate_identity{},
        eliminate_pad{},
...
...@@ -59,6 +59,8 @@ add_library(migraphx_device
    device/mul.cpp
    device/mul_add.cpp
    device/mul_add_relu.cpp
+   device/multinomial.cpp
+   device/nonzero.cpp
    device/pad.cpp
    device/pow.cpp
    device/prelu.cpp
...@@ -143,6 +145,8 @@ add_library(migraphx_gpu
    lrn.cpp
    leaky_relu.cpp
    mlir_conv.cpp
+   multinomial.cpp
+   nonzero.cpp
    pack_args.cpp
    pack_int8_args.cpp
    pad.cpp
...@@ -199,6 +203,8 @@ register_migraphx_gpu_ops(hip_
    max
    min
    mul
+   multinomial
+   nonzero
    pad
    pow
    prelu
...
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_FLOAT_EQUAL_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_FLOAT_EQUAL_HPP

#include <migraphx/requires.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <cmath>
#include <limits>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

template <class... Ts>
using common_type = typename std::common_type<Ts...>::type;
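
// Floating-point overload: x and y compare equal when y falls within one ULP
// of x in either direction; any NaN or infinity compares unequal. This mirrors
// the host-side migraphx::float_equal semantics.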
template <class T, MIGRAPHX_REQUIRES(is_floating_point<T>{})>
__device__ bool float_equal_device(T x, T y)
{
    return std::isfinite(x) and std::isfinite(y) and
           std::nextafter(x, std::numeric_limits<T>::lowest()) <= y and
           std::nextafter(x, std::numeric_limits<T>::max()) >= y;
}

template <class T, MIGRAPHX_REQUIRES(not is_floating_point<T>{})>
__device__ bool float_equal_device(T x, T y)
{
    return x == y;
}

template <class T, class U>
__device__ bool float_equal(T x, U y)
{
    return float_equal_device<common_type<T, U>>(x, y);
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
...@@ -12,10 +12,6 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

-#if __AMDGCN_WAVEFRONT_SIZE == 32
-#define MIGRAPHX_NO_DPP
-#endif
-
#ifdef MIGRAPHX_NO_DPP
template <index_int N,
          class Op,
...@@ -98,10 +94,12 @@ __device__ void dpp_reduce(T& in, Op op)
    in  = op(in, out);
    out = dpp_mov<dpp_row_shr(8), 0xf, 0xc>(in);
    in  = op(in, out);
+#if __AMDGCN_WAVEFRONT_SIZE == 64
    out = dpp_mov<dpp_row_bcast(15), 0xa>(in);
    in  = op(in, out);
    out = dpp_mov<dpp_row_bcast(31), 0xc>(in);
    in  = op(in, out);
+#endif
}

__device__ inline void dpp_reduce(float& x, sum)
...@@ -118,9 +116,11 @@ __device__ inline void dpp_reduce(float& x, sum)
        "s_nop 1\n"
        "v_add_f32 %0 %0 %0 row_shr:8 bank_mask:0xc\n"
        "s_nop 1\n"
+#if __AMDGCN_WAVEFRONT_SIZE == 64
        "v_add_f32 %0 %0 %0 row_bcast:15 row_mask:0xa\n"
        "s_nop 1\n"
        "v_add_f32 %0 %0 %0 row_bcast:31 row_mask:0xc\n"
+#endif
        "s_nop 1\n"
        : "=v"(x)
        : "0"(x));
...@@ -135,21 +135,27 @@ template <index_int N,
          MIGRAPHX_REQUIRES(not std::is_integral<ForStride>{})>
__device__ auto block_reduce(index idx, Op op, T init, ForStride fs, F f)
{
+#if __AMDGCN_WAVEFRONT_SIZE == 32
+    constexpr index_int nthreads = 16;
+#else
+    constexpr index_int nthreads = 64;
+#endif
    using type = decltype(f(deduce_for_stride(fs)));
-    MIGRAPHX_DEVICE_SHARED type buffer[N / 64];
+    MIGRAPHX_DEVICE_SHARED type buffer[N / nthreads];
    type x = init;
    fs([&](auto i) { x = op(x, f(i)); });
    dpp_reduce(x, op);
-    const auto ldsidx = idx.local / 64;
-    if((idx.local % 64) == 63)
+    const auto ldsidx = idx.local / nthreads;
+    if((idx.local % nthreads) == nthreads - 1)
    {
        buffer[ldsidx] = x;
    }
    __syncthreads();

    type y = init;
-    for(index_int i = 0; i < idx.nlocal() / 64; i++)
+    for(index_int i = 0; i < idx.nlocal() / nthreads; i++)
    {
        y = op(y, buffer[i]);
    }
...
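Background for the wavefront-size split: DPP row operations (row_shr and friends) act within 16-lane rows, while the row_bcast patterns that combine rows into a full 64-lane result exist only on wavefront-64 hardware. On wavefront-32 (Navi) targets, dpp_reduce therefore leaves one partial result per 16-lane row, which is why block_reduce switches nthreads from 64 to 16 there and collects one partial per row through LDS instead of disabling DPP entirely.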
...@@ -129,6 +129,21 @@ __device__ __host__ T to_hip_type(T x)
// Hip doesn't support __fp16
inline __device__ __host__ float to_hip_type(gpu_half x) { return x; }

+#define MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(trait, T) \
+    template <class X>                             \
+    struct trait : std::trait<X>                   \
+    {                                              \
+    };                                             \
+                                                   \
+    template <>                                    \
+    struct trait<T> : std::true_type               \
+    {                                              \
+    };
+
+MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_floating_point, __fp16)
+MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_signed, __fp16)
+MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_arithmetic, __fp16)
+
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
...
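These trait extensions are what let the is_floating_point-gated overloads in the float_equal.hpp header above dispatch __fp16 through the ULP-based comparison, which std::is_floating_point alone would reject. A hypothetical compile-time check of the effect (device compilation assumed):

static_assert(migraphx::gpu::device::is_floating_point<__fp16>{},
              "__fp16 now routes to the ULP-based float_equal_device overload");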
...@@ -8,6 +8,14 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

+#ifndef MIGRAPHX_WORKAROUND_NAVI_DPP_SYNC
+#if __AMDGCN_WAVEFRONT_SIZE == 32
+#define MIGRAPHX_WORKAROUND_NAVI_DPP_SYNC 1
+#else
+#define MIGRAPHX_WORKAROUND_NAVI_DPP_SYNC 0
+#endif
+#endif
+
template <class T>
struct vector_type
{
...@@ -86,10 +94,13 @@ __device__ void layernorm(index_int i,
    const bool in_range = idx.local < relements_v;

    auto mean = [&](auto z) {
-        return auto_block_reduce<MaxBlockSize>(
-                   idx, sum{}, value_type(0), relements_v, [=](auto) { return z; }) /
-               value_type(relements);
+        auto m = auto_block_reduce<MaxBlockSize>(
+                     idx, sum{}, value_type(0), relements_v, [=](auto) { return z; }) /
+                 value_type(relements);
+#if MIGRAPHX_WORKAROUND_NAVI_DPP_SYNC
+        __builtin_amdgcn_s_barrier();
+#endif
+        return m;
    };

    // m = x - mean(x)
...
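The explicit __builtin_amdgcn_s_barrier() after the mean reduction is enabled only on wavefront-32 (Navi) targets; judging by the macro name, it works around a missing cross-lane synchronization after the DPP-based block reduction on those GPUs. That reading is an inference from this hunk, not documented hardware behavior.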