Unverified commit ca300bd6 authored by Chris Austen, committed by GitHub

Merge branch 'develop' into blas_tuning

parents 5adb597c e7486577
@@ -35,7 +35,7 @@ fastjsonschema==2.16.3
     # via rocm-docs-core
 gitdb==4.0.10
     # via gitpython
-gitpython==3.1.32
+gitpython==3.1.37
     # via rocm-docs-core
 idna==3.4
     # via requests
@@ -87,7 +87,7 @@ requests==2.28.2
     # via
     #   pygithub
     #   sphinx
-rocm-docs-core==0.24.2
+rocm-docs-core==0.26.0
     # via -r requirements.in
 smmap==5.0.0
     # via gitdb
@@ -130,7 +130,7 @@ sphinxcontrib-serializinghtml==1.1.5
     # via sphinx
 typing-extensions==4.5.0
     # via pydata-sphinx-theme
-urllib3==1.26.15
+urllib3==1.26.18
     # via requests
 wrapt==1.15.0
     # via deprecated
@@ -187,6 +187,13 @@ struct value_parser
     }
 };
 
+// version for std::optional object
+template <class T>
+struct value_parser<std::optional<T>>
+{
+    static T apply(const std::string& x) { return value_parser<T>::apply(x); }
+};
+
 struct argument_parser
 {
     struct argument
...
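The new value_parser<std::optional<T>> specialization lets driver flags bind to std::optional members, so "flag not passed" no longer needs a sentinel value. Below is a minimal standalone sketch of the same pattern; the stream-based parser and main() are illustrative stand-ins, not the MIGraphX argument_parser.

#include <iostream>
#include <optional>
#include <sstream>
#include <string>

// Generic parser: read a value of type T from a string (illustrative only)
template <class T>
struct value_parser
{
    static T apply(const std::string& x)
    {
        T value{};
        std::stringstream ss(x);
        ss >> value;
        return value;
    }
};

// Specialization for std::optional<T>: parse the wrapped type; assigning the
// result to an std::optional<T> member engages it, so an absent flag stays empty.
template <class T>
struct value_parser<std::optional<T>>
{
    static T apply(const std::string& x) { return value_parser<T>::apply(x); }
};

int main()
{
    std::optional<double> rms_tol; // stays empty unless the flag was passed
    rms_tol = value_parser<std::optional<double>>::apply("0.001");
    std::cout << (rms_tol ? *rms_tol : -1.0) << "\n"; // prints 0.001
}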
@@ -540,22 +540,17 @@ struct params : command<params>
 struct verify : command<verify>
 {
     compiler c;
-    // Set to -1. as nonsense initial value
-    double rms_tol = -1.0;
-    double atol    = -1.0;
-    double rtol    = -1.0;
+    std::optional<double> rms_tol;
+    std::optional<double> atol;
+    std::optional<double> rtol;
     bool per_instruction = false;
     bool reduce          = false;
     void parse(argument_parser& ap)
     {
         c.parse(ap);
-        ap(rms_tol, {"--rms-tol"}, ap.help("Tolerance for the RMS error (Default: 0.001)"));
-        ap(atol,
-           {"--atol"},
-           ap.help("Tolerance for the elementwise absolute difference (Default: 0.001)"));
-        ap(rtol,
-           {"--rtol"},
-           ap.help("Tolerance for the elementwise relative difference (Default: 0.001)"));
+        ap(rms_tol, {"--rms-tol"}, ap.help("Tolerance for the RMS error"));
+        ap(atol, {"--atol"}, ap.help("Tolerance for the elementwise absolute difference"));
+        ap(rtol, {"--rtol"}, ap.help("Tolerance for the elementwise relative difference"));
         ap(per_instruction,
            {"-i", "--per-instruction"},
            ap.help("Verify each instruction"),
@@ -572,33 +567,6 @@ struct verify : command<verify>
         auto t = c.ct.get_target();
         auto m = c.parameters.generate(p, t, true, c.l.batch);
 
-        // TODO remove this and make the driver able to figure out datatype most used in the model
-        // then set the tolerances appropriately. Need to check here because c.to_fp16 only set
-        // after argument_parser.parse() is run. This code is complicated because there's not a
-        // good way to change the default tolerances after reading `--fp16` but before reading
-        // `--rms-tol`, `--atol`, and `--rtol`.
-        migraphx::verify::tolerance tols{};
-        if(c.to_fp16)
-        {
-            tols = migraphx::verify::tolerance{8e-2, 4e-2, 4e-2};
-        }
-        if(not float_equal(this->rms_tol, -1.0))
-        {
-            tols.rms_tol = this->rms_tol;
-        }
-        if(not float_equal(this->atol, -1.0))
-        {
-            tols.atol = this->atol;
-        }
-        if(not float_equal(this->rtol, -1.0))
-        {
-            tols.rtol = this->rtol;
-        }
-        std::cout << "rms_tol: " << tols.rms_tol << std::endl;
-        std::cout << "atol: " << tols.atol << std::endl;
-        std::cout << "rtol: " << tols.rtol << std::endl;
-
         auto quantize = precision::fp32;
         if(c.to_fp16)
         {
@@ -609,6 +577,11 @@ struct verify : command<verify>
             quantize = precision::int8;
         }
 
+        auto tols = get_tolerances(p, quantize, rms_tol, atol, rtol);
+        std::cout << "rms_tol: " << tols.rms_tol << std::endl;
+        std::cout << "atol: " << tols.atol << std::endl;
+        std::cout << "rtol: " << tols.rtol << std::endl;
+
         if(per_instruction)
         {
             verify_instructions(p, t, c.co, quantize, tols);
...
@@ -36,6 +36,42 @@ namespace migraphx {
 namespace driver {
 inline namespace MIGRAPHX_INLINE_NS {
 
+/**
+ * Gives tolerances based on user input (`rms_tol`, `atol`, `rtol` parameters) and defaults.
+ * Sets fp16 tolerances if the `quantize` input is fp16 or any fp16 instruction is found in the
+ * model.
+ */
+verify::tolerance get_tolerances(const program& p,
+                                 precision quantize,
+                                 std::optional<double> rms_tol,
+                                 std::optional<double> atol,
+                                 std::optional<double> rtol)
+{
+    bool has_fp16 = any_of(p.get_modules(), [](auto&& m) {
+        return any_of(*m, [](auto&& ins) { return (ins.get_shape().type() == shape::half_type); });
+    });
+    migraphx::verify::tolerance result{};
+    if(has_fp16 or quantize == precision::fp16)
+    {
+        result.rms_tol = 8e-2;
+        result.atol    = 4e-2;
+        result.rtol    = 4e-2;
+    }
+    if(rms_tol)
+    {
+        result.rms_tol = *rms_tol;
+    }
+    if(atol)
+    {
+        result.atol = *atol;
+    }
+    if(rtol)
+    {
+        result.rtol = *rtol;
+    }
+    return result;
+}
+
 std::vector<argument> run_ref(program p, const parameter_map& inputs)
 {
     p.compile(migraphx::make_target("ref"));
...
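For reference, the selection logic above follows a simple precedence: start from the defaults, widen them when the model runs in fp16, then let explicit user values win. A hedged standalone sketch of that precedence; the 0.001 defaults mirror the old help text, and pick_tolerances is illustrative, not the MIGraphX API.

#include <iostream>
#include <optional>

struct tolerance
{
    double rms_tol = 0.001; // defaults taken from the removed "(Default: 0.001)" help text
    double atol    = 0.001;
    double rtol    = 0.001;
};

tolerance pick_tolerances(bool fp16,
                          std::optional<double> rms_tol,
                          std::optional<double> atol,
                          std::optional<double> rtol)
{
    tolerance result{};
    if(fp16)
        result = tolerance{8e-2, 4e-2, 4e-2}; // fp16 models get looser defaults
    if(rms_tol)
        result.rms_tol = *rms_tol; // explicit flags override any default
    if(atol)
        result.atol = *atol;
    if(rtol)
        result.rtol = *rtol;
    return result;
}

int main()
{
    auto t = pick_tolerances(true, std::nullopt, 1e-3, std::nullopt);
    std::cout << t.rms_tol << " " << t.atol << " " << t.rtol << "\n"; // 0.08 0.001 0.04
}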
@@ -32,6 +32,12 @@ namespace migraphx {
 namespace driver {
 inline namespace MIGRAPHX_INLINE_NS {
 
+verify::tolerance get_tolerances(const program& p,
+                                 precision quantize,
+                                 std::optional<double> rms_tol,
+                                 std::optional<double> atol,
+                                 std::optional<double> rtol);
+
 void verify_program(const std::string& name,
                     const program& p,
                     const target& t,
...
@@ -46,7 +46,7 @@ struct MIGRAPHX_EXPORT argument : raw_data<argument>
 {
     argument() = default;
 
-    argument(const shape& s);
+    explicit argument(const shape& s);
 
     template <class F, MIGRAPHX_REQUIRES(std::is_pointer<decltype(std::declval<F>()())>{})>
     argument(shape s, F d)
...
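Marking argument(const shape&) as explicit is what forces the `return {output_shape};` call sites elsewhere in this commit to become `return argument{output_shape};`: copy-list-initialization in a return statement cannot select an explicit constructor. A tiny illustration with stand-in types (shape_like and arg are not the MIGraphX classes):

// Why `return {s};` stops compiling once the converting constructor is explicit.
struct shape_like
{
    int elements = 0;
};

struct arg
{
    arg() = default;
    explicit arg(const shape_like& s) : n(s.elements) {}
    int n = 0;
};

arg make_arg(const shape_like& s)
{
    // return {s};   // error: copy-list-initialization cannot use an explicit constructor
    return arg{s};   // OK: direct-list-initialization names the type
}

int main() { return make_arg(shape_like{4}).n == 4 ? 0 : 1; }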
@@ -88,13 +88,13 @@ struct allocate
     {
         if(args.empty())
         {
-            return {output_shape};
+            return argument{output_shape};
         }
         else
         {
            std::vector<std::size_t> output_dims(output_shape.ndim());
            args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); });
-            return {shape{buf_type, output_dims}};
+            return argument{shape{buf_type, output_dims}};
         }
     }
 };
...
@@ -411,7 +411,7 @@ struct pooling
             // for dynamic GlobalPooling, there's no padding
             kernel_dims.insert(kernel_dims.end(), input_lens.begin() + 2, input_lens.end());
             output_shape = dyn_out.computed_shape;
-            result       = dyn_out.computed_shape;
+            result       = argument{dyn_out.computed_shape};
         }
         else if((padding_mode != op::padding_mode_t::default_))
         {
@@ -439,7 +439,7 @@ struct pooling
         {
             kernel_dims  = this->lengths;
             output_shape = dyn_out.computed_shape;
-            result       = dyn_out.computed_shape;
+            result       = argument{dyn_out.computed_shape};
         }
         // Perform the computation and populate result
...
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
struct parse_groupnorm : op_parser<parse_groupnorm>
{
std::vector<op_desc> operators() const { return {{"GroupNormalization"}}; }
instruction_ref parse(const op_desc& /*opd*/,
const onnx_parser& parser,
const onnx_parser::node_info& info,
std::vector<instruction_ref> args) const
{
float epsilon = 1e-5f;
if(contains(info.attributes, "epsilon"))
{
epsilon = parser.parse_value(info.attributes.at("epsilon")).at<float>();
}
size_t num_groups;
if(contains(info.attributes, "num_groups"))
{
num_groups = parser.parse_value(info.attributes.at("num_groups")).at<size_t>();
}
else
{
MIGRAPHX_THROW("PARSE_GROUPNORM: num_groups must be available");
}
if(args.size() != 3)
{
MIGRAPHX_THROW("PARSE_GROUPNORM: invalid input count");
}
auto x = args.at(0);
auto scale = args.at(1);
auto bias = args.at(2);
auto x_shape = x->get_shape();
auto x_dtype = x_shape.type();
auto x_dims = x_shape.lens();
if(x_shape.ndim() <= 2)
{
MIGRAPHX_THROW("PARSE_GROUPNORM: invalid input shape");
}
auto c = x_shape.lens().at(1);
if(c % num_groups != 0)
{
MIGRAPHX_THROW(
"PARSE_GROUPNORM: num_groups should be a divisor of the number of channels");
}
auto group_size = c / num_groups;
if(scale->get_shape().ndim() != 1 or scale->get_shape().lens().at(0) != num_groups)
{
MIGRAPHX_THROW("PARSE_GROUPNORM: scale tensor shape should be num_groups");
}
if(bias->get_shape().ndim() != 1 or bias->get_shape().lens().at(0) != num_groups)
{
MIGRAPHX_THROW("PARSE_GROUPNORM: bias tensor shape should be num_groups");
}
// Original shape: N x C x D1 x ... x Dn
// New shape: N x num_groups x C // num_groups x D1 x ... x Dn
std::vector<size_t> dims = {x_dims.at(0), num_groups, group_size};
std::copy(x_dims.begin() + 2, x_dims.end(), std::back_inserter(dims));
auto x_reshaped = info.add_instruction(make_op("reshape", {{"dims", dims}}), x);
// Axes for D1 x ... x Dn
std::vector<size_t> axes(dims.size() - 2);
std::iota(axes.begin(), axes.end(), 2);
// y = (x - mean) * rsqrt(variance + epsilon) * scale + bias
// mean = reduce_mean({D1, D2, ... Dk}, x)
// variance = reduce_mean({D1, D2, ... Dk}, (x - mean)^2)
auto mean = info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), x_reshaped);
auto x_sub_mean = info.add_common_op("sub", x_reshaped, mean);
auto x_sqdiff_mean = info.add_common_op("sqdiff", x_reshaped, mean);
auto variance =
info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), x_sqdiff_mean);
epsilon =
(x_dtype == migraphx::shape::half_type and std::abs(epsilon) < 1e-7) ? 1e-7 : epsilon;
auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_dtype}, {epsilon}});
auto var_eps = info.add_common_op("add", variance, eps);
auto rsqrt = info.add_instruction(make_op("rsqrt"), var_eps);
auto result = info.add_common_op("mul", x_sub_mean, rsqrt);
auto scale_bcast =
info.add_instruction(make_op("broadcast", {{"axis", 1}, {"out_lens", dims}}), scale);
auto bias_bcast =
info.add_instruction(make_op("broadcast", {{"axis", 1}, {"out_lens", dims}}), bias);
auto scaled = info.add_instruction(make_op("mul"), result, scale_bcast);
auto y = info.add_instruction(make_op("add"), scaled, bias_bcast);
auto y_reshaped = info.add_instruction(make_op("reshape", {{"dims", x_dims}}), y);
return y_reshaped;
}
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
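The GroupNormalization parser above lowers the op to reshape + reduce_mean + elementwise instructions. As a sanity reference, here is a standalone C++ computation of the same formula on a tiny NCHW tensor; the shapes and values are made up for illustration and this is not MIGraphX code.

#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    const std::size_t n = 1, c = 4, hw = 2, groups = 2;
    const std::size_t group_size = c / groups;
    const float epsilon          = 1e-5f;
    std::vector<float> x     = {1, 2, 3, 4, 5, 6, 7, 8}; // N*C*HW elements, NCHW layout
    std::vector<float> scale = {1.0f, 0.5f};              // one value per group
    std::vector<float> bias  = {0.0f, 1.0f};

    std::vector<float> y(x.size());
    for(std::size_t b = 0; b < n; ++b)
    {
        for(std::size_t g = 0; g < groups; ++g)
        {
            // mean and variance over the (C/G x HW) block belonging to this group
            const std::size_t elems = group_size * hw;
            const std::size_t base  = (b * c + g * group_size) * hw;
            double mean = 0, var = 0;
            for(std::size_t i = 0; i < elems; ++i)
                mean += x[base + i];
            mean /= elems;
            for(std::size_t i = 0; i < elems; ++i)
                var += (x[base + i] - mean) * (x[base + i] - mean);
            var /= elems;
            // y = (x - mean) * rsqrt(variance + epsilon) * scale[g] + bias[g]
            const double rstd = 1.0 / std::sqrt(var + epsilon);
            for(std::size_t i = 0; i < elems; ++i)
                y[base + i] = (x[base + i] - mean) * rstd * scale[g] + bias[g];
        }
    }
    for(float v : y)
        std::cout << v << " ";
    std::cout << "\n";
}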
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
struct parse_layernorm : op_parser<parse_layernorm>
{
std::vector<op_desc> operators() const { return {{"LayerNormalization"}}; }
std::vector<instruction_ref> parse(const op_desc& /*opd*/,
const onnx_parser& parser,
const onnx_parser::node_info& info,
std::vector<instruction_ref> args) const
{
int64_t axis = -1;
if(contains(info.attributes, "axis"))
{
axis = parser.parse_value(info.attributes.at("axis")).at<int64_t>();
}
float epsilon = 1e-5f;
if(contains(info.attributes, "epsilon"))
{
epsilon = parser.parse_value(info.attributes.at("epsilon")).at<float>();
}
if(contains(info.attributes, "stash_type"))
{
std::cerr << "WARNING: LAYERNORM does not support stash_type, it will be ignored.\n";
}
if(args.size() < 2 or args.size() > 3)
{
MIGRAPHX_THROW("PARSE_LAYERNORM: invalid input count");
}
auto x = args.at(0);
auto scale = args.at(1);
bool skip_bias = args.size() == 2;
instruction_ref bias;
if(not skip_bias)
{
bias = args.at(2);
}
auto x_shape = x->get_shape();
auto x_dtype = x_shape.type();
int64_t x_rank = x_shape.ndim();
if(x_rank < 2)
{
MIGRAPHX_THROW("PARSE_LAYERNORM: invalid input shape");
}
// If rank(X) is r, axis' allowed range is [-r, r)
if(axis < -x_rank or axis >= x_rank)
{
MIGRAPHX_THROW("PARSE_LAYERNORM: invalid axis");
}
// y = (x - mean) * rsqrt(variance + epsilon) * scale + bias
// mean = reduce_mean({D1, D2, ... Dk}, x)
// variance = reduce_mean({D1, D2, ... Dk}, (x - mean)^2)
// axis can be negative
axis = axis < 0 ? axis + x_rank : axis;
auto kdims = x_rank - axis;
std::vector<int64_t> axes(kdims);
std::iota(axes.begin(), axes.end(), axis);
auto skipped_axes = x_rank - kdims;
auto mean = info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), x);
auto x_sub_mean = info.add_common_op("sub", x, mean);
auto x_sqdiff_mean = info.add_common_op("sqdiff", x, mean);
auto variance =
info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), x_sqdiff_mean);
epsilon =
(x_dtype == migraphx::shape::half_type and std::abs(epsilon) < 1e-7) ? 1e-7 : epsilon;
auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_dtype}, {epsilon}});
auto var_eps = info.add_common_op("add", variance, eps);
auto rsqrt = info.add_instruction(make_op("rsqrt"), var_eps);
auto result = info.add_common_op("mul", x_sub_mean, rsqrt);
instruction_ref scale_bcast = scale;
instruction_ref bias_bcast = bias;
if(skipped_axes > 0)
{
auto x_dims = x_shape.lens();
scale_bcast = info.add_instruction(
make_op("broadcast", {{"axis", skipped_axes}, {"out_lens", x_dims}}), scale);
if(not skip_bias)
{
bias_bcast = info.add_instruction(
make_op("broadcast", {{"axis", skipped_axes}, {"out_lens", x_dims}}), bias);
}
}
auto scaled = info.add_instruction(make_op("mul"), result, scale_bcast);
auto y = skip_bias ? scaled : info.add_instruction(make_op("add"), scaled, bias_bcast);
return {y, mean, rsqrt};
}
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
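The LayerNormalization parser normalizes over the trailing axes starting at `axis`, which may be negative. A small illustrative sketch of that axis handling follows; layernorm_axes is a hypothetical helper, not part of MIGraphX.

#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

// Wrap a possibly negative axis into [0, rank) and list the reduction axes [axis, rank).
std::vector<int64_t> layernorm_axes(int64_t axis, int64_t rank)
{
    if(axis < 0)
        axis += rank; // ONNX allows axis in [-rank, rank)
    std::vector<int64_t> axes(rank - axis);
    std::iota(axes.begin(), axes.end(), axis);
    return axes;
}

int main()
{
    for(auto a : layernorm_axes(-1, 4)) // rank-4 input, default axis -1
        std::cout << a << " ";          // prints: 3
    std::cout << "\n";
    for(auto a : layernorm_axes(1, 4))
        std::cout << a << " ";          // prints: 1 2 3
    std::cout << "\n";
}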
@@ -115,65 +115,71 @@ struct parse_pad : op_parser<parse_pad>
 {
     std::vector<op_desc> operators() const { return {{"Pad"}}; }
 
-    instruction_ref parse(const op_desc& /*opd*/,
-                          const onnx_parser& parser,
-                          onnx_parser::node_info info,
-                          std::vector<instruction_ref> args) const
-    {
-        std::vector<int64_t> pads{};
-        if(args.size() >= 2)
-        {
-            auto pad_arg = args.at(1)->eval();
-            check_arg_empty(pad_arg, "PARSE_PAD: pad input must be constant");
-            pad_arg.visit([&](auto v) { pads.assign(v.begin(), v.end()); });
-        }
-        else if(contains(info.attributes, "pads"))
-        {
-            auto&& pad_vals = info.attributes["pads"].ints();
-            pads = std::vector<int64_t>(pad_vals.begin(), pad_vals.end());
-        }
-        else
-        {
-            MIGRAPHX_THROW("PARSE_PAD: pad must be available");
-        }
-
-        // check if padding is actually being done (at least one value is nonzero)
-        if(std::all_of(pads.begin(), pads.end(), [](const int& i) { return i == 0; }))
-        {
-            return info.add_instruction(make_op("identity"), args.front());
-        }
-
-        if(contains(info.attributes, "mode"))
-        {
-            auto mode = info.attributes.at("mode").s();
-            if(mode == "reflect")
-            {
-                if(args.front()->get_shape().dynamic())
-                {
-                    MIGRAPHX_THROW("PARSE_PAD: reflect padding with dynamic shape not supported");
-                }
-                return reflect_pad(info, pads, args.front());
-            }
-            if(mode != "constant")
-            {
-                MIGRAPHX_THROW(
-                    "PARSE_PAD: migraphx currently only supports constant and reflect padding");
-            }
-        }
-
+    std::string parse_mode(const onnx_parser::node_info& info,
+                           const std::vector<instruction_ref>& args) const
+    {
+        if(contains(info.attributes, "mode"))
+        {
+            auto mode = info.attributes.at("mode").s();
+            if(mode == "reflect")
+            {
+                if(args.front()->get_shape().dynamic())
+                {
+                    MIGRAPHX_THROW("PARSE_PAD: reflect padding with dynamic shape not supported");
+                }
+            }
+            else if(mode != "constant")
+            {
+                MIGRAPHX_THROW(
+                    "PARSE_PAD: migraphx currently only supports constant and reflect padding");
+            }
+            return mode;
+        }
+        else
+        {
+            // default mode
+            return "constant";
+        }
+    }
+
+    std::vector<int64_t> parse_pads(const onnx_parser::node_info& info,
+                                    const std::vector<instruction_ref>& args) const
+    {
+        std::vector<int64_t> pads{};
+        if(args.size() >= 2)
+        {
+            auto pad_arg = args.at(1)->eval();
+            check_arg_empty(pad_arg, "PARSE_PAD: `pads` input must be constant");
+            pad_arg.visit([&](auto v) { pads.assign(v.begin(), v.end()); });
+        }
+        else if(contains(info.attributes, "pads"))
+        {
+            auto&& pad_vals = info.attributes.at("pads").ints();
+            pads = std::vector<int64_t>(pad_vals.begin(), pad_vals.end());
+        }
+        else
+        {
+            MIGRAPHX_THROW("PARSE_PAD: `pads` must be available");
+        }
+        return pads;
+    }
+
+    float parse_constant_value(const onnx_parser& parser,
+                               const onnx_parser::node_info& info,
+                               const std::vector<instruction_ref>& args) const
+    {
         float value = 0.0f;
-        // third input is the value
-        if(args.size() == 3)
+        if(args.size() >= 3 and args.at(2)->get_shape().scalar())
         {
             auto val_ins = args.at(2);
             if(not val_ins->can_eval())
             {
-                MIGRAPHX_THROW("PARSE_PAD: input value must be constant");
+                MIGRAPHX_THROW("PARSE_PAD: input `value` must be constant");
             }
             auto val_arg = val_ins->eval();
             if(val_arg.get_shape().elements() != 1)
             {
-                MIGRAPHX_THROW("PARSE_PAD: value should contain only one element");
+                MIGRAPHX_THROW("PARSE_PAD: `value` should contain only one element");
             }
             value = val_arg.at<float>();
         }
@@ -181,6 +187,81 @@ struct parse_pad : op_parser<parse_pad>
         {
             value = parser.parse_value(info.attributes.at("value")).at<float>();
         }
+        return value;
+    }
+
+    std::vector<int64_t> parse_axes(const std::vector<instruction_ref>& args,
+                                    bool is_constant_mode) const
+    {
+        std::vector<int64_t> axes{};
+        // axes is 3rd or 4th, depending on constant mode
+        auto pos = is_constant_mode ? 4 : 3;
+        if(args.size() >= pos)
+        {
+            auto axes_arg = args.at(pos - 1)->eval();
+            check_arg_empty(axes_arg, "PARSE_PAD: variable `axes` input not supported");
+            axes_arg.visit([&](auto v) { axes.assign(v.begin(), v.end()); });
+        }
+        return axes;
+    }
+
+    std::vector<int64_t> calculate_pads_with_axes(const std::vector<int64_t>& pads,
+                                                  const std::vector<int64_t>& axes,
+                                                  size_t input_rank) const
+    {
+        size_t num_axes = axes.size();
+        if(num_axes * 2 != pads.size())
+        {
+            MIGRAPHX_THROW("PARSE_PAD: number of elements of pads should be equal to 2 * "
+                           "number of elements of axes");
+        }
+        std::vector<int64_t> new_pads(input_rank * 2);
+        for(size_t idx{0}; idx < num_axes; ++idx)
+        {
+            // axis can be negative
+            int64_t axis = axes[idx] < 0 ? input_rank + axes[idx] : axes[idx];
+            // pad format is x1_begin, x2_begin, ... , x3_end, x4_end
+            new_pads[axis]              = pads[idx];
+            new_pads[axis + input_rank] = pads[idx + num_axes];
+        }
+        return new_pads;
+    }
+
+    instruction_ref parse(const op_desc& /*opd*/,
+                          const onnx_parser& parser,
+                          const onnx_parser::node_info& info,
+                          const std::vector<instruction_ref>& args) const
+    {
+        std::vector<int64_t> pads = parse_pads(info, args);
+        // check if padding is actually being done (at least one value is nonzero)
+        if(std::all_of(pads.begin(), pads.end(), [](const int& i) { return i == 0; }))
+        {
+            return info.add_instruction(make_op("identity"), args.front());
+        }
+
+        std::string mode      = parse_mode(info, args);
+        bool is_constant_mode = mode == "constant";
+
+        float value = is_constant_mode ? parse_constant_value(parser, info, args) : 0.0f;
+
+        std::vector<int64_t> axes = parse_axes(args, is_constant_mode);
+
+        size_t input_rank = args.front()->get_shape().ndim();
+        if(not axes.empty())
+        {
+            pads = calculate_pads_with_axes(pads, axes, input_rank);
+        }
+
+        if(pads.size() != input_rank * 2)
+        {
+            MIGRAPHX_THROW("PARSE_PAD: number of elements of pads should be equal to 2 * "
+                           "input rank");
+        }
+
+        if(mode == "reflect")
+        {
+            return reflect_pad(info, pads, args.front());
+        }
+
         return info.add_instruction(migraphx::make_op("pad", {{"pads", pads}, {"value", value}}),
                                     args.front());
...
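calculate_pads_with_axes above expands a per-axis `pads`/`axes` pair into the full 2*rank pad list, with all begin values first and all end values second. A standalone sketch of the same expansion; expand_pads is a hypothetical helper and the values are made up.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<int64_t> expand_pads(const std::vector<int64_t>& pads,
                                 const std::vector<int64_t>& axes,
                                 std::size_t rank)
{
    std::vector<int64_t> full(rank * 2, 0); // unlisted axes get zero padding
    for(std::size_t i = 0; i < axes.size(); ++i)
    {
        const int64_t axis = axes[i] < 0 ? axes[i] + static_cast<int64_t>(rank) : axes[i];
        full[axis]        = pads[i];               // begin padding for this axis
        full[axis + rank] = pads[i + axes.size()]; // end padding for this axis
    }
    return full;
}

int main()
{
    // Pad only axis 2 and axis -1 of a rank-4 tensor: begins {1, 2}, ends {3, 4}
    for(auto p : expand_pads({1, 2, 3, 4}, {2, -1}, 4))
        std::cout << p << " "; // prints: 0 0 1 2 0 0 3 4
    std::cout << "\n";
}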
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
struct parse_shrink : op_parser<parse_shrink>
{
std::vector<op_desc> operators() const { return {{"Shrink"}}; }
instruction_ref parse(const op_desc&,
const onnx_parser& parser,
const onnx_parser::node_info& info,
std::vector<instruction_ref> args) const
{
float bias = 0.0;
if(contains(info.attributes, "bias"))
{
bias = parser.parse_value(info.attributes.at("bias")).at<float>();
}
float lambd = 0.5;
if(contains(info.attributes, "lambd"))
{
lambd = parser.parse_value(info.attributes.at("lambd")).at<float>();
}
auto x = args[0];
auto x_shape = x->get_shape();
auto x_type = x_shape.type();
auto lit_bias = info.add_literal(bias);
auto lit_neg_lambd = info.add_literal(-lambd);
auto lit_lambd = info.add_literal(lambd);
auto x_plus_bias = info.add_common_op("add", x, lit_bias);
auto x_min_bias = info.add_common_op("sub", x, lit_bias);
auto cond1 = info.add_common_op("less", x, lit_neg_lambd);
auto cond2_a = info.add_common_op("not", cond1);
auto cond2_b = info.add_common_op("greater", x, lit_lambd);
auto cond2 = info.add_common_op("logical_and", cond2_a, cond2_b);
auto mul1 = info.add_instruction(make_op("convert", {{"target_type", x_type}}), cond1);
auto mul2 = info.add_instruction(make_op("convert", {{"target_type", x_type}}), cond2);
auto first = info.add_common_op("mul", mul1, x_plus_bias);
auto second = info.add_common_op("mul", mul2, x_min_bias);
auto ret = info.add_common_op("add", first, second);
if(ret->get_shape().type() != x_type)
{
ret = info.add_instruction(make_op("convert", {{"target_type", x_type}}), ret);
}
return ret;
}
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
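The Shrink parser builds its result from two comparison masks so the computation stays elementwise on tensors; the scalar formula it implements is y = x + bias when x < -lambd, y = x - bias when x > lambd, and 0 otherwise. A scalar reference sketch (not MIGraphX code):

#include <iostream>

float shrink(float x, float bias = 0.0f, float lambd = 0.5f)
{
    if(x < -lambd)
        return x + bias;
    if(x > lambd)
        return x - bias;
    return 0.0f;
}

int main()
{
    std::cout << shrink(-2.0f, 1.0f) << " "  // -1
              << shrink(0.25f, 1.0f) << " "  // 0
              << shrink(2.0f, 1.0f) << "\n"; // 1
}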
@@ -139,6 +139,12 @@ void hip_compile_options::set_launch_params(
     global = compute_global(local);
 }
 
+static bool hip_accept_non_uniform_wg()
+{
+    static bool non_uniform_wg = hip_has_flags({"-fno-offload-uniform-block"});
+    return non_uniform_wg;
+}
+
 std::function<std::size_t(std::size_t local)>
 compute_global_for(context& ctx, std::size_t n, std::size_t over)
 {
@@ -146,11 +152,12 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
     std::size_t max_global = ctx.get_current_device().get_cu_count() *
                              ctx.get_current_device().get_max_workitems_per_cu();
     return [n, over, max_global](std::size_t local) {
-        // hip require global workitems multiple of local workitems. It may degrade performance.
-        // [TODO]: consider adding "fno-hip-uniform-block" flag when it becomes available.
-        // https://reviews.llvm.org/D155213
-        std::size_t num_elements = ((n + local - 1) / local) * local;
-        std::size_t groups       = (num_elements + local - 1) / local;
+        std::size_t num_elements = n;
+        if(not hip_accept_non_uniform_wg())
+        {
+            num_elements = (1 + (n - 1) / local) * local;
+        }
+        std::size_t groups     = 1 + (num_elements - 1) / local;
         std::size_t max_blocks = max_global / local;
         std::size_t nglobal    = std::min(max_blocks * over, groups) * local;
         return std::min(nglobal, num_elements);
@@ -183,6 +190,11 @@ operation compile_hip_code_object(const std::string& content, hip_compile_options
         generate_args_hpp(options.virtual_inputs.empty() ? options.inputs : options.virtual_inputs);
     srcs.emplace_back("args.hpp", args_hpp);
 
+    if(options.global % options.local != 0 and hip_accept_non_uniform_wg())
+        options.params += " -fno-offload-uniform-block";
+    else
+        assert(options.global % options.local == 0);
+
     options.params += " -DMIGRAPHX_NGLOBAL=" + std::to_string(options.global);
     options.params += " -DMIGRAPHX_NLOCAL=" + std::to_string(options.local);
     options.params += " " + join_strings(compiler_warnings(), " ");
...
@@ -199,9 +199,9 @@ struct miopen_convolution
         // MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6
         preallocate = true;
 #endif
-        auto x = preallocate ? to_gpu(generate_argument(x_shape)) : inputs[0];
-        auto w = preallocate ? to_gpu(generate_argument(w_shape)) : inputs[1];
-        auto y = preallocate ? allocate_gpu(output_shape) : inputs[2];
+        auto x = preallocate ? to_gpu(generate_argument(x_shape)) : argument{inputs[0]};
+        auto w = preallocate ? to_gpu(generate_argument(w_shape)) : argument{inputs[1]};
+        auto y = preallocate ? allocate_gpu(output_shape) : argument{inputs[2]};
         auto workspace =
             preallocate ? allocate_gpu(workspace_shape) : migraphx::argument(workspace_shape);
...
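compute_global_for above now only rounds the element count up to a multiple of the local size when the compiler cannot accept non-uniform workgroups (i.e. -fno-offload-uniform-block is unavailable). A host-side sketch of that arithmetic follows; pick_nglobal is illustrative and the numbers are made up.

#include <algorithm>
#include <cstddef>
#include <iostream>

std::size_t pick_nglobal(std::size_t n, std::size_t local, std::size_t max_global,
                         std::size_t over, bool non_uniform_wg)
{
    std::size_t num_elements = n;
    if(not non_uniform_wg)
        num_elements = (1 + (n - 1) / local) * local; // round up to a multiple of local
    std::size_t groups     = 1 + (num_elements - 1) / local;
    std::size_t max_blocks = max_global / local;
    std::size_t nglobal    = std::min(max_blocks * over, groups) * local;
    return std::min(nglobal, num_elements);
}

int main()
{
    // 1000 elements with 256-wide workgroups: a uniform launch pads to 1024,
    // a non-uniform launch keeps 1000 global work-items.
    std::cout << pick_nglobal(1000, 256, 1u << 20, 1, false) << " "
              << pick_nglobal(1000, 256, 1u << 20, 1, true) << "\n"; // 1024 1000
}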
@@ -31,6 +31,14 @@
 #include <migraphx/kernels/debug.hpp>
 #include <migraphx/kernels/functional.hpp>
 
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+extern "C" __device__ size_t __ockl_get_enqueued_local_size(uint); // NOLINT
+extern "C" __device__ size_t __ockl_get_local_size(uint);          // NOLINT
+#pragma clang diagnostic pop
+#endif
+
 namespace migraphx {
 
 #if defined(MIGRAPHX_NGLOBAL) && defined(MIGRAPHX_NLOCAL)
@@ -49,39 +57,33 @@ inline __device__ __attribute__((const)) index_int compute_global_size()
 #endif
 }
 
-// We cant just use blockDim.x to get the local size since its broken on hip
-// when global is not divisible by local size. In this case, we calulate the
-// size for the last group.
+#ifdef MIGRAPHX_NGROUP
+// If global is divisible by local then local can be a const
+#if(MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL == 0) || (MIGRAPHX_NGROUP == 1)
+#define MIGRAPHX_HAS_CONST_LOCAL 1
+#endif
+#endif
+
 inline __device__ __attribute__((const)) index_int compute_local_size()
 {
-#ifdef MIGRAPHX_NLOCAL
-    const auto nlocal = MIGRAPHX_NLOCAL;
+#ifdef MIGRAPHX_HAS_CONST_LOCAL
+    return MIGRAPHX_NLOCAL;
 #else
-    const auto nlocal = blockDim.x; // NOLINT
-#endif
-#ifdef MIGRAPHX_NGROUP
-    const auto ngroup = MIGRAPHX_NGROUP;
-#else
-    const auto ngroup = gridDim.x; // NOLINT
+    // Returns the block size. For the non-uniform block it returns the size of the non-uniform
+    // block.
+    return __ockl_get_local_size(0); // NOLINT
 #endif
-    const auto group_id = blockIdx.x; // NOLINT
-    const auto nglobal  = compute_global_size();
-    if(group_id == ngroup - 1)
-    {
-        return 1 + (nglobal - 1) % nlocal;
-    }
-    else
-    {
-        return nlocal; // NOLINT
-    }
 }
 
-#ifdef MIGRAPHX_NGROUP
-// If global is divisible by local then local can be a const
-#if(MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL == 0) || (MIGRAPHX_NGROUP == 1)
-#define MIGRAPHX_HAS_CONST_LOCAL 1
-#endif
+inline __device__ __attribute__((const)) index_int compute_max_local_size()
+{
+#ifdef MIGRAPHX_LOCAL
+    return MIGRAPHX_NLOCAL;
+#else
+    // Returns the block size. When the workgroup has a non-uniform block, this returns the size
+    // of the uniform block.
+    return __ockl_get_enqueued_local_size(0); // NOLINT
 #endif
+}
 
 struct index
 {
@@ -126,8 +128,8 @@ struct index
 #else
     __device__ index_int max_nlocal() const
     {
-        MIGRAPHX_ASSERT(blockDim.x > 0);
-        return blockDim.x;
+        MIGRAPHX_ASSERT(compute_max_local_size() > 0);
+        return compute_max_local_size();
     }
 #endif
@@ -249,7 +251,8 @@ struct index
 #endif
 inline __device__ __attribute__((const)) index make_index()
 {
-    return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
+    return index{
+        blockIdx.x * compute_max_local_size() + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
 }
 
 } // namespace migraphx
...
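make_index() above switches from blockDim.x to compute_max_local_size() because, when the trailing workgroup is non-uniform, global ids must be derived from the enqueued (uniform) local size or the last group's ids collide with those of the previous group. A host-side illustration with made-up sizes:

#include <cstddef>
#include <iostream>

int main()
{
    const std::size_t nglobal    = 10, nlocal = 4;           // trailing group has only 2 items
    const std::size_t ngroup     = 1 + (nglobal - 1) / nlocal; // 3 groups
    const std::size_t last_local = 1 + (nglobal - 1) % nlocal; // 2
    // Correct: the last group's ids use the enqueued local size (4): 2*4+0=8, 2*4+1=9
    std::cout << (ngroup - 1) * nlocal + 0 << " " << (ngroup - 1) * nlocal + 1 << "\n";
    // Wrong: using the shrunken block size (2) gives 2*2+0=4, 2*2+1=5, colliding with group 1
    std::cout << (ngroup - 1) * last_local + 0 << " " << (ngroup - 1) * last_local + 1 << "\n";
}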
@@ -55,7 +55,7 @@ struct allocate
                                       const migraphx::shape& output_shape,
                                       const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
     }
 };
...
@@ -60,7 +60,7 @@ struct concat
                                       const migraphx::shape& output_shape,
                                       const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
     }
 };
@@ -104,7 +104,7 @@ struct allocate
                                       const migraphx::shape& output_shape,
                                       const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
     }
 };
...
@@ -55,7 +55,7 @@ struct allocate
                                       const migraphx::shape& output_shape,
                                       const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
     }
 };
...
@@ -57,7 +57,7 @@ struct normalize_test_op
                                       const migraphx::shape& output_shape,
                                       const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
     }
 };
...
-6d7bc2a097a1a08541cd0d4628831c79ab8092d5
+635d3faa3b3908d2806d009dc6872152cfcfcdda