Unverified Commit 70d9faf7 authored by Chris Austen, committed by GitHub

Merge branch 'develop' into mi200

parents a56c531c a60bdb67
/*
* The MIT License (MIT)
*
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/common.hpp>
#include <migraphx/onnx/broadcast_qdq.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
/*
*********************************************************************************
* Reference: see DynamicQuantizeLinear in *
* https://github.com/onnx/onnx/blob/main/docs/Operators.md *
*********************************************************************************
DynamicQuantizeLinear
A Function to fuse calculation for Scale, Zero Point and FP32->8Bit conversion of FP32 Input data.
Outputs Scale, ZeroPoint and Quantized Input for a given FP32 Input. Scale is calculated as:
y_scale = (maximum(0, max(x)) - minimum(0, min(x))) / (qmax - qmin)
* where qmax and qmin are max and min values for quantization range i.e. [0, 255] in case of uint8
* data range is adjusted to include 0.
Zero point is calculated as:
intermediate_zero_point = qmin - min(x)/y_scale
y_zero_point = cast(round(saturate(intermediate_zero_point)))
* where qmax and qmin are max and min values for quantization range i.e. [0, 255] in case of uint8
* for saturation, it saturates to [0, 255] if it's uint8, or [-127, 127] if it's int8. Right now
only uint8 is supported.
* rounding to nearest ties to even. Data quantization formula is:
y = saturate (round (x / y_scale) + y_zero_point)
* for saturation, it saturates to [0, 255] if it's uint8, or [-127, 127] if it's int8. Right now only
uint8 is supported.
* rounding to nearest ties to even.
Version
This version of the operator has been available since version 11 of the default ONNX operator set.
Inputs
x : T1
Input tensor
Outputs
y : T2
Quantized output tensor
y_scale : tensor(float)
Output scale. It's a scalar, which means a per-tensor/layer quantization.
y_zero_point : T2
Output zero point. It's a scalar, which means a per-tensor/layer quantization.
Type Constraints
T1 : tensor(float)
Constrain 'x' to float tensor.
T2 : tensor(uint8)
Constrain 'y_zero_point' and 'y' to 8-bit unsigned integer tensor.
*/
struct parse_dynamicquantizelinear : op_parser<parse_dynamicquantizelinear>
{
std::vector<op_desc> operators() const { return {{"DynamicQuantizeLinear"}}; }
std::vector<instruction_ref> parse(const op_desc& /*opd*/,
const onnx_parser& /*parser*/,
const onnx_parser::node_info& info,
const std::vector<instruction_ref>& args) const
{
auto x = args[0];
auto x_shape = x->get_shape();
auto x_type = x_shape.type();
if(x_shape.dynamic())
MIGRAPHX_THROW("DYNAMICQUANTIZELINEAR: dynamic shapes are not supported");
auto x_reshaped =
(x_shape.lens().size() == 1)
? x
: info.add_instruction(
migraphx::make_op("reshape", {{"dims", {x_shape.elements()}}}), x);
auto lit_0 = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0}});
x_reshaped =
info.add_instruction(migraphx::make_op("concat", {{"axis", 0}}), x_reshaped, lit_0);
// 1. Computing y_scale
// Note: currently, DynamicQuantizeLinear only has uint8 quantization:
const auto Q_MAX = std::numeric_limits<uint8_t>::max();
const auto Q_MIN = std::numeric_limits<uint8_t>::min();
auto q_range =
info.add_literal(migraphx::literal{migraphx::shape{x_type}, {Q_MAX - Q_MIN}});
// maximum(0, max(x))
auto max_x =
info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {0}}}), x_reshaped);
// minimum(0, min(x))
auto min_x =
info.add_instruction(migraphx::make_op("reduce_min", {{"axes", {0}}}), x_reshaped);
// y_scale = (maximum(0, max(x)) - minimum(0, min(x))) / (qmax - qmin)
auto sub0 = info.add_common_op("sub", max_x, min_x);
auto y_scale = info.add_common_op("div", sub0, q_range);
// 2. Computing y_zero_point
// intermediate_zero_point = qmin - min(x) / y_scale
auto q_min = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {Q_MIN}});
auto q_max = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {Q_MAX}});
auto sub1 = info.add_common_op("sub", q_min, min_x);
auto interm_zp = info.add_common_op("div", sub1, y_scale);
// y_zero_point = cast(round(saturate(intermediate_zero_point)))
auto saturate = info.add_instruction(migraphx::make_op("clip"), interm_zp, q_min, q_max);
auto round = info.add_instruction(migraphx::make_op("nearbyint"), saturate);
auto y_zero_point = info.add_instruction(
migraphx::make_op("convert", {{"target_type", migraphx::shape::uint8_type}}), round);
// 3. quantize x with y_scale and y_zero_point
auto quant = bcast_qdq_instr("quantizelinear", x, y_scale, y_zero_point, info);
return {quant, y_scale, y_zero_point};
}
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
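The scale and zero-point formulas documented above are easy to sanity-check outside the parser. Below is a small standalone C++ sketch (not part of this commit; values are illustrative) that mirrors the uint8 math, including the range adjustment to include 0 and the clip-then-round order the parser uses:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    std::vector<float> x = {0.3f, -1.2f, 2.4f};
    // range is adjusted to include 0, mirroring the concat with a 0 literal in the parser
    float max_x = std::max(0.0f, *std::max_element(x.begin(), x.end()));
    float min_x = std::min(0.0f, *std::min_element(x.begin(), x.end()));
    const float q_min = 0.0f;
    const float q_max = 255.0f; // uint8 quantization range
    float y_scale = (max_x - min_x) / (q_max - q_min);
    // clip, then round to nearest (ties to even), then narrow -- same order as the parser
    float zp = std::min(q_max, std::max(q_min, (q_min - min_x) / y_scale));
    auto y_zero_point = static_cast<uint8_t>(std::nearbyint(zp));
    for(float v : x)
    {
        float q = std::nearbyint(v / y_scale) + y_zero_point;
        auto y  = static_cast<uint8_t>(std::min(q_max, std::max(q_min, q)));
        std::cout << v << " -> " << static_cast<int>(y) << "\n";
    }
    std::cout << "y_scale = " << y_scale
              << ", y_zero_point = " << static_cast<int>(y_zero_point) << "\n";
}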
...@@ -60,7 +60,7 @@ struct parse_generic_op : op_parser<parse_generic_op>
{"Neg", "neg"},
{"Reciprocal", "recip"},
{"Relu", "relu"},
-{"Round", "round"},
+{"Round", "nearbyint"},
{"Sigmoid", "sigmoid"},
{"Sign", "sign"},
{"Sin", "sin"},
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
struct parse_isinf : op_parser<parse_isinf>
{
std::vector<op_desc> operators() const { return {{"IsInf", "isinf"}}; }
instruction_ref parse(const op_desc& /*opd*/,
const onnx_parser& parser,
onnx_parser::node_info info,
const std::vector<instruction_ref>& args) const
{
bool detect_negative = true;
bool detect_positive = true;
if(contains(info.attributes, "detect_negative"))
{
detect_negative = static_cast<bool>(
parser.parse_value(info.attributes.at("detect_negative")).at<int>());
}
if(contains(info.attributes, "detect_positive"))
{
detect_positive = static_cast<bool>(
parser.parse_value(info.attributes.at("detect_positive")).at<int>());
}
auto x_shape = args[0]->get_shape();
if(not detect_negative and not detect_positive)
{
return info.add_instruction(
make_op("multibroadcast", {{"out_lens", x_shape.lens()}}),
info.add_literal(migraphx::literal{migraphx::shape{shape::bool_type}, {false}}));
}
auto is_inf = info.add_instruction(make_op("isinf"), args[0]);
if(detect_negative and detect_positive)
{
return is_inf;
}
auto zero_l = info.add_literal(migraphx::literal{migraphx::shape{x_shape.type()}, {0}});
auto mb_zero =
info.add_instruction(make_op("multibroadcast", {{"out_lens", x_shape.lens()}}), zero_l);
auto cond = info.add_broadcastable_binary_op(
detect_negative ? "less" : "greater", args[0], mb_zero);
if(cond->get_shape().type() != shape::bool_type)
{
cond =
info.add_instruction(make_op("convert", {{"target_type", shape::bool_type}}), cond);
}
return info.add_instruction(make_op("logical_and"), is_inf, cond);
}
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -58,6 +58,16 @@ struct parse_loop : op_parser<parse_loop>
}
}
// cap max_iter because loop uses static shapes with max_iter size and huge numbers
// here can cause overflow
if(max_iterations > parser.limit_max_iterations)
{
std::cerr << "WARNING: PARSE_LOOP max_iterations exceeds the maximum loop "
"iterations limit, it will be changed from "
<< max_iterations << " to " << parser.limit_max_iterations << ".\n";
max_iterations = parser.limit_max_iterations;
}
// condition input is empty
if(args.at(1)->name() == "undefined")
{
......
...@@ -116,6 +116,37 @@ void lstm_actv_functions(op::rnn_direction dirct, std::vector<std::string>& actv
}
}
void lstm_transpose_inputs(onnx_parser::node_info& info, std::vector<instruction_ref>& args)
{
std::vector<int64_t> perm{1, 0, 2};
args[0] = info.add_instruction(make_op("transpose", {{"permutation", perm}}), args[0]);
if(args.size() >= 6 and not args[5]->is_undefined())
{
args[5] = info.add_instruction(make_op("transpose", {{"permutation", perm}}), args[5]);
}
if(args.size() >= 7 and not args[6]->is_undefined())
{
args[6] = info.add_instruction(make_op("transpose", {{"permutation", perm}}), args[6]);
}
}
void lstm_transpose_outputs(onnx_parser::node_info& info,
instruction_ref& hidden_states,
instruction_ref& last_output,
instruction_ref& last_cell_output)
{
std::vector<int64_t> perm_hs{2, 0, 1, 3};
hidden_states =
info.add_instruction(make_op("transpose", {{"permutation", perm_hs}}), hidden_states);
std::vector<int64_t> perm_last{1, 0, 2};
last_output =
info.add_instruction(make_op("transpose", {{"permutation", perm_last}}), last_output);
last_cell_output =
info.add_instruction(make_op("transpose", {{"permutation", perm_last}}), last_cell_output);
}
struct parse_lstm : op_parser<parse_lstm>
{
std::vector<op_desc> operators() const { return {{"LSTM"}}; }
...@@ -202,6 +233,12 @@ struct parse_lstm : op_parser<parse_lstm>
input_forget = parser.parse_value(info.attributes.at("input_forget")).at<int>();
}
int layout = 0;
if(contains(info.attributes, "layout"))
{
layout = parser.parse_value(info.attributes.at("layout")).at<int>();
}
// append undefined opeator to make 6 arguments
if(args.size() < 8)
{
...@@ -209,6 +246,11 @@ struct parse_lstm : op_parser<parse_lstm>
args.insert(args.end(), 8 - args.size(), ins);
}
if(layout != 0)
{
lstm_transpose_inputs(info, args);
}
// first output for concatenation of hidden states
auto hidden_states = info.add_instruction(make_op("lstm",
{{"hidden_size", hidden_size},
...@@ -224,6 +266,11 @@ struct parse_lstm : op_parser<parse_lstm>
auto last_cell_output =
info.add_instruction(make_op("rnn_last_cell_output"), hidden_states);
if(layout != 0)
{
lstm_transpose_outputs(info, hidden_states, last_output, last_cell_output);
}
return {hidden_states, last_output, last_cell_output};
}
};
......
/*
* The MIT License (MIT)
*
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
...@@ -41,6 +41,9 @@ struct parse_multinomial : op_parser<parse_multinomial>
const onnx_parser::node_info& info,
std::vector<instruction_ref> args) const
{
if(args.empty())
MIGRAPHX_THROW("PARSE_MULTINOMIAL: no arguments given");
int dtype = 6;
if(contains(info.attributes, "dtype"))
dtype = info.attributes.at("dtype").i();
...@@ -49,35 +52,90 @@ struct parse_multinomial : op_parser<parse_multinomial>
size_t sample_size = 1;
if(contains(info.attributes, "sample_size"))
sample_size = info.attributes.at("sample_size").i();
else
MIGRAPHX_THROW("PARSE_MULTINOMIAL: sample_size not given");
// Use logarithmic math to scale probabilities while avoiding division by very
// small numbers. Scaling by the maximum makes very tiny ranges more
// tractable; any constant factor gives equivalent distr. since the Multinomial op.
// normalizes at runtime.
// Subtract the per-batch maximum log-probability, making the per-batch max 0
auto maxes =
info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), args[0]);
-auto mb_maxes = info.add_instruction(
-migraphx::make_op("multibroadcast", {{"out_lens", args[0]->get_shape().lens()}}),
-maxes);
-auto cdf = info.add_instruction(migraphx::make_op("sub"), args[0], mb_maxes);
+auto cdf = info.add_common_op("sub", args[0], maxes);
// Take the element-wise exponent to get probabilities in the range (0, 1]
cdf = info.add_instruction(migraphx::make_op("exp"), cdf);
-// Compute the cumulative density function
+// Compute the cumulative distribution function
cdf = info.add_instruction(
migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);
-// Pre-compute random distribution
-std::mt19937 gen(std::chrono::high_resolution_clock::now().time_since_epoch().count());
+instruction_ref seed_input;
if(contains(info.attributes, "seed"))
-gen.seed(info.attributes.at("seed").f());
+{
float seed = info.attributes.at("seed").f();
migraphx::shape s{migraphx::shape::float_type, {1}};
std::vector<float> data = {seed};
seed_input = info.add_literal(migraphx::literal(s, data));
}
else
{
seed_input = info.add_instruction(migraphx::make_op("random_seed"));
}
instruction_ref randoms;
shape s0 = args[0]->get_shape();
if(s0.dynamic())
{
// Dynamic batch_size will be taken from args[0]. The input argument to this should
// have a second dimension of sample_size.
std::vector<shape::dynamic_dimension> dyn_dim_set;
dyn_dim_set.emplace_back(s0.dyn_dims().front());
dyn_dim_set.emplace_back(shape::dynamic_dimension{sample_size, sample_size});
// read the input dimensions
auto dim_of =
info.add_instruction(migraphx::make_op("dimensions_of", {{"end", 2}}), args[0]);
// The next two operations insert the value sample_size into the second array position
// make an argument of (1, 0)
shape s(shape::int64_type, {2});
std::vector<int64_t> data1{1, 0};
auto l1 = info.add_literal(s, data1);
auto batch_arg = info.add_instruction(migraphx::make_op("mul"), dim_of, l1);
std::vector<int64_t> data2(2, 0);
// make an argument of (0, sample_size)
data2[1] = sample_size;
auto l2 = info.add_literal(s, data2);
auto alloc_shape = info.add_instruction(migraphx::make_op("add"), batch_arg, l2);
// alloc_shape should contain the input-based shape dimensions as its values at runtime,
// and its own shape is {2}
-std::uniform_real_distribution<> dis(0.0, 1.0);
-size_t batch_size = args[0]->get_shape().lens().front();
-migraphx::shape dist_shape{migraphx::shape::float_type, {batch_size, sample_size}};
-std::vector<float> random_dist(batch_size * sample_size);
-std::generate(random_dist.begin(), random_dist.end(), [&]() { return dis(gen); });
-auto dist_lit = info.add_literal(migraphx::literal{dist_shape, random_dist});
+// compile_shape is the shape used when compiling the Allocate op, and may be dynamic
+migraphx::shape compile_shape =
+migraphx::shape(s0.type(), {s0.dyn_dims().front(), {sample_size, sample_size}});
+// Allocate on-device storage for the random values
+auto alloc = info.add_instruction(
+migraphx::make_op("allocate", {{"shape", to_value(compile_shape)}}), alloc_shape);
randoms = info.add_instruction(migraphx::make_op("random_uniform"), seed_input, alloc);
}
else
{
// use literal. The array populated by random_uniform may have any shape, as long its
// number of elements is batch_size * sample_size .
size_t batch_size = s0.lens().front();
auto rand_dummy = info.add_literal(migraphx::literal{
migraphx::shape{migraphx::shape::float_type, {batch_size, sample_size}},
std::vector<float>(batch_size * sample_size)});
randoms =
info.add_instruction(migraphx::make_op("random_uniform"), seed_input, rand_dummy);
}
return info.add_instruction(
-migraphx::make_op("multinomial", {{"dtype", output_type}}), cdf, dist_lit);
+migraphx::make_op("multinomial", {{"dtype", output_type}}), cdf, randoms);
}
};
......
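The log-scaling comment added above works because subtracting the per-batch maximum before exp only rescales the CDF by a constant factor, and multinomial sampling compares against that CDF up to a constant. A minimal standalone sketch of that argument (illustrative values, not the operator implementation):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // unnormalized log-probabilities for one batch row
    std::vector<double> logits = {88.0, 89.0, 90.0};

    // exp(90) would overflow single precision; subtracting the row maximum keeps every exponent <= 0
    double m = *std::max_element(logits.begin(), logits.end());

    std::vector<double> cdf(logits.size());
    double running = 0.0;
    for(std::size_t i = 0; i < logits.size(); ++i)
    {
        running += std::exp(logits[i] - m); // each term is in (0, 1]
        cdf[i] = running;
    }

    // sampling compares r * cdf.back() with the running sums, so the constant
    // factor exp(-m) cancels and the sampled distribution is unchanged
    double r = 0.5; // stand-in for one value produced by random_uniform
    auto it  = std::lower_bound(cdf.begin(), cdf.end(), r * cdf.back());
    std::cout << "sampled class " << (it - cdf.begin()) << "\n";
}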
...@@ -22,14 +22,8 @@
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
-#include <migraphx/onnx/checks.hpp>
-#include <migraphx/onnx/padding.hpp>
-#include <migraphx/op/pad.hpp>
-#include <migraphx/op/pooling.hpp>
+#include <migraphx/onnx/pooling.hpp>
#include <migraphx/instruction.hpp>
-#include <migraphx/ranges.hpp>
-#include <migraphx/stringutils.hpp>
-#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
...@@ -39,68 +33,14 @@ struct parse_pooling : op_parser<parse_pooling>
{
std::vector<op_desc> operators() const
{
-return {{"AveragePool", "average"},
-{"GlobalAveragePool", "average"},
-{"GlobalMaxPool", "max"},
-{"MaxPool", "max"},
-{"LpPool", "lpnorm"},
-{"GlobalLpPool", "lpnorm"}};
+return {
+{"AveragePool", "average"},
+{"GlobalAveragePool", "average"},
+{"GlobalMaxPool", "max"},
+{"MaxPool", "max"},
+{"LpPool", "lpnorm"},
+{"GlobalLpPool", "lpnorm"},
+};
}
value handle_values(const op_desc& opd,
onnx_parser::node_info info,
const shape& in_shape,
value values) const
{
auto kdims = in_shape.ndim() - 2;
if(starts_with(opd.onnx_name, "Global"))
{
// if spatial dimensions are dynamic use dyn_global flag
if(in_shape.dynamic() and std::any_of(in_shape.dyn_dims().cbegin() + 2,
in_shape.dyn_dims().cend(),
[](auto dd) { return not dd.is_fixed(); }))
{
values["dyn_global"] = true;
values["lengths"] = std::vector<size_t>();
}
else
{
// works with static and fixed dynamic shape
auto m_lens = in_shape.max_lens();
values["lengths"] = std::vector<size_t>(m_lens.begin() + 2, m_lens.end());
}
}
if(contains(info.attributes, "ceil_mode"))
{
values["ceil_mode"] = static_cast<bool>(info.attributes.at("ceil_mode").i());
}
if(contains(info.attributes, "strides"))
{
values["stride"].clear();
copy(info.attributes["strides"].ints(), std::back_inserter(values["stride"]));
check_attr_sizes(kdims, values["stride"].size(), "PARSE_POOLING: inconsistent strides");
}
if(contains(info.attributes, "kernel_shape"))
{
values["lengths"].clear();
copy(info.attributes["kernel_shape"].ints(), std::back_inserter(values["lengths"]));
check_attr_sizes(
kdims, values["lengths"].size(), "PARSE_POOLING: inconsistent lengths");
}
// lp_order attribute
if(contains(info.attributes, "p"))
{
values["lp_order"] = info.attributes.at("p").i();
}
// ensure pads available only when auto_pad is "NOT_SET"
check_padding_mode(info, "POOLING");
return values;
} }
instruction_ref parse(const op_desc& opd,
...@@ -108,144 +48,8 @@ struct parse_pooling : op_parser<parse_pooling>
onnx_parser::node_info info,
std::vector<instruction_ref> args) const
{
+return add_pooling_op(opd, std::move(info), args[0]);
-std::string mode = opd.op_name;
-const std::unordered_map<std::string, op::pooling_mode> mode_map = {
{"max", op::pooling_mode::max},
{"average", op::pooling_mode::average},
{"lpnorm", op::pooling_mode::lpnorm}};
if(not contains(mode_map, mode))
{
MIGRAPHX_THROW(
"PARSE_POOLING: onnx pooling mode must be [\"max\", \"average\", \"lpnorm\"]");
}
operation op = make_op("pooling", {{"mode", mode_map.at(mode)}});
value values = op.to_value();
auto l0 = args[0];
auto in_shape = l0->get_shape();
assert(in_shape.ndim() > 2);
auto kdims = in_shape.ndim() - 2;
values = handle_values(opd, info, in_shape, values);
// count include padding, if count include pad is 1, we always use
// explicit pad
int count_include_pad = 0;
if(contains(info.attributes, "count_include_pad"))
{
if(in_shape.dynamic())
{
MIGRAPHX_THROW("PARSE_POOLING: count_include_pad attribute is not supported for "
"dynamic input shape");
}
count_include_pad = info.attributes.at("count_include_pad").i();
}
std::vector<int64_t> paddings;
float pad_val = ((mode == "max") ? std::numeric_limits<float>::lowest() : 0.0f);
if(contains(info.attributes, "pads"))
{
values["padding"].clear();
copy(info.attributes["pads"].ints(), std::back_inserter(paddings));
check_attr_sizes(
kdims, paddings.size() / 2, "PARSE_POOLING: inconsistent explicit paddings");
}
if(paddings.size() != 2 * kdims)
{
paddings.resize(kdims * 2);
std::fill_n(paddings.begin(), 2 * kdims, 0);
}
if(values["padding"].size() != kdims)
{
values["padding"].resize(kdims);
std::fill_n(values["padding"].begin(), kdims, 0);
}
if(values["stride"].size() != kdims)
{
values["stride"].resize(kdims);
std::fill_n(values["stride"].begin(), kdims, 1);
}
// used to calculate the supposed output shape
std::vector<int64_t> orig_padding = paddings;
// TODO: add parsing for dilations
if(contains(info.attributes, "auto_pad") and
to_upper(info.attributes["auto_pad"].s()) != "NOTSET")
{
auto auto_pad = to_upper(info.attributes["auto_pad"].s());
// don't use the given padding sizes, if any
// values["padding"].clear();
if(in_shape.dynamic())
{
// set padding_mode to trigger auto padding at runtime
bool is_same_upper = (auto_pad.find("SAME_UPPER") != std::string::npos);
values["padding_mode"] = is_same_upper ? to_value(op::padding_mode_t::same_upper)
: to_value(op::padding_mode_t::same_lower);
}
else
{
// Calculate auto padding
// dilations (argument 4) not supported; default to all 1's
cal_auto_padding_size(info,
values,
values["lengths"].to_vector<std::size_t>(),
std::vector<size_t>(in_shape.ndim() - 2, 1),
in_shape.lens(),
paddings);
values["padding"] = paddings;
// default padding_mode indicates that padding sizes are not calculated dynamically
values["padding_mode"] = migraphx::op::padding_mode_t::default_;
}
}
std::vector<int64_t> slice_start;
std::vector<int64_t> slice_end;
tune_padding_size(values, paddings, count_include_pad, slice_start);
if(not slice_start.empty())
{
if(in_shape.dynamic())
{
MIGRAPHX_THROW(
"PARSE_POOLING: asymmetric padding not supported for dynamic input shape");
}
// calculate expected output shape
orig_padding.insert(orig_padding.begin() + kdims, 2, 0);
orig_padding.insert(orig_padding.begin(), 2, 0);
op::pad pad{orig_padding, 0.0f};
shape padded_shape = pad.compute_shape({l0->get_shape()});
// make an op just to get its output shape
auto out_lens = make_op("pooling", values).compute_shape({padded_shape}).lens();
// compute slice_end information
slice_end.resize(slice_start.size());
std::transform(out_lens.begin() + 2,
out_lens.end(),
slice_start.begin(),
slice_end.begin(),
[](auto i, auto j) { return i + j; });
}
values["padding"] = std::vector<size_t>(paddings.begin(), paddings.end());
check_asym_padding(info, l0, paddings, values, count_include_pad, pad_val);
op.from_value(values);
auto l1 = info.add_instruction(op, l0);
if(not slice_start.empty())
{
std::vector<int64_t> axes(kdims);
std::iota(axes.begin(), axes.end(), 2);
l1 = info.add_instruction(
make_op("slice", {{"axes", axes}, {"starts", slice_start}, {"ends", slice_end}}),
l1);
}
return l1;
}
}; };
} // namespace onnx
......
...@@ -36,7 +36,7 @@ namespace onnx {
/*
*********************************************************************************
- * Reference: see QLinearAdd in *
+ * Reference: see QLinearAdd, QLinearMul in *
* https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md *
*********************************************************************************
...@@ -49,6 +49,17 @@ namespace onnx {
This version of the operator has been available since version 1 of the 'com.microsoft' operator
set.
com.microsoft.QLinearMul
Performs element-wise binary multiplication on 8 bit data types (with Numpy-style broadcasting
support).
C = ((A - A_zero_point) * (B - B_zero_point)) * (A_scale * B_scale)/C_scale + C_zero_point
Version
This version of the operator has been available since version 1 of the 'com.microsoft' operator
set.
General definition of binary QLinear* ops:
Inputs (7 - 8)
A : T
First operand.
...@@ -88,15 +99,18 @@ namespace onnx {
*/
-struct parse_qlinearadd : op_parser<parse_qlinearadd>
+struct parse_qlinearbinary : op_parser<parse_qlinearbinary>
{
-std::vector<op_desc> operators() const { return {{"QLinearAdd"}}; }
+std::vector<op_desc> operators() const
{
return {{"QLinearAdd", "add"}, {"QLinearMul", "mul"}};
}
-// basic type checking for QLinearAdd Operator
-void check_inputs(const std::vector<instruction_ref>& args) const
+// basic type checking for binary QLinear Operator
+void check_inputs(const std::vector<instruction_ref>& args, const std::string& op_name) const
{
if(args.size() < 7)
-MIGRAPHX_THROW("QLINEARADD: missing inputs");
+MIGRAPHX_THROW(op_name + ": missing inputs");
const auto& in_a = args[0];
const auto& in_b = args[3];
...@@ -107,19 +121,19 @@ struct parse_qlinearadd : op_parser<parse_qlinearadd>
auto type_a = sh_a.type();
auto type_b = sh_b.type();
if(type_a != migraphx::shape::int8_type and type_a != migraphx::shape::uint8_type)
-MIGRAPHX_THROW("QLINEARADD: unsupported input type");
+MIGRAPHX_THROW(op_name + ": unsupported input type");
if(type_b != migraphx::shape::int8_type and type_b != migraphx::shape::uint8_type)
-MIGRAPHX_THROW("QLINEARADD: unsupported input type");
+MIGRAPHX_THROW(op_name + ": unsupported input type");
if(type_a != type_b)
-MIGRAPHX_THROW("QLINEARADD: mismatched input types");
+MIGRAPHX_THROW(op_name + ": mismatched input types");
}
-instruction_ref parse(const op_desc& /* opd */,
+instruction_ref parse(const op_desc& opd,
const onnx_parser& /*parser*/,
const onnx_parser::node_info& info,
const std::vector<instruction_ref>& args) const
{
-check_inputs(args);
+check_inputs(args, opd.op_name);
// A
const auto& in_a = args[0];
...@@ -134,8 +148,8 @@ struct parse_qlinearadd : op_parser<parse_qlinearadd>
const auto& in_zero_pt_b = args[5];
auto dquant_b = bcast_qdq_instr("dequantizelinear", in_b, in_scale_b, in_zero_pt_b, info);
-// C = A + B
-auto out_c = info.add_common_op("add", dquant_a, dquant_b);
+// C = op(A, B)
+auto out_c = info.add_common_op(opd.op_name, dquant_a, dquant_b);
const auto& in_scale_c = args[6];
......
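For reference, the dequantize -> float op -> requantize contract that this parser builds with bcast_qdq_instr and add_common_op can be written out in scalar form. A standalone sketch with an illustrative helper name and values, not library code:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Scalar version of the QLinearAdd/QLinearMul contract: dequantize both inputs,
// apply the float op, then requantize with the output scale and zero point.
uint8_t qlinear_add(uint8_t a, float a_scale, uint8_t a_zp,
                    uint8_t b, float b_scale, uint8_t b_zp,
                    float c_scale, uint8_t c_zp)
{
    float fa = (a - a_zp) * a_scale;
    float fb = (b - b_zp) * b_scale;
    float fc = fa + fb; // "mul" would use fa * fb
    float q  = std::nearbyint(fc / c_scale) + c_zp;
    return static_cast<uint8_t>(std::min(255.0f, std::max(0.0f, q)));
}

int main()
{
    // A = 0.5 and B = 0.25 with scale 1/128 and zero point 128
    std::cout << static_cast<int>(qlinear_add(192, 1.0f / 128, 128,
                                              160, 1.0f / 128, 128,
                                              1.0f / 128, 128))
              << "\n"; // prints 224, i.e. 0.75 requantized
}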
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/padding.hpp>
#include <migraphx/onnx/conv.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/onnx/broadcast_qdq.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/stringutils.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
struct parse_qlinearconcat : op_parser<parse_qlinearconcat>
{
std::vector<op_desc> operators() const { return {{"QLinearConcat"}}; }
// basic type checking for QLinearConcat Operator
void check_inputs(const std::vector<instruction_ref>& args) const
{
auto args_size = args.size();
// at least 5 input tensors:
// 1. is Y_scale: tensor(float)
// 2. is Y_zero_point: tensor(uint8)/tensor(int8)
// remaining is a sequence of:
// 3. Tensor: tensor(uint8)/tensor(int8)
// 4. Scale: tensor(float),
// 5. ZeroPoint: tensor(uint8)/tensor(int8) tensors
// Size can be 5, 8, 11 ...
if((args_size < 5) or ((args_size - 2) % 3 != 0))
MIGRAPHX_THROW("QLINEARCONCAT: missing inputs");
auto y_zp = args[1];
auto y_zp_type = y_zp->get_shape().type();
if(y_zp_type != migraphx::shape::int8_type and y_zp_type != migraphx::shape::uint8_type)
MIGRAPHX_THROW("QLINEARCONCAT: unsupported output type");
auto t0_type = args[2]->get_shape().type();
if(t0_type != migraphx::shape::int8_type and t0_type != migraphx::shape::uint8_type)
MIGRAPHX_THROW("QLINEARCONCAT: unsupported input type");
for(auto idx = 2; idx < args.size(); idx += 3)
{
if((args[idx]->get_shape().type() != t0_type) or
(args[idx + 2]->get_shape().type() != t0_type))
{
MIGRAPHX_THROW("QLINEARCONCAT: mismatching input types");
}
}
}
instruction_ref parse(const op_desc& /* opd */,
const onnx_parser& parser,
const onnx_parser::node_info& info,
const std::vector<instruction_ref>& args) const
{
check_inputs(args);
if(not contains(info.attributes, "axis"))
MIGRAPHX_THROW("QLINEARCONCAT: missing axis attribute");
auto axis = parser.parse_value(info.attributes.at("axis")).template at<int64_t>();
std::vector<instruction_ref> tmp;
for(auto idx = 2; idx < args.size(); idx += 3)
{
auto data_tensor = args[idx];
auto scale = args[idx + 1];
auto zero_pt = args[idx + 2];
tmp.push_back(bcast_qdq_instr("dequantizelinear", data_tensor, scale, zero_pt, info));
}
auto y = info.add_instruction(migraphx::make_op("concat", {{"axis", axis}}), tmp);
auto y_scale = args[0];
auto y_zero_pt = args[1];
return bcast_qdq_instr("quantizelinear", y, y_scale, y_zero_pt, info);
}
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -23,6 +23,7 @@
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/pooling.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/make_op.hpp>
...@@ -36,90 +37,56 @@ namespace onnx {
/*
*********************************************************************************
- * Reference: see QLinearGlobalAveragePool in *
+ * Reference: see QLinearAveragePool and QLinearGlobalAveragePool in *
* github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md *
*********************************************************************************
*/
-QLinearGlobalAveragePool consumes an input tensor X and applies
-Average pooling across the values in the same channel. This is
-equivalent to AveragePool with kernel size equal to the spatial
-dimension of input tensor. Input is of type uint8_t or int8_t.
-Version
-This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
-Attributes
-channels_last : int
-Inputs
-X : T
-Input data tensor from the previous operator; According to channels_last, dimensions for image case
-are (N x C x H x W), or (N x H x W x C) where N is the batch size, C is the number of channels, and
-H and W are the height and the width of the data. For non image case, the dimensions are in the form
-of (N x C x D1 x D2 ... Dn), or (N x D1 X D2 ... Dn x C) where N is the batch size.
-x_scale : tensor(float)
-Scale of quantized input 'X'. It must be a scalar.
-x_zero_point : T
-Zero point tensor for input 'X'. It must be a scalar.
-y_scale : tensor(float)
-Scale of quantized output 'Y'. It must be a scalar.
-y_zero_point : T
-Zero point tensor for output 'Y'. It must be a scalar.
-Outputs
-Y : T
-Output data tensor from pooling across the input tensor. The output tensor has the same rank as the
-input. with the N and C value keep it value, while the other dimensions are all 1. Type Constraints
-T : tensor(uint8), tensor(int8)
-Constrain input and output types to signed/unsigned int8 tensors.
-*/
-struct parse_qlinearglobalaveragepool : op_parser<parse_qlinearglobalaveragepool>
+struct parse_qlinearpooling : op_parser<parse_qlinearpooling>
{
-std::vector<op_desc> operators() const { return {{"QLinearGlobalAveragePool"}}; }
-// basic type checking for QLinearGlobalAveragePool Operator
-void check_inputs(const std::vector<instruction_ref>& args) const
-{
-if(args.size() < 5)
-MIGRAPHX_THROW("QLINEARGLOBALAVERAGEPOOL: missing inputs");
-const auto& in_x = args[0];
-const auto& zero_pt_x = args[2];
-const auto& zero_pt_y = args[4];
-if(in_x->get_shape().ndim() <= 2)
-MIGRAPHX_THROW("QLINEARGLOBALAVERAGEPOOL: input dimensions too small");
-auto type_x = in_x->get_shape().type();
-if(type_x != migraphx::shape::int8_type and type_x != migraphx::shape::uint8_type)
-MIGRAPHX_THROW("QLINEARGLOBALAVERAGEPOOL: unsupported input type");
-if(type_x != zero_pt_x->get_shape().type())
-MIGRAPHX_THROW("QLINEARGLOBALAVERAGEPOOL: mismatched type: input zero point");
-if(type_x != zero_pt_y->get_shape().type())
-MIGRAPHX_THROW("QLINEARGLOBALAVERAGEPOOL: mismatched type: output zero point");
-}
+std::vector<op_desc> operators() const
+{
+return {{"QLinearGlobalAveragePool", "average"}, {"QLinearAveragePool", "average"}};
+}
+void check_inputs(const op_desc& opd, const std::vector<instruction_ref>& args) const
+{
+const auto& in_x = args[0];
+const auto onnx_name = opd.onnx_name;
+if(in_x->get_shape().ndim() <= 2)
+MIGRAPHX_THROW(onnx_name + ": input dimensions too small");
+auto type_x = in_x->get_shape().type();
+if(type_x != migraphx::shape::int8_type and type_x != migraphx::shape::uint8_type)
+MIGRAPHX_THROW(onnx_name + ": unsupported input type");
+const auto& zero_pt_x = args[2];
+if(type_x != zero_pt_x->get_shape().type())
+MIGRAPHX_THROW(onnx_name + ": mismatched type: input zero point");
+if(args.size() == 5)
+{
+const auto& zero_pt_y = args[4];
+if(type_x != zero_pt_y->get_shape().type())
+MIGRAPHX_THROW(onnx_name + ": mismatched type: output zero point");
+}
+}
-instruction_ref parse(const op_desc& /* opd */,
+instruction_ref parse(const op_desc& opd,
const onnx_parser& parser,
const onnx_parser::node_info& info,
const std::vector<instruction_ref>& args) const
{
-int channels_last =
-parser.parse_value(info.attributes.at("channels_last")).template at<int>();
-if(channels_last != 0)
-MIGRAPHX_THROW(
-"QLINEARGLOBALAVERAGEPOOL: channels_last (N x D1..Dn x C) is not supported");
+if(contains(info.attributes, "channel_last"))
+{
+int channels_last =
+parser.parse_value(info.attributes.at("channels_last")).template at<int>();
+if(channels_last != 0)
+MIGRAPHX_THROW(opd.onnx_name + ": channels_last (N x D1..Dn x C) is not supported");
+}
-check_inputs(args);
+check_inputs(opd, args);
// Input: X
...@@ -128,21 +95,18 @@ struct parse_qlinearglobalaveragepool : op_parser<parse_qlinearglobalaveragepool
const auto& zero_pt_x = args[2];
auto dquant_x = bcast_qdq_instr("dequantizelinear", in_x, scale_x, zero_pt_x, info);
-// Output Y = globalaveragepool(X)
+// Output Y = pooling_op(X)
-auto op = migraphx::op::pooling{migraphx::op::pooling_mode::average};
-auto lens = in_x->get_shape().lens();
-std::vector<size_t> lengths(lens.begin() + 2, lens.end());
-op.lengths = lengths;
-op.padding = std::vector<size_t>(lens.size());
-auto out_y = info.add_instruction(op, dquant_x);
-const auto& scale_y = args[3];
-const auto& zero_pt_y = args[4];
-auto out_quant_y = bcast_qdq_instr("quantizelinear", out_y, scale_y, zero_pt_y, info);
-return out_quant_y;
+auto out_y = add_pooling_op(opd, info, dquant_x);
+const auto& in_scale_y = args[3];
+// zero_pt for Y is supplied as the last optional argument..
+if(args.size() == 5)
+return (bcast_qdq_instr("quantizelinear", out_y, in_scale_y, args[4], info));
+// if no zero_pt: just broadcast the scale..
+auto bcast_scale_y = bcast_scalar_instr(out_y->get_shape(), in_scale_y, info);
+return (info.add_instruction(migraphx::make_op("quantizelinear"), out_y, bcast_scale_y));
}
};
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/common.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/onnx/broadcast_qdq.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/instruction.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
/*
*********************************************************************************
* Reference: see QLinearSigmoid, QLinearLeakyRelu in *
* https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md *
*********************************************************************************
com.microsoft.QLinearSigmoid
QLinearSigmoid takes quantized input data (Tensor), and quantize parameter for output, and produces
one output data (Tensor) where the function f(x) = quantize(Sigmoid(dequantize(x))), is applied to
the data tensor elementwise. Where the function Sigmoid(x) = 1 / (1 + exp(-x))
Version
This version of the operator has been available since version 1 of the 'com.microsoft' operator
set.
*****************************************************************************************************
com.microsoft.QLinearLeakyRelu
QLinearLeakyRelu takes quantized input data (Tensor), an argument alpha, and quantize parameter for
output, and produces one output data (Tensor) where the function f(x) = quantize(alpha *
dequantize(x)) for dequantize(x) < 0, f(x) = quantize(dequantize(x)) for dequantize(x) >= 0, is
applied to the data tensor elementwise.
Version
This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
Attributes
alpha : float
Coefficient of leakage.
******************************************************************************************************
Generic input layout of QLinear unary operators:
Inputs (4 - 5)
X : T
Input tensor
X_scale : tensor(float)
Input X's scale. It's a scalar, which means a per-tensor/layer quantization.
X_zero_point (optional) : T
Input X's zero point. Default value is 0 if it's not specified. It's a scalar, which means a
per-tensor/layer quantization.
Y_scale : tensor(float)
Output Y's scale. It's a scalar, which means a per-tensor/layer quantization.
Y_zero_point (optional) : T
Output Y's zero point. Default value is 0 if it's not specified. It's a scalar, which means a
per-tensor/layer quantization.
Outputs
Y : T
Output tensor
Type Constraints
T : tensor(uint8), tensor(int8)
Constrain input and output types to 8 bit tensors.
*/
struct parse_qlinearunary : op_parser<parse_qlinearunary>
{
std::vector<op_desc> operators() const
{
return {{"QLinearSigmoid", "sigmoid"}, {"QLinearLeakyRelu", "leaky_relu"}};
}
void check_inputs(const op_desc& opd, const std::vector<instruction_ref>& args) const
{
if(args.size() < 4)
MIGRAPHX_THROW(opd.op_name + ": missing inputs");
const auto& in_x = args[0];
auto sh_x = in_x->get_shape();
auto type_x = sh_x.type();
if(type_x != migraphx::shape::int8_type and type_x != migraphx::shape::uint8_type)
MIGRAPHX_THROW(opd.op_name + ": unsupported input type");
}
instruction_ref parse(const op_desc& opd,
const onnx_parser& parser,
const onnx_parser::node_info& info,
const std::vector<instruction_ref>& args) const
{
check_inputs(opd, args);
// X
const auto& in_x = args[0];
const auto& in_scale_x = args[1];
const auto& in_zero_pt_x = args[2];
auto dquant_x = bcast_qdq_instr("dequantizelinear", in_x, in_scale_x, in_zero_pt_x, info);
// Y = op(dequantizelinear(x))
auto op = parser.load(opd.op_name, info);
auto y = info.add_instruction(op, dquant_x);
const auto& in_scale_y = args[3];
// zero_pt for Y is supplied as the last optional argument..
if(args.size() == 5)
return (bcast_qdq_instr("quantizelinear", y, in_scale_y, args[4], info));
// if no zero_pt: just broadcast the scale..
auto bcast_scale_sigm = bcast_scalar_instr(y->get_shape(), in_scale_y, info);
return (info.add_instruction(migraphx::make_op("quantizelinear"), y, bcast_scale_sigm));
}
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
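The same contract in scalar form for the unary case, f(x) = quantize(Sigmoid(dequantize(x))). A standalone sketch with illustrative values, not part of the commit:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Scalar sketch of quantize(Sigmoid(dequantize(x))) as described above.
uint8_t qlinear_sigmoid(uint8_t x, float x_scale, uint8_t x_zp, float y_scale, uint8_t y_zp)
{
    float fx = (x - x_zp) * x_scale;
    float fy = 1.0f / (1.0f + std::exp(-fx));
    float q  = std::nearbyint(fy / y_scale) + y_zp;
    return static_cast<uint8_t>(std::min(255.0f, std::max(0.0f, q)));
}

int main()
{
    // x = 0 -> Sigmoid(0) = 0.5; with y_scale = 1/256 and y_zero_point = 0 this quantizes to 128
    std::cout << static_cast<int>(qlinear_sigmoid(128, 1.0f / 128, 128, 1.0f / 256, 0)) << "\n";
}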
...@@ -181,6 +181,76 @@ static std::string get_nearest_mode(const onnx_parser::attribute_map& attr)
return nearest_mode;
}
static std::vector<double> get_scales(const onnx_parser::attribute_map& attr)
{
std::vector<double> scales;
if(contains(attr, "scales"))
{
copy(attr.at("scales").floats(), std::back_inserter(scales));
}
return scales;
}
static void parse_args(const std::vector<instruction_ref>& args,
const std::vector<size_t>& in_lens,
const std::string& op_name,
std::vector<double>& vec_scale,
std::vector<std::size_t>& out_lens)
{
for(const auto& arg : args)
{
if(arg->name() == "undefined" or arg == args.front())
{
continue;
}
// skipped empty input
auto lens = arg->get_shape().lens();
if(lens.empty())
{
continue;
}
auto type = arg->get_shape().type();
// output size
if(type == shape::int64_type)
{
auto arg_out_s = arg->eval();
check_arg_empty(arg_out_s,
"PARSE_" + op_name + ": dynamic output size is not supported!");
arg_out_s.visit([&](const auto& ol) { out_lens.assign(ol.begin(), ol.end()); });
if(out_lens.size() != in_lens.size())
{
MIGRAPHX_THROW("PARSE_" + op_name +
": specified output size does not match input size");
}
// compute the scale
vec_scale.resize(in_lens.size());
std::transform(in_lens.begin(),
in_lens.end(),
out_lens.begin(),
vec_scale.begin(),
[](auto iss, auto oss) { return 1.0 * oss / iss; });
}
else
{
// scale input
if(lens[0] == in_lens.size())
{
auto arg_scale = arg->eval();
check_arg_empty(arg_scale,
"PARSE_" + op_name + ": dynamic input scale is not supported!");
arg_scale.visit([&](const auto& v) { vec_scale.assign(v.begin(), v.end()); });
}
}
}
}
struct parse_resize : op_parser<parse_resize>
{
std::vector<op_desc> operators() const { return {{"Resize"}, {"Upsample"}}; }
...@@ -214,72 +284,30 @@ struct parse_resize : op_parser<parse_resize>
std::vector<std::size_t> out_lens(in_lens.size());
// scale
-std::vector<double> vec_scale;
-for(const auto& arg : args)
-{
-if(arg->name() == "undefined" or arg == args.front())
-{
-continue;
-}
-// skipped empty input
-auto lens = arg->get_shape().lens();
-if(lens.empty())
-{
-continue;
-}
-auto type = arg->get_shape().type();
-// output size
-if(type == shape::int64_type)
-{
-auto arg_out_s = arg->eval();
-check_arg_empty(arg_out_s, "PARSE_" + opd.op_name + ": dynamic output size is not supported!");
-arg_out_s.visit([&](const auto& ol) { out_lens.assign(ol.begin(), ol.end()); });
-if(out_lens.size() != in_lens.size())
-{
-MIGRAPHX_THROW("PARSE_" + opd.op_name + ": specified output size does not match input size");
-}
-// compute the scale
-vec_scale.resize(in_lens.size());
-std::transform(in_lens.begin(), in_lens.end(), out_lens.begin(), vec_scale.begin(), [](auto iss, auto oss) { return 1.0 * oss / iss; });
-}
-else
-{
-// scale input
-if(lens[0] == in_lens.size())
-{
-auto arg_scale = arg->eval();
-check_arg_empty(arg_scale, "PARSE_" + opd.op_name + ": dynamic input scale is not supported!");
-arg_scale.visit([&](const auto& v) { vec_scale.assign(v.begin(), v.end()); });
-if(in_lens.size() != vec_scale.size())
-{
-MIGRAPHX_THROW("PARSE_" + opd.op_name + ": ranks of input and scale are different!");
-}
-std::transform(in_lens.begin(), in_lens.end(), vec_scale.begin(), out_lens.begin(), [&](auto idx, auto scale) { return static_cast<std::size_t>(idx * scale); });
-}
-}
-}
+std::vector<double> vec_scale = get_scales(info.attributes);
+// If `scales` was not an attribute, it must be an input
+if(vec_scale.empty())
+{
+// Depending on the args, it *must* populate the `vec_scale`, and might populate
+// `out_lens`
+parse_args(args, in_lens, opd.op_name, vec_scale, out_lens);
+}
+if(in_lens.size() != vec_scale.size())
+{
+MIGRAPHX_THROW("PARSE_" + opd.op_name + ": ranks of input and scale are different!");
+}
+// if the output was not calculated yet, we update it based on the scales
+if(all_of(out_lens.cbegin(), out_lens.cend(), [](auto o) { return o == 0; }))
+{
+std::transform(in_lens.begin(), in_lens.end(), vec_scale.begin(), out_lens.begin(), [&](auto idx, auto scale) { return static_cast<std::size_t>(idx * scale); });
+}
shape out_s{in_s.type(), out_lens};
...@@ -288,7 +316,6 @@ struct parse_resize : op_parser<parse_resize>
// reshape input to one-dimension
std::vector<int64_t> rsp_lens = {static_cast<int64_t>(in_s.elements())};
-args[0] = info.make_contiguous(args[0]);
auto rsp = info.add_instruction(make_op("reshape", {{"dims", rsp_lens}}), args[0]);
if(mode == "nearest")
......
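The scales/sizes bookkeeping above works in both directions: given scales, the output lengths are the truncated products; given sizes, the scales are the out/in ratios. A small standalone sketch (illustrative shapes, not parser code):

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // either `scales` is given and output lengths are derived, or `sizes` is
    // given and the scales are derived, mirroring the parser above
    std::vector<std::size_t> in_lens = {1, 3, 4, 6};
    std::vector<double> scales       = {1.0, 1.0, 2.0, 1.5};

    std::vector<std::size_t> out_lens(in_lens.size());
    for(std::size_t i = 0; i < in_lens.size(); ++i)
        out_lens[i] = static_cast<std::size_t>(in_lens[i] * scales[i]); // truncates like the parser

    std::vector<double> back_scales(in_lens.size());
    for(std::size_t i = 0; i < in_lens.size(); ++i)
        back_scales[i] = 1.0 * out_lens[i] / in_lens[i];

    for(std::size_t i = 0; i < in_lens.size(); ++i)
        std::cout << in_lens[i] << " -> " << out_lens[i] << " (scale " << back_scales[i] << ")\n";
}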
...@@ -39,15 +39,17 @@ struct parse_scatternd : op_parser<parse_scatternd>
const onnx_parser::node_info& info,
std::vector<instruction_ref>& args) const
{
std::string reduction = "none";
if(contains(info.attributes, "reduction"))
{
-if(info.attributes.at("reduction").s() == "add")
-return info.add_instruction(migraphx::make_op("scatternd_add"), args);
-if(info.attributes.at("reduction").s() == "mul")
-return info.add_instruction(migraphx::make_op("scatternd_mul"), args);
+reduction = info.attributes.at("reduction").s();
+if(not contains({"none", "add", "mul", "min", "max"}, reduction))
+{
+MIGRAPHX_THROW("PARSE_SCATTERND: unsupported reduction mode " + reduction);
+}
}
-return info.add_instruction(migraphx::make_op("scatternd_none"), args);
+return info.add_instruction(migraphx::make_op("scatternd_" + reduction), args);
}
};
......
...@@ -46,6 +46,9 @@ struct parse_slice : op_parser<parse_slice>
void always_insert(instruction_ref arg) { op_args.insert(op_args.begin(), arg); }
/**
* Either insert argument into `this->op_args` or return the constant value of the argument
*/
std::vector<int64_t> insert(instruction_ref arg)
{
std::vector<int64_t> result;
...@@ -144,16 +147,15 @@ struct parse_slice : op_parser<parse_slice>
sd.op.axes = axes;
}
-if(not sd.steps.empty())
+if(std::any_of(sd.steps.begin(), sd.steps.end(), [](auto s) { return s != 1; }))
{
if(sd.op.starts.empty() or sd.op.ends.empty())
-MIGRAPHX_THROW("PARSE_SLICE: steps and variable starts and ends is not supported");
+MIGRAPHX_THROW("PARSE_SLICE: steps and variable starts and/or ends is not supported");
if(sd.op.axes.empty())
MIGRAPHX_THROW("PARSE_SLICE: steps and variable axes is not supported");
}
-assert(sd.steps.empty() or sd.steps.size() == sd.op.axes.size());
// If any axes have negative step, prepare to add a "reverse" op
for(auto i : range(sd.steps.size()))
{
......
...@@ -68,13 +68,34 @@ struct parse_split : op_parser<parse_split>
// no split attribute, input is equally divided
else
{
-if((lens[tuned_axis] % info.num_outputs) != 0)
-{
-MIGRAPHX_THROW("PARSE_SPLIT: input cannot be equally divided into " +
-std::to_string(info.num_outputs) + " splits!");
-}
-auto dl = lens[tuned_axis] / info.num_outputs;
-vec_splits.resize(info.num_outputs, dl);
+std::size_t num_outputs = info.num_outputs;
+// the num_outputs attribute seems to be redundant since we already have
+// node_info::num_outputs, but we can still perform an error check
+if(contains(info.attributes, "num_outputs"))
+{
+num_outputs = parser.parse_value(info.attributes.at("num_outputs")).at<std::size_t>();
+if(num_outputs != info.num_outputs)
+{
+MIGRAPHX_THROW("PARSE_SPLIT: num_outputs attribute " + std::to_string(num_outputs) +
+" doesn't match actual number of outputs " + std::to_string(info.num_outputs) + "!");
+}
+}
+if(lens[tuned_axis] % num_outputs == 0)
+{
+std::size_t chunk_size = lens[tuned_axis] / num_outputs;
+vec_splits.resize(num_outputs, chunk_size);
+}
+else
+{
+std::size_t chunk_size = lens[tuned_axis] / num_outputs + 1;
+std::size_t last_chunk_size = lens[tuned_axis] - chunk_size * (num_outputs - 1);
+vec_splits.resize(num_outputs - 1, chunk_size);
+vec_splits.push_back(last_chunk_size);
+}
}
if(std::accumulate(vec_splits.begin(), vec_splits.end(), int64_t(0)) !=
......
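The uneven-split rule added above (ceil-sized chunks, with a smaller final chunk taking the remainder) can be checked with a standalone sketch; split_sizes here is an illustrative helper, not part of the parser:

#include <cstddef>
#include <iostream>
#include <vector>

// Sketch of the chunking rule above: when the axis length is not divisible by
// num_outputs, every chunk gets ceil(len / num_outputs) elements except the last,
// which takes the remainder.
std::vector<std::size_t> split_sizes(std::size_t len, std::size_t num_outputs)
{
    std::vector<std::size_t> sizes;
    if(len % num_outputs == 0)
    {
        sizes.assign(num_outputs, len / num_outputs);
    }
    else
    {
        std::size_t chunk = len / num_outputs + 1;
        sizes.assign(num_outputs - 1, chunk);
        sizes.push_back(len - chunk * (num_outputs - 1));
    }
    return sizes;
}

int main()
{
    for(std::size_t s : split_sizes(7, 3)) // prints 3 3 1, matching the rule in the parser above
        std::cout << s << " ";
    std::cout << "\n";
}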
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/tune_axis.hpp>
#include <optional>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
// generate unique output stream y, given input stream x;
//
// case unsorted:
// input x: [2, 1, 1, 3, 4, 3], attr_sorted = 0;
// output(s):
// y: [2, 1, 3, 4] --- the unique output
// y_indices: [0, 1, 3, 4] --- first occurrence of each unique value, as indices into x
// x_rev_indices: [0, 1, 1, 2, 3, 2] --- x seen in terms of indices of y
// y_count: [1, 2, 2, 1] -- count at each y_index. sum = len(x)
//
// case sorted:
// input x: [2, 1, 1, 3, 4, 3], attr_sorted = 1;
// output(s):
// y: [1, 2, 3, 4] --- the unique output
// y_indices: [1, 0, 3, 4] --- first occurrence of each unique value, as indices into x
// x_rev_indices: [1, 0, 0, 2, 3, 2] --- x seen in terms of indices of y
// y_count: [2, 1, 2, 1] -- count at each y_index. sum = len(x)
struct parse_unique : op_parser<parse_unique>
{
std::vector<op_desc> operators() const { return {{"Unique"}}; }
std::vector<instruction_ref> parse(const op_desc& opd,
const onnx_parser& parser,
const onnx_parser::node_info& info,
std::vector<instruction_ref> args) const
{
int64_t sorted = 1; // default = sorted.
if(contains(info.attributes, "sorted"))
sorted = parser.parse_value(info.attributes.at("sorted")).at<int>();
std::optional<int64_t> axis;
if(contains(info.attributes, "axis"))
{
auto n_dim = args[0]->get_shape().ndim();
axis = parser.parse_value(info.attributes.at("axis")).at<int>();
axis = tune_axis(n_dim, *axis, opd.op_name);
}
migraphx::argument data_arg = args.back()->eval();
auto opr = axis ? migraphx::make_op("unique", {{"axis", *axis}, {"sorted", sorted}})
: migraphx::make_op("unique", {{"sorted", sorted}});
auto u_opr = info.add_instruction(opr, args.at(0));
auto i_y = info.add_instruction(make_op("get_tuple_elem", {{"index", 0}}), u_opr);
auto i_y_idx = info.add_instruction(make_op("get_tuple_elem", {{"index", 1}}), u_opr);
auto i_x_idx = info.add_instruction(make_op("get_tuple_elem", {{"index", 2}}), u_opr);
auto i_count = info.add_instruction(make_op("get_tuple_elem", {{"index", 3}}), u_opr);
return {i_y, i_y_idx, i_x_idx, i_count};
}
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
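The comment block at the top of this file fixes the expected outputs for a small example; the following standalone reference (illustrative only, not MIGraphX code) reproduces the unsorted case and can be used to sanity-check the four get_tuple_elem outputs produced by the parser above.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

struct unique_result
{
    std::vector<int> y;
    std::vector<int64_t> y_indices;
    std::vector<int64_t> x_rev_indices;
    std::vector<int64_t> y_count;
};

// Reference implementation of the unsorted Unique semantics described above.
unique_result unique_unsorted(const std::vector<int>& x)
{
    unique_result r;
    std::unordered_map<int, int64_t> seen; // value -> index into y
    for(std::size_t i = 0; i < x.size(); ++i)
    {
        auto it = seen.find(x[i]);
        if(it == seen.end())
        {
            auto new_index = static_cast<int64_t>(r.y.size());
            seen.emplace(x[i], new_index);
            r.y.push_back(x[i]);
            r.y_indices.push_back(static_cast<int64_t>(i));
            r.y_count.push_back(1);
            r.x_rev_indices.push_back(new_index);
        }
        else
        {
            r.y_count[it->second] += 1;
            r.x_rev_indices.push_back(it->second);
        }
    }
    return r;
}

int main()
{
    auto r = unique_unsorted({2, 1, 1, 3, 4, 3});
    assert((r.y == std::vector<int>{2, 1, 3, 4}));
    assert((r.y_indices == std::vector<int64_t>{0, 1, 3, 4}));
    assert((r.x_rev_indices == std::vector<int64_t>{0, 1, 1, 2, 3, 2}));
    assert((r.y_count == std::vector<int64_t>{1, 2, 2, 1}));
}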
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/pooling.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/onnx/padding.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/ranges.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
value handle_pooling_values(const op_desc& opd,
onnx_parser::node_info info,
const shape& in_shape,
value values)
{
auto kdims = in_shape.ndim() - 2;
if(starts_with(opd.onnx_name, "Global") or starts_with(opd.onnx_name, "QLinearGlobal"))
{
// if spatial dimensions are dynamic use dyn_global flag
if(in_shape.dynamic() and std::any_of(in_shape.dyn_dims().cbegin() + 2,
in_shape.dyn_dims().cend(),
[](auto dd) { return not dd.is_fixed(); }))
{
values["dyn_global"] = true;
values["lengths"] = std::vector<size_t>();
}
else
{
// works with static and fixed dynamic shape
auto m_lens = in_shape.max_lens();
values["lengths"] = std::vector<size_t>(m_lens.begin() + 2, m_lens.end());
}
}
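    // For example (illustrative values, not from a real model): GlobalAveragePool over a static
    // NCHW input of shape {1, 3, 7, 7} takes the else branch above and sets lengths = {7, 7},
    // so the pooling window covers the whole spatial extent of the input.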
if(contains(info.attributes, "ceil_mode"))
{
values["ceil_mode"] = static_cast<bool>(info.attributes.at("ceil_mode").i());
}
if(contains(info.attributes, "strides"))
{
values["stride"].clear();
copy(info.attributes["strides"].ints(), std::back_inserter(values["stride"]));
check_attr_sizes(kdims, values["stride"].size(), "PARSE_POOLING: inconsistent strides");
}
if(contains(info.attributes, "kernel_shape"))
{
values["lengths"].clear();
copy(info.attributes["kernel_shape"].ints(), std::back_inserter(values["lengths"]));
check_attr_sizes(kdims, values["lengths"].size(), "PARSE_POOLING: inconsistent lengths");
}
if(contains(info.attributes, "dilations"))
{
values["dilations"].clear();
copy(info.attributes["dilations"].ints(), std::back_inserter(values["dilations"]));
check_attr_sizes(
kdims, values["dilations"].size(), "PARSE_POOLING: inconsistent dilations");
}
// lp_order attribute
if(contains(info.attributes, "p"))
{
values["lp_order"] = info.attributes.at("p").i();
}
// ensure pads available only when auto_pad is "NOT_SET"
check_padding_mode(info, "POOLING");
return values;
}
instruction_ref add_pooling_op(const op_desc& opd, onnx_parser::node_info info, instruction_ref l0)
{
std::string mode = opd.op_name;
const std::unordered_map<std::string, op::pooling_mode> mode_map = {
{"max", op::pooling_mode::max},
{"average", op::pooling_mode::average},
{"lpnorm", op::pooling_mode::lpnorm}};
if(not contains(mode_map, mode))
{
MIGRAPHX_THROW(
"PARSE_POOLING: onnx pooling mode must be [\"max\", \"average\", \"lpnorm\"]");
}
operation op = make_op("pooling", {{"mode", mode_map.at(mode)}});
value values = op.to_value();
auto in_shape = l0->get_shape();
assert(in_shape.ndim() > 2);
auto kdims = in_shape.ndim() - 2;
values = handle_pooling_values(opd, info, in_shape, values);
    // count_include_pad: when it is 1, padded elements are counted in the averaging window,
    // so we always lower it to an explicit pad op instead of relying on the pooling op's
    // own padding
    int count_include_pad = 0;
if(contains(info.attributes, "count_include_pad"))
{
if(in_shape.dynamic())
{
MIGRAPHX_THROW("PARSE_POOLING: count_include_pad attribute is not supported for "
"dynamic input shape");
}
count_include_pad = info.attributes.at("count_include_pad").i();
}
std::vector<int64_t> paddings;
float pad_val = ((mode == "max") ? std::numeric_limits<float>::lowest() : 0.0f);
if(contains(info.attributes, "pads"))
{
values["padding"].clear();
copy(info.attributes["pads"].ints(), std::back_inserter(paddings));
check_attr_sizes(
kdims, paddings.size() / 2, "PARSE_POOLING: inconsistent explicit paddings");
}
if(paddings.size() != 2 * kdims)
{
paddings.resize(kdims * 2);
std::fill_n(paddings.begin(), 2 * kdims, 0);
}
if(values["padding"].size() != kdims)
{
values["padding"].resize(kdims);
std::fill_n(values["padding"].begin(), kdims, 0);
}
if(values["stride"].size() != kdims)
{
values["stride"].resize(kdims);
std::fill_n(values["stride"].begin(), kdims, 1);
}
if(values["dilations"].size() != kdims)
{
values["dilations"].resize(kdims);
std::fill_n(values["dilations"].begin(), kdims, 1);
}
// used to calculate the supposed output shape
std::vector<int64_t> orig_padding = paddings;
// TODO: add parsing for dilations
if(contains(info.attributes, "auto_pad") and
to_upper(info.attributes["auto_pad"].s()) != "NOTSET")
{
auto auto_pad = to_upper(info.attributes["auto_pad"].s());
// don't use the given padding sizes, if any
// values["padding"].clear();
if(in_shape.dynamic())
{
// set padding_mode to trigger auto padding at runtime
bool is_same_upper = (auto_pad.find("SAME_UPPER") != std::string::npos);
values["padding_mode"] = is_same_upper ? to_value(op::padding_mode_t::same_upper)
: to_value(op::padding_mode_t::same_lower);
}
else
{
// Calculate auto padding
// dilations (argument 4) not supported; default to all 1's
cal_auto_padding_size(info,
values,
values["lengths"].to_vector<std::size_t>(),
values["dilations"].to_vector<std::size_t>(),
in_shape.lens(),
paddings);
values["padding"] = paddings;
// default padding_mode indicates that padding sizes are not calculated dynamically
values["padding_mode"] = migraphx::op::padding_mode_t::default_;
}
}
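    // Worked example with illustrative numbers: for a static spatial dim of 6 with kernel 3,
    // stride 2 and dilation 1, SAME_UPPER expects an output dim of ceil(6 / 2) = 3, so the
    // total padding is (3 - 1) * 2 + 3 - 6 = 1; SAME_UPPER puts the odd element at the end
    // (pads {0, 1}) while SAME_LOWER puts it at the beginning (pads {1, 0}).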
std::vector<int64_t> slice_start;
std::vector<int64_t> slice_end;
tune_padding_size(values, paddings, count_include_pad, slice_start);
if(not slice_start.empty())
{
if(in_shape.dynamic())
{
MIGRAPHX_THROW(
"PARSE_POOLING: asymmetric padding not supported for dynamic input shape");
}
// calculate expected output shape
orig_padding.insert(orig_padding.begin() + kdims, 2, 0);
orig_padding.insert(orig_padding.begin(), 2, 0);
op::pad pad{orig_padding, 0.0f};
shape padded_shape = pad.compute_shape({l0->get_shape()});
// make an op just to get its output shape
auto out_lens = make_op("pooling", values).compute_shape({padded_shape}).lens();
// compute slice_end information
slice_end.resize(slice_start.size());
std::transform(out_lens.begin() + 2,
out_lens.end(),
slice_start.begin(),
slice_end.begin(),
[](auto i, auto j) { return i + j; });
}
values["padding"] = std::vector<size_t>(paddings.begin(), paddings.end());
check_asym_padding(info, l0, paddings, values, count_include_pad, pad_val);
op.from_value(values);
auto l1 = info.add_instruction(op, l0);
if(not slice_start.empty())
{
std::vector<int64_t> axes(kdims);
std::iota(axes.begin(), axes.end(), 2);
l1 = info.add_instruction(
make_op("slice", {{"axes", axes}, {"starts", slice_start}, {"ends", slice_end}}), l1);
}
return l1;
}
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
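The attributes collected in handle_pooling_values feed the usual ONNX pooling shape rule. As a quick reference, here is a minimal sketch of that rule for one spatial dimension (the helper name pool_out_dim and the sample numbers are illustrative, not MIGraphX code):

#include <cmath>
#include <cstdint>

// Output extent of one pooled spatial dimension under the standard ONNX formula.
int64_t pool_out_dim(int64_t in,
                     int64_t kernel,
                     int64_t stride,
                     int64_t dilation,
                     int64_t pad_begin,
                     int64_t pad_end,
                     bool ceil_mode)
{
    const int64_t effective_kernel = dilation * (kernel - 1) + 1;
    const double q = static_cast<double>(in + pad_begin + pad_end - effective_kernel) / stride;
    return static_cast<int64_t>(ceil_mode ? std::ceil(q) : std::floor(q)) + 1;
}

// Example: pool_out_dim(6, 3, 2, 1, 0, 0, false) == 2, while the same configuration with
// ceil_mode = true yields 3, which is why the ceil_mode attribute is forwarded to the op above.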
@@ -40,7 +40,7 @@
 #include <migraphx/json.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/op/common.hpp>
+#include <migraphx/float8.hpp>
 #ifdef HAVE_GPU
 #include <migraphx/gpu/hip.hpp>
 #endif
@@ -144,6 +144,18 @@ struct npy_format_descriptor<half>
     static constexpr auto name() { return _("half"); }
 };
 
+template <>
+struct npy_format_descriptor<migraphx::fp8::fp8e4m3fnuz>
+{
+    static std::string format()
+    {
+        // following: https://docs.python.org/3/library/struct.html#format-characters
+        // TODO: need to figure out correct encoding
+        return "z";
+    }
+    static constexpr auto name() { return _("fp8e4m3fnuz"); }
+};
+
 } // namespace detail
 } // namespace pybind11
@@ -472,7 +484,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
               map_dyn_input_dims,
           bool skip_unknown_operators,
           bool print_program_on_error,
-           int64_t max_loop_iterations) {
+           int64_t max_loop_iterations,
+           int64_t limit_max_iterations) {
            migraphx::onnx_options options;
            options.default_dim_value = default_dim_value;
            options.default_dyn_dim_value = default_dyn_dim_value;
@@ -481,6 +494,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
            options.skip_unknown_operators = skip_unknown_operators;
            options.print_program_on_error = print_program_on_error;
            options.max_loop_iterations = max_loop_iterations;
+            options.limit_max_iterations = limit_max_iterations;
            return migraphx::parse_onnx(filename, options);
        },
        "Parse onnx file",
@@ -492,7 +506,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
            std::unordered_map<std::string, std::vector<migraphx::shape::dynamic_dimension>>(),
        py::arg("skip_unknown_operators") = false,
        py::arg("print_program_on_error") = false,
-        py::arg("max_loop_iterations") = 10);
+        py::arg("max_loop_iterations") = 10,
+        py::arg("limit_max_iterations") = std::numeric_limits<uint16_t>::max());
    m.def(
        "parse_onnx_buffer",
@@ -565,7 +580,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
        py::arg("prog"),
        py::arg("t"),
        py::arg("calibration") = std::vector<migraphx::parameter_map>{},
-        py::arg("ins_names") = std::vector<std::string>{"dot", "convolution"});
+        py::arg("ins_names") = std::unordered_set<std::string>{"dot", "convolution"});
 #ifdef HAVE_GPU
     m.def("allocate_gpu", &migraphx::gpu::allocate_gpu, py::arg("s"), py::arg("host") = false);
...
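For completeness, the two loop-related options wired through the binding above are plain fields on migraphx::onnx_options, so the equivalent C++ usage looks roughly like the sketch below (the model file name is illustrative):

#include <cstdint>
#include <limits>
#include <migraphx/onnx.hpp>

int main()
{
    migraphx::onnx_options options;
    options.max_loop_iterations  = 10;
    options.limit_max_iterations = std::numeric_limits<uint16_t>::max();
    auto prog = migraphx::parse_onnx("model.onnx", options); // illustrative file name
    (void)prog;
    return 0;
}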
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -25,7 +25,7 @@
 #include <migraphx/instruction_ref.hpp>
 #include <migraphx/quantization.hpp>
 #include <migraphx/quantize_fp16.hpp>
-#include <migraphx/quantize_int8.hpp>
+#include <migraphx/quantize_8bits.hpp>
 #include <migraphx/simplify_reshapes.hpp>
 #include <migraphx/simplify_qdq.hpp>
 #include <migraphx/eliminate_common_subexpression.hpp>
@@ -45,7 +45,7 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 
-MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_INT8_QUANTIZATION_PARAMS)
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_8BITS_QUANTIZATION_PARAMS)
 
 // This function is to convert any instructions specified in the input
 // from double or float to float16 by inserting a convert operator.
@@ -57,29 +57,22 @@ void quantize_fp16(program& prog, const std::vector<std::string>& ins_names)
     run_passes(prog, {optimize_module{}, quantize_fp16_pass{ins_names}, optimize_module{}});
 }
 
-void quantize_int8(program& prog,
+void quantize_8bits(program& prog,
                    const target& t,
-                   const std::vector<parameter_map>& calibration,
-                   const std::vector<std::string>& ins_names)
+                   shape::type_t precision,
+                   const std::vector<parameter_map>& calibration,
+                   const std::unordered_set<std::string>& ins_names)
 {
-    std::set<std::string> op_names = {"convolution", "dot"};
-    std::set<std::string> input_ins_names(ins_names.begin(), ins_names.end());
-    if(not std::includes(
-           op_names.begin(), op_names.end(), input_ins_names.begin(), input_ins_names.end()))
-    {
-        MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");
-    }
-    // Run optimize_module() before converting to int8 to const eval and fold in FP32 to
+    // Run optimize_module() before converting to int8/fp8 to const eval and fold in FP32 to
     // avoid loss of precision.
     run_passes(prog, {optimize_module{}});
 
-    std::shared_ptr<std::vector<std::pair<float, float>>> int8_quant_params =
+    std::shared_ptr<std::vector<std::pair<float, float>>> quant_8bit_params =
        std::make_shared<std::vector<std::pair<float, float>>>();
    std::shared_ptr<std::vector<float>> max_abs_vals = std::make_shared<std::vector<float>>();
 
-    auto calc_quant_params = [int8_quant_params, max_abs_vals, &t](std::size_t ins_index,
-                                                                   std::vector<argument> args) {
+    float quantized_range = (precision == shape::type_t::int8_type) ? 127.0 : 240.0;
+    auto calc_quant_params = [&](std::size_t ins_index, std::vector<argument> args) {
        std::pair<float, float> param_pair{64.0f, 0.0f};
 
        // scale and shift is need for only int8 type, and we do not
        // consider shift, so set shift to 0
@@ -90,23 +83,22 @@ void quantize_int8(program& prog,
        auto min_val = *std::min_element(vec_val.begin(), vec_val.end());
        auto max_abs = std::max(std::fabs(max_val), std::fabs(min_val));
        max_abs_vals->at(ins_index) = std::max(max_abs_vals->at(ins_index), max_abs);
 
        // if all values are 0, no need to do scaling
-        if(max_abs_vals->at(ins_index) == 0.0f)
+        if(float_equal(max_abs_vals->at(ins_index), 0.0f))
        {
            param_pair.first = 1.0f;
        }
        else
        {
-            param_pair.first = 127.0f / max_abs_vals->at(ins_index);
+            param_pair.first = quantized_range / max_abs_vals->at(ins_index);
        }
-        int8_quant_params->at(ins_index) = param_pair;
+        quant_8bit_params->at(ins_index) = param_pair;
    };
 
    // pass to add capture argument op
    std::size_t param_num = 0;
    run_passes(prog, {capture_arguments_pass{ins_names, calc_quant_params, &param_num}});
-    int8_quant_params->resize(param_num, std::pair<float, float>(64.0f, 0.0f));
+    quant_8bit_params->resize(param_num, std::pair<float, float>(64.0f, 0.0f));
    max_abs_vals->resize(param_num, 0.0f);
 
    // use the calibration data to compute the quantization scale
@@ -134,11 +126,11 @@ void quantize_int8(program& prog,
    }
 
    // print the quantization parameters in only the main module
-    if(enabled(MIGRAPHX_INT8_QUANTIZATION_PARAMS{}))
+    if(enabled(MIGRAPHX_8BITS_QUANTIZATION_PARAMS{}))
    {
-        for(std::size_t i = 0; i < int8_quant_params->size(); ++i)
+        for(std::size_t i = 0; i < quant_8bit_params->size(); ++i)
        {
-            auto param = int8_quant_params->at(i);
+            auto param = quant_8bit_params->at(i);
            std::cout << "ins_index = " << i << ", scale = " << param.first
                      << ", shift = " << param.second << std::endl;
        }
@@ -146,11 +138,44 @@ void quantize_int8(program& prog,
    }
 
    run_passes(prog,
-               {quantize_int8_pass{ins_names, *int8_quant_params},
+               {quantize_8bits_pass{precision, *quant_8bit_params},
+                optimize_module{},
                simplify_qdq{},
-                optimize_module{},
                dead_code_elimination{}});
 }
 
+void quantize_int8(program& prog,
+                   const target& t,
+                   const std::vector<parameter_map>& calibration,
+                   const std::unordered_set<std::string>& ins_names)
+{
+    std::unordered_set<std::string> op_names = {"convolution", "dot"};
+    if(op_names != ins_names)
+    {
+        MIGRAPHX_THROW("QUANTIZE_INT8: only support DOT and CONVOLUTION operation");
+    }
+    quantize_8bits(prog, t, shape::int8_type, calibration, ins_names);
+}
+
+void quantize_fp8(program& prog, const target& t, const std::vector<parameter_map>& calibration)
+{
+    std::cout << "[Warning] : MIGraphX has BETA support for FP8. Using FP8 may result in "
+                 "incorrect final outputs\n";
+    std::unordered_set<std::string> supported_ins_names;
+    auto* mm = prog.get_main_module();
+    for(auto ins : iterator_for(*mm))
+    {
+        if(ins->name() == "convert")
+        {
+            continue;
+        }
+        if(not starts_with(ins->name(), "@"))
+        {
+            supported_ins_names.insert(ins->name());
+        }
+    }
+    quantize_8bits(prog, t, shape::fp8e4m3fnuz_type, calibration, supported_ins_names);
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
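To make the scale selection in calc_quant_params concrete: the pass chooses a symmetric scale that maps the largest observed magnitude onto the top of the quantized range, 127 for int8 and 240 for fp8e4m3fnuz (the largest finite value of that format). A minimal sketch with an illustrative helper name, not MIGraphX code:

// Symmetric quantization scale from calibration data, mirroring calc_quant_params above.
float symmetric_scale(float max_abs, float quantized_range)
{
    if(max_abs == 0.0f)
        return 1.0f; // all-zero calibration tensor: no scaling needed, same as the pass
    return quantized_range / max_abs;
}

// Example: calibration values in [-3.5f, 2.0f] give max_abs = 3.5f, so the int8 scale is
// symmetric_scale(3.5f, 127.0f), roughly 36.3f, and a value v then maps to about
// round(v * scale) before clamping to the quantized range.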