Unverified Commit f8bf7bd3 authored by Lakhinder Walia's avatar Lakhinder Walia Committed by GitHub
Browse files

QLinearMatMul (#2308)

Add support for the QLinearMatMul onnx operator
parent 9263d7ad
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/onnx/broadcast_qdq.hpp>
#include <migraphx/instruction.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
/*
*********************************************************************************
* Reference: see QLinearMatMul in *
* https://onnx.ai/onnx/operators/onnx__QLinearMatMul.html *
*********************************************************************************
Matrix product that behaves like numpy.matmul:
https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.matmul.html. It consumes two
quantized input tensors, their scales and zero points, scale and zero point of output, and computes
the quantized output. The quantization formula is y = saturate((x / y_scale) + y_zero_point). For (x
/ y_scale), it is rounding to nearest ties to even. Refer to https://en.wikipedia.org/wiki/Rounding
for details. Scale and zero point must have same shape. They must be either scalar (per tensor) or
N-D tensor (per row for ‘a’ and per column for ‘b’). Scalar refers to per tensor quantization
whereas N-D refers to per row or per column quantization. If the input is 2D of shape [M, K] then
zero point and scale tensor may be an M element vector [v_1, v_2, …, v_M] for per row quantization
and K element vector of shape [v_1, v_2, …, v_K] for per column quantization. If the input is N-D
tensor with shape [D1, D2, M, K] then zero point and scale tensor may have shape [D1, D2, M, 1] for
per row quantization and shape [D1, D2, 1, K] for per column quantization. Production must never
overflow, and accumulation may overflow if and only if in 32 bits.
Inputs
a (heterogeneous) - T1: N-dimensional quantized matrix a
a_scale (heterogeneous) - tensor(float): scale of quantized input a
a_zero_point (heterogeneous) - T1: zero point of quantized input a
b (heterogeneous) - T2: N-dimensional quantized matrix b
b_scale (heterogeneous) - tensor(float): scale of quantized input b
b_zero_point (heterogeneous) - T2: zero point of quantized input b
y_scale (heterogeneous) - tensor(float): scale of quantized output y
y_zero_point (heterogeneous) - T3: zero point of quantized output y
Outputs
y (heterogeneous) - T3: Quantized matrix multiply results from a * b
Type Constraints
T1 in ( tensor(int8), tensor(uint8) ): Constrain input a and its zero point data type to 8-bit
integer tensor.
T2 in ( tensor(int8), tensor(uint8) ): Constrain input b and its zero point data type to 8-bit
integer tensor.
T3 in ( tensor(int8), tensor(uint8) ): Constrain output y and its zero point data type to 8-bit
integer tensor.
*/
struct parse_qlinearmatmul : op_parser<parse_qlinearmatmul>
{
    std::vector<op_desc> operators() const { return {{"QLinearMatMul"}}; }

    // Basic shape/type validation for the QLinearMatMul operator.
    // Throws when the inputs cannot form a valid quantized matrix product.
    void check_inputs(const std::vector<instruction_ref>& args) const
    {
        // Expect all 8 ONNX inputs: a, a_scale, a_zero_point,
        // b, b_scale, b_zero_point, y_scale, y_zero_point.
        if(args.size() < 8)
            MIGRAPHX_THROW("QLINEARMATMUL: missing inputs");

        const auto sh_a = args[0]->get_shape();
        const auto sh_b = args[3]->get_shape();

        // Only 8-bit quantized operands are allowed (type constraints T1/T2).
        auto is_8bit = [](migraphx::shape::type_t t) {
            return t == migraphx::shape::int8_type or t == migraphx::shape::uint8_type;
        };
        if(not is_8bit(sh_a.type()))
            MIGRAPHX_THROW("QLINEARMATMUL: unsupported input type");
        if(not is_8bit(sh_b.type()))
            MIGRAPHX_THROW("QLINEARMATMUL: unsupported input type");

        auto lens_a = sh_a.lens();
        auto lens_b = sh_b.lens();
        if(lens_a.empty() or lens_b.empty())
            MIGRAPHX_THROW("QLINEARMATMUL: empty input");

        // A 1-D operand broadcasts as in numpy.matmul: prepend a 1 to a,
        // append a 1 to b, then validate as a regular (>= 2-D) matmul.
        if(lens_a.size() == 1)
            lens_a.insert(lens_a.begin(), 1);
        if(lens_b.size() == 1)
            lens_b.push_back(1);

        // Ranks must agree, the contracted dimensions must match
        // ([.., M, K] x [.., K, N]), and every leading batch dimension
        // must be identical.
        if(lens_a.size() != lens_b.size() or lens_a.back() != *(lens_b.rbegin() + 1) or
           not std::equal(lens_a.rbegin() + 2, lens_a.rend(), lens_b.rbegin() + 2, lens_b.rend()))
            MIGRAPHX_THROW("QLINEARMATMUL: mismatched input dimensions");

        // Only per-tensor (scalar) scales/zero points are handled for a and b;
        // per-row/per-column quantization is rejected.
        if(migraphx::any_of({args[1], args[2], args[4], args[5]},
                            [](auto arg) { return not arg->get_shape().scalar(); }))
            MIGRAPHX_THROW("QLINEARMATMUL: unsupported row/column quantization");
    }

    instruction_ref parse(const op_desc& /* opd */,
                          const onnx_parser& /*parser*/,
                          const onnx_parser::node_info& info,
                          const std::vector<instruction_ref>& args) const
    {
        check_inputs(args);

        // Dequantize both operands back to floating point with their
        // per-tensor scale and zero point.
        auto a_fp = bcast_qdq_instr("dequantizelinear", args[0], args[1], args[2], info);
        auto b_fp = bcast_qdq_instr("dequantizelinear", args[3], args[4], args[5], info);

        // numpy.matmul semantics: promote a 1-D a to [1, K] and a 1-D b to
        // [K, 1] so that "dot" always sees at least 2-D operands.
        const bool a_was_1d = args[0]->get_shape().ndim() == 1;
        const bool b_was_1d = args[3]->get_shape().ndim() == 1;
        if(a_was_1d)
            a_fp = info.add_instruction(make_op("unsqueeze", {{"axes", {0}}}), a_fp);
        if(b_was_1d)
            b_fp = info.add_instruction(make_op("unsqueeze", {{"axes", {1}}}), b_fp);

        // Y = A * B in floating point.
        auto y_fp = info.add_instruction(migraphx::make_op("dot"), a_fp, b_fp);

        // Remove at most one of the dimensions added above — never both.
        if(a_was_1d)
            y_fp = info.add_instruction(make_op("squeeze", {{"axes", {0}}}), y_fp);
        else if(b_was_1d)
            y_fp = info.add_instruction(make_op("squeeze", {{"axes", {1}}}), y_fp);

        // Requantize the result with the output scale and zero point.
        return bcast_qdq_instr("quantizelinear", y_fp, args[6], args[7], info);
    }
};
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
...@@ -5283,6 +5283,91 @@ def qlinearglobalavgpool_test(): ...@@ -5283,6 +5283,91 @@ def qlinearglobalavgpool_test():
return ([n], [x], [y], [sc_x, z_pt_x, sc_y, z_pt_y]) return ([n], [x], [y], [sc_x, z_pt_x, sc_y, z_pt_y])
@onnx_test()
def qlinearmatmul_1D_test():
    # QLinearMatMul with two 1-D uint8 operands: A[8] . B[8] -> C[1].
    # All scales and zero points are scalars (per-tensor quantization).
    # NOTE: the @onnx_test() decorator was missing here, unlike the sibling
    # 2D/3D tests, so the .onnx model for this case was never generated.
    a = helper.make_tensor_value_info('A', TensorProto.UINT8, [8])
    sc_a = helper.make_tensor('A_scale', TensorProto.FLOAT, [], [0.05])
    zero_pt_a = helper.make_tensor('A_zero_point', TensorProto.UINT8, [], [0])

    b = helper.make_tensor_value_info('B', TensorProto.UINT8, [8])
    sc_b = helper.make_tensor('B_scale', TensorProto.FLOAT, [], [0.05])
    zero_pt_b = helper.make_tensor('B_zero_point', TensorProto.UINT8, [],
                                   [128])

    sc_c = helper.make_tensor('C_scale', TensorProto.FLOAT, [], [0.05])
    zero_pt_c = helper.make_tensor('C_zero_point', TensorProto.UINT8, [], [64])
    c = helper.make_tensor_value_info('C', TensorProto.UINT8, [1])

    node = onnx.helper.make_node(
        'QLinearMatMul',
        inputs=[
            'A', 'A_scale', 'A_zero_point', 'B', 'B_scale', 'B_zero_point',
            'C_scale', 'C_zero_point'
        ],
        outputs=['C'],
    )

    return ([node], [a, b], [c],
            [sc_a, zero_pt_a, sc_b, zero_pt_b, sc_c, zero_pt_c])
@onnx_test()
def qlinearmatmul_2D_test():
    """QLinearMatMul of a [1, 8] by an [8, 1] uint8 matrix with scalar
    per-tensor scales and zero points."""
    mat_a = helper.make_tensor_value_info('A', TensorProto.UINT8, [1, 8])
    mat_b = helper.make_tensor_value_info('B', TensorProto.UINT8, [8, 1])
    mat_c = helper.make_tensor_value_info('C', TensorProto.UINT8, [1, 1])

    # Quantization parameters for the two inputs and the output.
    scale_a = helper.make_tensor('A_scale', TensorProto.FLOAT, [], [0.05])
    zp_a = helper.make_tensor('A_zero_point', TensorProto.UINT8, [], [0])
    scale_b = helper.make_tensor('B_scale', TensorProto.FLOAT, [], [0.05])
    zp_b = helper.make_tensor('B_zero_point', TensorProto.UINT8, [], [128])
    scale_c = helper.make_tensor('C_scale', TensorProto.FLOAT, [], [0.05])
    zp_c = helper.make_tensor('C_zero_point', TensorProto.UINT8, [], [64])

    node = onnx.helper.make_node('QLinearMatMul',
                                 inputs=[
                                     'A', 'A_scale', 'A_zero_point', 'B',
                                     'B_scale', 'B_zero_point', 'C_scale',
                                     'C_zero_point'
                                 ],
                                 outputs=['C'])

    return ([node], [mat_a, mat_b], [mat_c],
            [scale_a, zp_a, scale_b, zp_b, scale_c, zp_c])
@onnx_test()
def qlinearmatmul_3D_test():
    """Batched QLinearMatMul: [2, 2, 4] x [2, 4, 3] -> [2, 2, 3] uint8,
    with scalar per-tensor scales and zero points."""
    mat_a = helper.make_tensor_value_info('A', TensorProto.UINT8, [2, 2, 4])
    mat_b = helper.make_tensor_value_info('B', TensorProto.UINT8, [2, 4, 3])
    mat_c = helper.make_tensor_value_info('C', TensorProto.UINT8, [2, 2, 3])

    # Quantization parameters for the two inputs and the output.
    scale_a = helper.make_tensor('A_scale', TensorProto.FLOAT, [], [0.0066])
    zp_a = helper.make_tensor('A_zero_point', TensorProto.UINT8, [], [113])
    scale_b = helper.make_tensor('B_scale', TensorProto.FLOAT, [], [0.00705])
    zp_b = helper.make_tensor('B_zero_point', TensorProto.UINT8, [], [114])
    scale_c = helper.make_tensor('C_scale', TensorProto.FLOAT, [], [0.0107])
    zp_c = helper.make_tensor('C_zero_point', TensorProto.UINT8, [], [118])

    node = onnx.helper.make_node('QLinearMatMul',
                                 inputs=[
                                     'A', 'A_scale', 'A_zero_point', 'B',
                                     'B_scale', 'B_zero_point', 'C_scale',
                                     'C_zero_point'
                                 ],
                                 outputs=['C'])

    return ([node], [mat_a, mat_b], [mat_c],
            [scale_a, zp_a, scale_b, zp_b, scale_c, zp_c])
@onnx_test() @onnx_test()
def quantizelinear_test(): def quantizelinear_test():
arg0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [5]) arg0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [5])
......
...@@ -5004,6 +5004,118 @@ TEST_CASE(qlinearglobalavgpool_test) ...@@ -5004,6 +5004,118 @@ TEST_CASE(qlinearglobalavgpool_test)
EXPECT(p.sort() == prog.sort()); EXPECT(p.sort() == prog.sort());
} }
TEST_CASE(qlinearmatmul_1D_test)
{
    // Builds the program expected from parsing qlinearmatmul_1D_test.onnx:
    // dequantize both 1-D operands, unsqueeze them to 2-D, dot, squeeze
    // the prepended dimension once, then requantize with the output params.
    migraphx::program p;
    auto* mm = p.get_main_module();
    // Quantized 1-D inputs A and B.
    auto a = mm->add_parameter("A", {migraphx::shape::uint8_type, {8}});
    auto b = mm->add_parameter("B", {migraphx::shape::uint8_type, {8}});
    // Per-tensor (scalar) scales and zero points for A, B, and the output C.
    auto sc_a = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.05}});
    auto z_pt_a = mm->add_literal(migraphx::literal{migraphx::shape::uint8_type, {0}});
    auto sc_b = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.05}});
    auto z_pt_b = mm->add_literal(migraphx::literal{migraphx::shape::uint8_type, {128}});
    auto sc_c = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.05}});
    auto z_pt_c = mm->add_literal(migraphx::literal{migraphx::shape::uint8_type, {64}});
    // Broadcast A's scale/zero point to A's shape and dequantize A.
    auto scale_a_bcast =
        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {8}}}), sc_a);
    auto z_pt_a_bcast =
        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {8}}}), z_pt_a);
    auto fp_a =
        mm->add_instruction(migraphx::make_op("dequantizelinear"), a, scale_a_bcast, z_pt_a_bcast);
    // Broadcast B's scale/zero point to B's shape and dequantize B.
    auto scale_b_bcast =
        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {8}}}), sc_b);
    auto z_pt_b_bcast =
        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {8}}}), z_pt_b);
    auto fp_b =
        mm->add_instruction(migraphx::make_op("dequantizelinear"), b, scale_b_bcast, z_pt_b_bcast);
    // numpy.matmul 1-D handling: A -> [1, 8], B -> [8, 1], then dot -> [1, 1].
    auto sq_a = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), fp_a);
    auto sq_b = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1}}}), fp_b);
    auto fp_c = mm->add_instruction(migraphx::make_op("dot"), sq_a, sq_b);
    // Only one squeeze is expected (axis 0), giving a [1] result.
    auto sq_c = mm->add_instruction(migraphx::make_op("squeeze", {{"axes", {0}}}), fp_c);
    // Requantize the floating-point result to uint8 output C.
    auto scale_c_bcast =
        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1}}}), sc_c);
    auto z_pt_c_bcast =
        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1}}}), z_pt_c);
    auto c =
        mm->add_instruction(migraphx::make_op("quantizelinear"), sq_c, scale_c_bcast, z_pt_c_bcast);
    mm->add_return({c});

    // The parsed model must match the hand-built program (order-insensitive).
    auto prog = migraphx::parse_onnx("qlinearmatmul_1D_test.onnx");
    EXPECT(p.sort() == prog.sort());
}
TEST_CASE(qlinearmatmul_2D_test)
{
    // Builds the program expected from parsing qlinearmatmul_2D_test.onnx:
    // dequantize both 2-D operands, dot, then requantize — no unsqueeze or
    // squeeze is needed for already-2-D inputs.
    migraphx::program p;
    auto* mm = p.get_main_module();
    // Quantized 2-D inputs A [1, 8] and B [8, 1].
    auto a = mm->add_parameter("A", {migraphx::shape::uint8_type, {1, 8}});
    auto b = mm->add_parameter("B", {migraphx::shape::uint8_type, {8, 1}});
    // Per-tensor (scalar) scales and zero points for A, B, and the output C.
    auto sc_a = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.05}});
    auto z_pt_a = mm->add_literal(migraphx::literal{migraphx::shape::uint8_type, {0}});
    auto sc_b = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.05}});
    auto z_pt_b = mm->add_literal(migraphx::literal{migraphx::shape::uint8_type, {128}});
    auto sc_c = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.05}});
    auto z_pt_c = mm->add_literal(migraphx::literal{migraphx::shape::uint8_type, {64}});
    // Broadcast A's scale/zero point to A's shape and dequantize A.
    auto scale_a_bcast =
        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1, 8}}}), sc_a);
    auto z_pt_a_bcast =
        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1, 8}}}), z_pt_a);
    auto fp_a =
        mm->add_instruction(migraphx::make_op("dequantizelinear"), a, scale_a_bcast, z_pt_a_bcast);
    // Broadcast B's scale/zero point to B's shape and dequantize B.
    auto scale_b_bcast =
        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {8, 1}}}), sc_b);
    auto z_pt_b_bcast =
        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {8, 1}}}), z_pt_b);
    auto fp_b =
        mm->add_instruction(migraphx::make_op("dequantizelinear"), b, scale_b_bcast, z_pt_b_bcast);
    // [1, 8] x [8, 1] -> [1, 1] in floating point.
    auto fp_c = mm->add_instruction(migraphx::make_op("dot"), fp_a, fp_b);
    // Requantize the floating-point result to uint8 output C.
    auto scale_c_bcast =
        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1, 1}}}), sc_c);
    auto z_pt_c_bcast =
        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1, 1}}}), z_pt_c);
    auto c =
        mm->add_instruction(migraphx::make_op("quantizelinear"), fp_c, scale_c_bcast, z_pt_c_bcast);
    mm->add_return({c});

    // The parsed model must match the hand-built program (order-insensitive).
    auto prog = migraphx::parse_onnx("qlinearmatmul_2D_test.onnx");
    EXPECT(p.sort() == prog.sort());
}
TEST_CASE(quantizelinear_test) TEST_CASE(quantizelinear_test)
{ {
migraphx::program p; migraphx::program p;
......
qlinearmatmul_3D_test:
]
A
A_scale
A_zero_point
B
B_scale
B_zero_point
C_scale
C_zero_pointC" QLinearMatMulqlinearmatmul_3D_test*"D;BA_scale**qB A_zero_point*";BB_scale**rB B_zero_point*"O/<BC_scale**vB C_zero_pointZ
A



Z
B



b
C



B
\ No newline at end of file
...@@ -1270,7 +1270,7 @@ TEST_CASE(qlinearadd_test) ...@@ -1270,7 +1270,7 @@ TEST_CASE(qlinearadd_test)
pp["B"] = migraphx::argument(b, data_b.data()); pp["B"] = migraphx::argument(b, data_b.data());
auto result = p.eval(pp).back(); auto result = p.eval(pp).back();
std::vector<unsigned char> result_vector; std::vector<uint8_t> result_vector;
result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); });
std::vector<uint8_t> gold = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, std::vector<uint8_t> gold = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
...@@ -1451,6 +1451,82 @@ TEST_CASE(qlinearglobalavgpool_test) ...@@ -1451,6 +1451,82 @@ TEST_CASE(qlinearglobalavgpool_test)
EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
} }
TEST_CASE(qlinearmatmul_1D_test)
{
    // Run the 1-D QLinearMatMul model end-to-end on the reference target.
    auto p = migraphx::parse_onnx("qlinearmatmul_1D_test.onnx");
    p.compile(migraphx::make_target("ref"));

    // Quantized 1-D inputs: A (zero point 0) and B (zero point 128).
    migraphx::shape sh_a{migraphx::shape::uint8_type, {8}};
    std::vector<uint8_t> a_vals = {2, 4, 6, 8, 10, 12, 14, 16};
    migraphx::shape sh_b{migraphx::shape::uint8_type, {8}};
    std::vector<uint8_t> b_vals = {126, 130, 124, 132, 122, 134, 120, 136};

    migraphx::parameter_map params;
    params["A"] = migraphx::argument(sh_a, a_vals.data());
    params["B"] = migraphx::argument(sh_b, b_vals.data());

    auto result = p.eval(params).back();
    std::vector<uint8_t> results;
    result.visit([&](auto out) { results.assign(out.begin(), out.end()); });

    // Expected quantized dot-product result.
    std::vector<uint8_t> gold = {66};
    EXPECT(migraphx::verify::verify_rms_range(results, gold));
}
TEST_CASE(qlinearmatmul_2D_test)
{
    // Run the 2-D QLinearMatMul model ([1, 8] x [8, 1]) on the ref target.
    auto p = migraphx::parse_onnx("qlinearmatmul_2D_test.onnx");
    p.compile(migraphx::make_target("ref"));

    // Quantized inputs: A (zero point 0) and B (zero point 128).
    migraphx::shape sh_a{migraphx::shape::uint8_type, {1, 8}};
    std::vector<uint8_t> a_vals = {2, 4, 6, 8, 10, 12, 14, 16};
    migraphx::shape sh_b{migraphx::shape::uint8_type, {8, 1}};
    std::vector<uint8_t> b_vals = {126, 130, 124, 132, 122, 134, 120, 136};

    migraphx::parameter_map params;
    params["A"] = migraphx::argument(sh_a, a_vals.data());
    params["B"] = migraphx::argument(sh_b, b_vals.data());

    auto result = p.eval(params).back();
    std::vector<uint8_t> results;
    result.visit([&](auto out) { results.assign(out.begin(), out.end()); });

    // Same data as the 1-D case, so the same quantized result is expected.
    std::vector<uint8_t> gold = {66};
    EXPECT(migraphx::verify::verify_rms_range(results, gold));
}
TEST_CASE(qlinearmatmul_3D_test)
{
    // Batched case; input/output values follow the ONNX operator example:
    // https://xadupre.github.io/draft/onnx/onnx_doc_folder/onnx__QLinearMatMul.html
    auto p = migraphx::parse_onnx("qlinearmatmul_3D_test.onnx");
    p.compile(migraphx::make_target("ref"));

    // Two identical batches of the documented 2x4 / 4x3 operands.
    migraphx::shape sh_a{migraphx::shape::uint8_type, {2, 2, 4}};
    std::vector<uint8_t> a_vals = {
        208, 236, 0, 238, 3, 214, 255, 29, 208, 236, 0, 238, 3, 214, 255, 29};
    migraphx::shape sh_b{migraphx::shape::uint8_type, {2, 4, 3}};
    std::vector<uint8_t> b_vals = {152, 51, 244, 60, 26, 255, 0, 127, 246, 127, 254, 247,
                                   152, 51, 244, 60, 26, 255, 0, 127, 246, 127, 254, 247};

    migraphx::parameter_map params;
    params["A"] = migraphx::argument(sh_a, a_vals.data());
    params["B"] = migraphx::argument(sh_b, b_vals.data());

    auto result = p.eval(params).back();
    std::vector<uint8_t> results;
    result.visit([&](auto out) { results.assign(out.begin(), out.end()); });

    // Expected quantized [2, 2, 3] output (both batches identical).
    std::vector<uint8_t> gold = {168, 115, 255, 1, 66, 151, 168, 115, 255, 1, 66, 151};
    EXPECT(migraphx::verify::verify_rms_range(results, gold));
}
TEST_CASE(resize_downsample_f_test) TEST_CASE(resize_downsample_f_test)
{ {
migraphx::program p = migraphx::parse_onnx("resize_downsample_f_test.onnx"); migraphx::program p = migraphx::parse_onnx("resize_downsample_f_test.onnx");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment