Add QLinearSigmoid and QLinearLeakyRelu ops (#2443)

bac7436b · Zakor Gyula · GitHub · 89215595 · bac7436b · bac7436b
Unverified Commit bac7436b authored Nov 22, 2023 by Zakor Gyula Committed by GitHub Nov 22, 2023
6 changed files
--- a/src/onnx/parse_qlinearunary.cpp
+++ b/src/onnx/parse_qlinearunary.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/onnx/op_parser.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/common.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/onnx/checks.hpp>
+#include <migraphx/onnx/broadcast_qdq.hpp>
+#include <migraphx/op/pooling.hpp>
+#include <migraphx/instruction.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace onnx {
+/*
+ *********************************************************************************
+ *  Reference: see QLinearSigmoid, QLinearLeakyRelu in                           *
+ *  https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md  *
+ *********************************************************************************
+com.microsoft.QLinearSigmoid
+QLinearSigmoid takes quantized input data (Tensor) and the quantization parameters for the output,
+and produces one output (Tensor) where the function f(x) = quantize(Sigmoid(dequantize(x))) is
+applied to the data tensor elementwise, with Sigmoid(x) = 1 / (1 + exp(-x)).
+Version
+This version of the operator has been available since version 1 of the 'com.microsoft' operator
+set.
+*****************************************************************************************************
+com.microsoft.QLinearLeakyRelu
+QLinearLeakyRelu takes quantized input data (Tensor), an argument alpha, and quantize parameter for
+output, and produces one output data (Tensor) where the function f(x) = quantize(alpha *
+dequantize(x)) for dequantize(x) < 0, f(x) = quantize(dequantize(x)) for dequantize(x) >= 0, is
+applied to the data tensor elementwise.
+Version
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+Attributes
+alpha : float
+Coefficient of leakage.
+******************************************************************************************************
+Generic input layout of QLinear unary operators:
+Inputs (4 - 5)
+X : T
+Input tensor
+X_scale : tensor(float)
+Input X's scale. It's a scalar, which means a per-tensor/layer quantization.
+X_zero_point (optional) : T
+Input X's zero point. Default value is 0 if it's not specified. It's a scalar, which means a
+per-tensor/layer quantization.
+Y_scale : tensor(float) Output Y's scale. It's a scalar, which means
+a per-tensor/layer quantization.
+Y_zero_point (optional) : T Output Y's zero point. Default value is
+0 if it's not specified. It's a scalar, which means a per-tensor/layer quantization.
+Outputs
+Y : T
+Output tensor
+Type Constraints
+T : tensor(uint8), tensor(int8)
+Constrain input and output types to 8 bit tensors.
+*/
+// Parser for the com.microsoft QLinear unary contrib operators. Each node is lowered to
+// dequantizelinear(X) -> <float op named in operators()> -> quantizelinear(Y).
+struct parse_qlinearunary : op_parser<parse_qlinearunary>
+{
+    // Maps each supported ONNX operator name to the op that is applied to the dequantized input.
+    std::vector<op_desc> operators() const
+    {
+        return {{"QLinearSigmoid", "sigmoid"}, {"QLinearLeakyRelu", "leaky_relu"}};
+    }
+    // Throws unless at least four inputs (X, X_scale, X_zero_point, Y_scale) are present and X is
+    // an int8 or uint8 tensor.
+    void check_inputs(const op_desc& opd, const std::vector<instruction_ref>& args) const
+    {
+        if(args.size() < 4)
+            MIGRAPHX_THROW(opd.op_name + ": missing inputs");
+        const auto type_x = args[0]->get_shape().type();
+        if(type_x != migraphx::shape::int8_type and type_x != migraphx::shape::uint8_type)
+            MIGRAPHX_THROW(opd.op_name + ": unsupported input type");
+    }
+    // Builds the dequantize -> op -> quantize chain for one node and returns the quantized result.
+    // args: X, X_scale, X_zero_point, Y_scale and, optionally, Y_zero_point as the fifth input.
+    instruction_ref parse(const op_desc& opd,
+                          const onnx_parser& parser,
+                          const onnx_parser::node_info& info,
+                          const std::vector<instruction_ref>& args) const
+    {
+        check_inputs(opd, args);
+        // X
+        const auto& in_x         = args[0];
+        const auto& in_scale_x   = args[1];
+        const auto& in_zero_pt_x = args[2];
+        auto dquant_x = bcast_qdq_instr("dequantizelinear", in_x, in_scale_x, in_zero_pt_x, info);
+        // Y = op(dequantizelinear(X))
+        auto op = parser.load(opd.op_name, info);
+        auto y  = info.add_instruction(op, dquant_x);
+        const auto& in_scale_y = args[3];
+        // zero_pt for Y is supplied as the last optional argument..
+        if(args.size() == 5)
+            return bcast_qdq_instr("quantizelinear", y, in_scale_y, args[4], info);
+        // No zero_pt for Y: the operator defines it as 0 and requires Y to have X's type, so a
+        // zero literal of that type is supplied explicitly.
+        // NOTE(review): this assumes a two-input quantizelinear would otherwise pick a fixed
+        // default output type regardless of X (believed to be uint8) - confirm against the op.
+        const auto type_x = in_x->get_shape().type();
+        auto zero_pt_y    = info.add_literal(migraphx::literal{migraphx::shape{type_x}, {0}});
+        return bcast_qdq_instr("quantizelinear", y, in_scale_y, zero_pt_y, info);
+    }
+};
+} // namespace onnx
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/test/onnx/gen_onnx.py
+++ b/test/onnx/gen_onnx.py
@@ -6126,6 +6126,26 @@ def qlinearglobalavgpool_test():
    return ([n], [x], [y], [sc_x, z_pt_x, sc_y, z_pt_y])
+@onnx_test()
+def qlinearleakyrelu_test():
+    x = helper.make_tensor_value_info('X', TensorProto.INT8, [64])
+    sc_x = helper.make_tensor('X_scale', TensorProto.FLOAT, [], [0.05])
+    zero_pt_x = helper.make_tensor('X_zero_point', TensorProto.INT8, [], [0])
+    sc_y = helper.make_tensor('Y_scale', TensorProto.FLOAT, [], [0.05])
+    zero_pt_y = helper.make_tensor('Y_zero_point', TensorProto.INT8, [], [10])
+    y = helper.make_tensor_value_info('Y', TensorProto.INT8, [64])
+    node = onnx.helper.make_node(
+        'QLinearLeakyRelu',
+        inputs=['X', 'X_scale', 'X_zero_point', 'Y_scale', 'Y_zero_point'],
+        outputs=['Y'],
+        alpha=1.1,
+    )
+    return ([node], [x], [y], [sc_x, zero_pt_x, sc_y, zero_pt_y])
 def qlinearmatmul_1D_test():
    a = helper.make_tensor_value_info('A', TensorProto.UINT8, [8])
    sc_a = helper.make_tensor('A_scale', TensorProto.FLOAT, [], [0.05])
@@ -6266,6 +6286,26 @@ def qlinearmul_bcast_test():
            [sc_a, zero_pt_a, sc_b, zero_pt_b, sc_c, zero_pt_c])
+@onnx_test()
+def qlinearsigmoid_test():
+    x = helper.make_tensor_value_info('X', TensorProto.INT8, [64])
+    sc_x = helper.make_tensor('X_scale', TensorProto.FLOAT, [], [0.05])
+    zero_pt_x = helper.make_tensor('X_zero_point', TensorProto.INT8, [], [0])
+    sc_y = helper.make_tensor('Y_scale', TensorProto.FLOAT, [], [0.0035])
+    zero_pt_y = helper.make_tensor('Y_zero_point', TensorProto.INT8, [],
+                                   [-128])
+    y = helper.make_tensor_value_info('Y', TensorProto.INT8, [64])
+    node = onnx.helper.make_node(
+        'QLinearSigmoid',
+        inputs=['X', 'X_scale', 'X_zero_point', 'Y_scale', 'Y_zero_point'],
+        outputs=['Y'],
+    )
+    return ([node], [x], [y], [sc_x, zero_pt_x, sc_y, zero_pt_y])
 @onnx_test()
 def quantizelinear_test():
    arg0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [5])

--- a/test/onnx/onnx_test.cpp
+++ b/test/onnx/onnx_test.cpp
@@ -5695,6 +5695,46 @@ TEST_CASE(qlinearglobalavgpool_test)
    EXPECT(p.sort() == prog.sort());
 }
+TEST_CASE(qlinearleakyrelu_test) // parsed model must equal the hand-built program below
+{
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+    auto x = mm->add_parameter("X", {migraphx::shape::int8_type, {64}});
+    auto sc_x   = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.05}});
+    auto z_pt_x = mm->add_literal(migraphx::literal{migraphx::shape::int8_type, {0}});
+    auto sc_y   = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.05}});
+    auto z_pt_y = mm->add_literal(migraphx::literal{migraphx::shape::int8_type, {10}});
+    auto scale_x_bcast = // scalar X scale broadcast to the 64 elements of X
+        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), sc_x);
+    auto z_pt_x_bcast = // scalar X zero point broadcast likewise
+        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), z_pt_x);
+    auto fp_x = // dequantized input
+        mm->add_instruction(migraphx::make_op("dequantizelinear"), x, scale_x_bcast, z_pt_x_bcast);
+    auto fp_y = mm->add_instruction(migraphx::make_op("leaky_relu", {{"alpha", 1.1}}), fp_x);
+    auto scale_y_bcast = // scalar Y scale broadcast to the 64 output elements
+        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), sc_y);
+    auto z_pt_y_bcast = // scalar Y zero point broadcast likewise
+        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), z_pt_y);
+    auto y = // requantized result
+        mm->add_instruction(migraphx::make_op("quantizelinear"), fp_y, scale_y_bcast, z_pt_y_bcast);
+    mm->add_return({y});
+    auto prog = migraphx::parse_onnx("qlinearleakyrelu_test.onnx"); // program under test
+    EXPECT(p.sort() == prog.sort()); // both programs are sorted before being compared
+}
 TEST_CASE(qlinearmatmul_1D_test)
 {
    migraphx::program p;
@@ -5860,6 +5900,46 @@ TEST_CASE(qlinearmul_test)
    EXPECT(p.sort() == prog.sort());
 }
+TEST_CASE(qlinearsigmoid_test) // parsed model must equal the hand-built program below
+{
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+    auto x = mm->add_parameter("X", {migraphx::shape::int8_type, {64}});
+    auto sc_x   = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.05}});
+    auto z_pt_x = mm->add_literal(migraphx::literal{migraphx::shape::int8_type, {0}});
+    auto sc_y   = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.0035}});
+    auto z_pt_y = mm->add_literal(migraphx::literal{migraphx::shape::int8_type, {-128}});
+    auto scale_x_bcast = // scalar X scale broadcast to the 64 elements of X
+        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), sc_x);
+    auto z_pt_x_bcast = // scalar X zero point broadcast likewise
+        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), z_pt_x);
+    auto fp_x = // dequantized input
+        mm->add_instruction(migraphx::make_op("dequantizelinear"), x, scale_x_bcast, z_pt_x_bcast);
+    auto fp_y = mm->add_instruction(migraphx::make_op("sigmoid"), fp_x);
+    auto scale_y_bcast = // scalar Y scale broadcast to the 64 output elements
+        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), sc_y);
+    auto z_pt_y_bcast = // scalar Y zero point broadcast likewise
+        mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), z_pt_y);
+    auto y = // requantized result
+        mm->add_instruction(migraphx::make_op("quantizelinear"), fp_y, scale_y_bcast, z_pt_y_bcast);
+    mm->add_return({y});
+    auto prog = migraphx::parse_onnx("qlinearsigmoid_test.onnx"); // program under test
+    EXPECT(p.sort() == prog.sort()); // both programs are sorted before being compared
+}
 migraphx::instruction_ref insert_quantizelinear_clip(migraphx::module& m,
                                                     const migraphx::instruction_ref ins,
                                                     const migraphx::instruction_ref round,

--- a/test/onnx/qlinearleakyrelu_test.onnx
+++ b/test/onnx/qlinearleakyrelu_test.onnx
--- a/test/onnx/qlinearsigmoid_test.onnx
+++ b/test/onnx/qlinearsigmoid_test.onnx
--- a/test/onnx/verify_onnx.cpp
+++ b/test/onnx/verify_onnx.cpp
@@ -1819,6 +1819,35 @@ TEST_CASE(qlinearglobalavgpool_test)
    EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
 }
+TEST_CASE(qlinearleakyrelu_test)
+{
+    // Operator reference:
+    // github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.QLinearLeakyRelu
+    auto prog = migraphx::parse_onnx("qlinearleakyrelu_test.onnx");
+    prog.compile(migraphx::make_target("ref"));
+    // 64 int8 inputs running from -128 to 124 in steps of 4.
+    std::vector<int8_t> input{
+        -128, -124, -120, -116, -112, -108, -104, -100, -96, -92, -88, -84, -80, -76, -72, -68,
+        -64,  -60,  -56,  -52,  -48,  -44,  -40,  -36,  -32, -28, -24, -20, -16, -12, -8,  -4,
+        0,    4,    8,    12,   16,   20,   24,   28,   32,  36,  40,  44,  48,  52,  56,  60,
+        64,   68,   72,   76,   80,   84,   88,   92,   96,  100, 104, 108, 112, 116, 120, 124};
+    migraphx::shape input_shape{migraphx::shape::int8_type, {64}};
+    migraphx::parameter_map params;
+    params["X"] = migraphx::argument(input_shape, input.data());
+    // The last evaluated result is the model output; copy it out as int8 values.
+    auto output = prog.eval(params).back();
+    std::vector<int8_t> result_vector;
+    output.visit([&](auto view) { result_vector.assign(view.begin(), view.end()); });
+    // Expected quantized outputs, one per input element.
+    std::vector<int8_t> gold{
+        -128, -126, -122, -118, -113, -109, -104, -100, -96, -91, -87, -82, -78, -74, -69, -65,
+        -60,  -56,  -52,  -47,  -43,  -38,  -34,  -30,  -25, -21, -16, -12, -8,  -3,  1,   6,
+        10,   14,   18,   22,   26,   30,   34,   38,   42,  46,  50,  54,  58,  62,  66,  70,
+        74,   78,   82,   86,   90,   94,   98,   102,  106, 110, 114, 118, 122, 126, 127, 127};
+    EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
+}
 TEST_CASE(qlinearmatmul_1D_test)
 {
    migraphx::program p = migraphx::parse_onnx("qlinearmatmul_1D_test.onnx");
@@ -1970,6 +1999,36 @@ TEST_CASE(qlinearmul_bcast_test)
    EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
 }
+TEST_CASE(qlinearsigmoid_test)
+{
+    // Operator reference:
+    // github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.QLinearSigmoid
+    auto prog = migraphx::parse_onnx("qlinearsigmoid_test.onnx");
+    prog.compile(migraphx::make_target("ref"));
+    // 64 int8 inputs running from -128 to 124 in steps of 4.
+    std::vector<int8_t> input{
+        -128, -124, -120, -116, -112, -108, -104, -100, -96, -92, -88, -84, -80, -76, -72, -68,
+        -64,  -60,  -56,  -52,  -48,  -44,  -40,  -36,  -32, -28, -24, -20, -16, -12, -8,  -4,
+        0,    4,    8,    12,   16,   20,   24,   28,   32,  36,  40,  44,  48,  52,  56,  60,
+        64,   68,   72,   76,   80,   84,   88,   92,   96,  100, 104, 108, 112, 116, 120, 124};
+    migraphx::shape input_shape{migraphx::shape::int8_type, {64}};
+    migraphx::parameter_map params;
+    params["X"] = migraphx::argument(input_shape, input.data());
+    // The last evaluated result is the model output; copy it out as int8 values.
+    auto output = prog.eval(params).back();
+    std::vector<int8_t> result_vector;
+    output.visit([&](auto view) { result_vector.assign(view.begin(), view.end()); });
+    // Expected quantized outputs, one per input element.
+    std::vector<int8_t> gold{-128, -127, -127, -127, -127, -127, -126, -126, -126, -125, -125,
+                             -124, -123, -122, -120, -119, -117, -114, -112, -108, -104, -99,
+                             -94,  -87,  -80,  -71,  -62,  -51,  -39,  -27,  -13,  1,    15,
+                             29,   43,   56,   69,   81,   92,   101,  110,  117,  124,  127,
+                             127,  127,  127,  127,  127,  127,  127,  127,  127,  127,  127,
+                             127,  127,  127,  127,  127,  127,  127,  127,  127};
+    EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
+}
 TEST_CASE(resize_downsample_f_test)
 {
    migraphx::program p = migraphx::parse_onnx("resize_downsample_f_test.onnx");