manual merge

4ea39116 · Khalique Ahmed · 20128cae · d8011adf · 4ea39116 · 4ea39116
Commit 4ea39116 authored Nov 10, 2023 by Khalique Ahmed
20 changed files
--- a/src/onnx/parse_loop.cpp
+++ b/src/onnx/parse_loop.cpp
@@ -58,6 +58,16 @@ struct parse_loop : op_parser<parse_loop>
            }
        }

+        // cap max_iter because loop uses static shapes with max_iter size and huge numbers
+        // here can cause overflow
+        if(max_iterations > parser.limit_max_iterations)
+        {
+            std::cerr << "WARNING: PARSE_LOOP max_iterations exceeds the maximum loop "
+                         "iterations limit, it will be changed from "
+                      << max_iterations << " to " << parser.limit_max_iterations << ".\n";
+            max_iterations = parser.limit_max_iterations;
+        }
+
        // condition input is empty
        if(args.at(1)->name() == "undefined")
        {

--- a/src/onnx/parse_mean_variance_normalization.cpp
+++ b/src/onnx/parse_mean_variance_normalization.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/onnx/op_parser.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/onnx/checks.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace onnx {
+
+// Parser for the ONNX MeanVarianceNormalization operator. The emitted sub-graph computes
+// (X - E[X]) / (sqrt(E[X^2] - E[X]^2) + epsilon), with the means reduced over `axes`.
+struct parse_mean_variance_normalization : op_parser<parse_mean_variance_normalization>
+{
+    std::vector<op_desc> operators() const { return {{"MeanVarianceNormalization"}}; }
+
+    // Builds the normalization instructions for the first input.
+    // The reduction axes come from the `axes` attribute when it is present; otherwise
+    // {0, 2, 3} is used and the input has to be rank 4.
+    // Throws when the defaults are used on a non rank-4 input, or when the number of
+    // axes differs from input rank - 1.
+    instruction_ref parse(const op_desc& /*opd*/,
+                          const onnx_parser& /*parser*/,
+                          onnx_parser::node_info info,
+                          std::vector<instruction_ref> args) const
+    {
+        const instruction_ref input = args.front();
+        const auto input_shape      = input->get_shape();
+        const auto rank             = input_shape.ndim();
+        const bool has_axes_attr    = contains(info.attributes, "axes");
+
+        // the default axes only make sense for a rank 4 input
+        if(not has_axes_attr and rank != 4)
+        {
+            MIGRAPHX_THROW(
+                "Input tensor needs to be rank 4 when axes is not specified. Instead it is rank " +
+                std::to_string(rank));
+        }
+
+        std::vector<int64_t> reduce_axes{0, 2, 3};
+        if(has_axes_attr)
+        {
+            const auto& attr_axes = info.attributes.at("axes").ints();
+            reduce_axes.assign(attr_axes.begin(), attr_axes.end());
+        }
+
+        if(reduce_axes.size() != rank - 1)
+        {
+            MIGRAPHX_THROW("Length of axes array needs to be equal to input tensor rank - 1");
+        }
+
+        // the same reduction is applied to X and to X^2
+        const auto mean_over_axes = make_op("reduce_mean", {{"axes", reduce_axes}});
+
+        // E[X] and E[X]^2
+        auto mean         = info.add_instruction(mean_over_axes, input);
+        auto mean_squared = info.add_common_op("mul", mean, mean);
+
+        // E[X^2]
+        auto squared         = info.add_common_op("mul", input, input);
+        auto mean_of_squared = info.add_instruction(mean_over_axes, squared);
+
+        // standard deviation: sqrt(E[X^2] - E[X]^2)
+        auto variance = info.add_common_op("sub", mean_of_squared, mean_squared);
+        auto std_dev  = info.add_common_op("sqrt", variance);
+
+        auto centered = info.add_common_op("sub", input, mean);
+        // a larger epsilon is used for half precision inputs
+        auto epsilon =
+            info.add_literal({input_shape.type(),
+                              {input_shape.type() == shape::half_type ? 1e-7 : 1e-9}});
+        auto denominator = info.add_common_op("add", std_dev, epsilon);
+
+        return info.add_common_op("div", centered, denominator);
+    }
+};
+
+} // namespace onnx
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/onnx/parse_multinomial.cpp
+++ b/src/onnx/parse_multinomial.cpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -41,6 +41,9 @@ struct parse_multinomial : op_parser<parse_multinomial>
                          const onnx_parser::node_info& info,
                          std::vector<instruction_ref> args) const
    {
+        if(args.empty())
+            MIGRAPHX_THROW("PARSE_MULTINOMIAL: no arguments given");
+
        int dtype = 6;
        if(contains(info.attributes, "dtype"))
            dtype = info.attributes.at("dtype").i();
@@ -49,35 +52,90 @@ struct parse_multinomial : op_parser<parse_multinomial>
        size_t sample_size = 1;
        if(contains(info.attributes, "sample_size"))
            sample_size = info.attributes.at("sample_size").i();
+        else
+            MIGRAPHX_THROW("PARSE_MULTINOMIAL: sample_size not given");
+
+        // Use logarithmic math to scale probabilities while avoiding division by very
+        // small numbers.  Scaling by the maximum makes very tiny ranges more
+        // tractable; any constant factor gives an equivalent distribution since the
+        // Multinomial op normalizes at runtime.

        // Subtract the per-batch maximum log-probability, making the per-batch max 0
        auto maxes =
            info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), args[0]);
-        auto mb_maxes = info.add_instruction(
-            migraphx::make_op("multibroadcast", {{"out_lens", args[0]->get_shape().lens()}}),
-            maxes);
-        auto cdf = info.add_instruction(migraphx::make_op("sub"), args[0], mb_maxes);
+        auto cdf = info.add_common_op("sub", args[0], maxes);
        // Take the element-wise exponent to get probabilities in the range (0, 1]
        cdf = info.add_instruction(migraphx::make_op("exp"), cdf);
-        // Compute the cumulative density function
+        // Compute the cumulative distribution function
        cdf = info.add_instruction(
            migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf);

-        // Pre-compute random distribution
-        std::mt19937 gen(std::chrono::high_resolution_clock::now().time_since_epoch().count());
+        instruction_ref seed_input;
        if(contains(info.attributes, "seed"))
-            gen.seed(info.attributes.at("seed").f());
+        {
+            float seed = info.attributes.at("seed").f();
+            migraphx::shape s{migraphx::shape::float_type, {1}};
+            std::vector<float> data = {seed};
+            seed_input              = info.add_literal(migraphx::literal(s, data));
+        }
+        else
+        {
+            seed_input = info.add_instruction(migraphx::make_op("random_seed"));
+        }
+        instruction_ref randoms;
+
+        shape s0 = args[0]->get_shape();
+
+        if(s0.dynamic())
+        {
+            //  Dynamic batch_size will be taken from args[0].  The input argument to this should
+            // have a second dimension of sample_size.
+            std::vector<shape::dynamic_dimension> dyn_dim_set;
+            dyn_dim_set.emplace_back(s0.dyn_dims().front());
+            dyn_dim_set.emplace_back(shape::dynamic_dimension{sample_size, sample_size});
+
+            // read the input dimensions
+            auto dim_of =
+                info.add_instruction(migraphx::make_op("dimensions_of", {{"end", 2}}), args[0]);
+
+            // The next two operations insert the value sample_size into the second array position
+
+            // make an argument of (1, 0)
+            shape s(shape::int64_type, {2});
+            std::vector<int64_t> data1{1, 0};
+            auto l1        = info.add_literal(s, data1);
+            auto batch_arg = info.add_instruction(migraphx::make_op("mul"), dim_of, l1);
+            std::vector<int64_t> data2(2, 0);
+            // make an argument of (0, sample_size)
+            data2[1]         = sample_size;
+            auto l2          = info.add_literal(s, data2);
+            auto alloc_shape = info.add_instruction(migraphx::make_op("add"), batch_arg, l2);
+            // alloc_shape should contain the input-based shape dimensions as its values at runtime,
+            // and its own shape is {2}
+
+            // compile_shape is the shape used when compiling the Allocate op, and may be dynamic
+            migraphx::shape compile_shape =
+                migraphx::shape(s0.type(), {s0.dyn_dims().front(), {sample_size, sample_size}});

-        std::uniform_real_distribution<> dis(0.0, 1.0);
-        size_t batch_size = args[0]->get_shape().lens().front();
-        migraphx::shape dist_shape{migraphx::shape::float_type, {batch_size, sample_size}};
+            // Allocate on-device storage for the random values
+            auto alloc = info.add_instruction(
+                migraphx::make_op("allocate", {{"shape", to_value(compile_shape)}}), alloc_shape);
+            randoms = info.add_instruction(migraphx::make_op("random_uniform"), seed_input, alloc);
+        }
+        else
+        {
+            // use literal.  The array populated by random_uniform may have any shape, as long
+            // as its number of elements is batch_size * sample_size.
+            size_t batch_size = s0.lens().front();
+            auto rand_dummy   = info.add_literal(
+                migraphx::literal{migraphx::shape::float_type, {batch_size * sample_size}});

-        std::vector<float> random_dist(batch_size * sample_size);
-        std::generate(random_dist.begin(), random_dist.end(), [&]() { return dis(gen); });
-        auto dist_lit = info.add_literal(migraphx::literal{dist_shape, random_dist});
+            randoms =
+                info.add_instruction(migraphx::make_op("random_uniform"), seed_input, rand_dummy);
+        }

        return info.add_instruction(
-            migraphx::make_op("multinomial", {{"dtype", output_type}}), cdf, dist_lit);
+            migraphx::make_op("multinomial", {{"dtype", output_type}}), cdf, randoms);
    }
 };


--- a/src/onnx/parse_pad.cpp
+++ b/src/onnx/parse_pad.cpp
@@ -115,34 +115,9 @@ struct parse_pad : op_parser<parse_pad>
 {
    std::vector<op_desc> operators() const { return {{"Pad"}}; }

-    instruction_ref parse(const op_desc& /*opd*/,
-                          const onnx_parser& parser,
-                          onnx_parser::node_info info,
-                          std::vector<instruction_ref> args) const
+    std::string parse_mode(const onnx_parser::node_info& info,
+                           const std::vector<instruction_ref>& args) const
    {
-        std::vector<int64_t> pads{};
-        if(args.size() >= 2)
-        {
-            auto pad_arg = args.at(1)->eval();
-            check_arg_empty(pad_arg, "PARSE_PAD: pad input must be constant");
-            pad_arg.visit([&](auto v) { pads.assign(v.begin(), v.end()); });
-        }
-        else if(contains(info.attributes, "pads"))
-        {
-            auto&& pad_vals = info.attributes["pads"].ints();
-            pads            = std::vector<int64_t>(pad_vals.begin(), pad_vals.end());
-        }
-        else
-        {
-            MIGRAPHX_THROW("PARSE_PAD: pad must be available");
-        }
-
-        // check if padding is actually being done (at least one value is nonzero)
-        if(std::all_of(pads.begin(), pads.end(), [](const int& i) { return i == 0; }))
-        {
-            return info.add_instruction(make_op("identity"), args.front());
-        }
-
        if(contains(info.attributes, "mode"))
        {
            auto mode = info.attributes.at("mode").s();
@@ -152,28 +127,59 @@ struct parse_pad : op_parser<parse_pad>
                {
                    MIGRAPHX_THROW("PARSE_PAD: reflect padding with dynamic shape not supported");
                }
-                return reflect_pad(info, pads, args.front());
            }
-            if(mode != "constant")
+            else if(mode != "constant")
            {
                MIGRAPHX_THROW(
                    "PARSE_PAD: migraphx currently only supports constant and reflect padding");
            }
+            return mode;
+        }
+        else
+        {
+            // default mode
+            return "constant";
        }
+    }

+    // Returns the pad amounts for the node.
+    // The second input is used when it is supplied (it must evaluate to a constant);
+    // otherwise the `pads` attribute is read. Throws when neither is available.
+    std::vector<int64_t> parse_pads(const onnx_parser::node_info& info,
+                                    const std::vector<instruction_ref>& args) const
+    {
+        // pads given as an input take precedence over the attribute
+        if(args.size() >= 2)
+        {
+            std::vector<int64_t> input_pads{};
+            auto evaluated = args.at(1)->eval();
+            check_arg_empty(evaluated, "PARSE_PAD: `pads` input must be constant");
+            evaluated.visit([&](auto view) { input_pads.assign(view.begin(), view.end()); });
+            return input_pads;
+        }
+
+        if(not contains(info.attributes, "pads"))
+        {
+            MIGRAPHX_THROW("PARSE_PAD: `pads` must be available");
+        }
+
+        const auto& attr_pads = info.attributes.at("pads").ints();
+        return std::vector<int64_t>(attr_pads.begin(), attr_pads.end());
+    }
+
+    float parse_constant_value(const onnx_parser& parser,
+                               const onnx_parser::node_info& info,
+                               const std::vector<instruction_ref>& args) const
+    {
        float value = 0.0f;
-        // third input is the value
-        if(args.size() == 3)
+        if(args.size() >= 3 and args.at(2)->get_shape().scalar())
        {
            auto val_ins = args.at(2);
            if(not val_ins->can_eval())
            {
-                MIGRAPHX_THROW("PARSE_PAD: input value must be constant");
+                MIGRAPHX_THROW("PARSE_PAD: input `value` must be constant");
            }
            auto val_arg = val_ins->eval();
            if(val_arg.get_shape().elements() != 1)
            {
-                MIGRAPHX_THROW("PARSE_PAD: value should contain only one element");
+                MIGRAPHX_THROW("PARSE_PAD: `value` should contain only one element");
            }
            value = val_arg.at<float>();
        }
@@ -181,6 +187,81 @@ struct parse_pad : op_parser<parse_pad>
        {
            value = parser.parse_value(info.attributes.at("value")).at<float>();
        }
+        return value;
+    }
+
+    // Returns the values of the optional `axes` input, or an empty vector when the
+    // node does not have enough inputs to carry it. The input must evaluate to a
+    // constant.
+    std::vector<int64_t> parse_axes(const std::vector<instruction_ref>& args,
+                                    bool is_constant_mode) const
+    {
+        // axes is the 4th input in constant mode, otherwise the 3rd
+        const auto axes_pos = is_constant_mode ? 4 : 3;
+        if(args.size() < axes_pos)
+        {
+            return {};
+        }
+
+        std::vector<int64_t> result{};
+        auto evaluated = args.at(axes_pos - 1)->eval();
+        check_arg_empty(evaluated, "PARSE_PAD: variable `axes` input not supported");
+        evaluated.visit([&](auto view) { result.assign(view.begin(), view.end()); });
+        return result;
+    }
+
+    // Expands `pads`, which only covers the dimensions listed in `axes`, into a pads
+    // vector of size 2 * input_rank. Dimensions that are not listed keep a padding of 0.
+    // Both vectors store all begin values first, followed by all end values.
+    // Throws when pads.size() != 2 * axes.size(), or when an axis lies outside
+    // [-input_rank, input_rank).
+    std::vector<int64_t> calculate_pads_with_axes(const std::vector<int64_t>& pads,
+                                                  const std::vector<int64_t>& axes,
+                                                  size_t input_rank) const
+    {
+        size_t num_axes = axes.size();
+        if(num_axes * 2 != pads.size())
+        {
+            MIGRAPHX_THROW("PARSE_PAD: number of elements of pads should be equal to 2 * "
+                           "number of elements of axes");
+        }
+
+        const auto rank = static_cast<int64_t>(input_rank);
+        std::vector<int64_t> new_pads(input_rank * 2);
+        for(size_t idx{0}; idx < num_axes; ++idx)
+        {
+            // axis can be negative, in which case it counts back from the last dimension
+            const int64_t axis = axes[idx] < 0 ? rank + axes[idx] : axes[idx];
+            // an axis outside the input rank would index new_pads out of bounds
+            if(axis < 0 or axis >= rank)
+            {
+                MIGRAPHX_THROW("PARSE_PAD: `axes` value is out of range for the input rank");
+            }
+            // pad format is x1_begin, x2_begin, ..., x1_end, x2_end, ...
+            new_pads[axis]        = pads[idx];
+            new_pads[axis + rank] = pads[idx + num_axes];
+        }
+        return new_pads;
+    }
+
+    instruction_ref parse(const op_desc& /*opd*/,
+                          const onnx_parser& parser,
+                          const onnx_parser::node_info& info,
+                          const std::vector<instruction_ref>& args) const
+    {
+        std::vector<int64_t> pads = parse_pads(info, args);
+
+        // check if padding is actually being done (at least one value is nonzero)
+        if(std::all_of(pads.begin(), pads.end(), [](const int& i) { return i == 0; }))
+        {
+            return info.add_instruction(make_op("identity"), args.front());
+        }
+
+        std::string mode      = parse_mode(info, args);
+        bool is_constant_mode = mode == "constant";
+        float value           = is_constant_mode ? parse_constant_value(parser, info, args) : 0.0f;
+        std::vector<int64_t> axes = parse_axes(args, is_constant_mode);
+        size_t input_rank         = args.front()->get_shape().ndim();
+
+        if(not axes.empty())
+        {
+            pads = calculate_pads_with_axes(pads, axes, input_rank);
+        }
+
+        if(pads.size() != input_rank * 2)
+        {
+            MIGRAPHX_THROW("PARSE_PAD: number of elements of pads should be equal to 2 * "
+                           "input rank");
+        }
+
+        if(mode == "reflect")
+        {
+            return reflect_pad(info, pads, args.front());
+        }

        return info.add_instruction(migraphx::make_op("pad", {{"pads", pads}, {"value", value}}),
                                    args.front());

--- a/src/onnx/parse_pooling.cpp
+++ b/src/onnx/parse_pooling.cpp
@@ -97,7 +97,7 @@ struct parse_pooling : op_parser<parse_pooling>
            values["lp_order"] = info.attributes.at("p").i();
        }

-        // ensure pads availabe only when auto_pad is "NOT_SET"
+        // ensure pads available only when auto_pad is "NOT_SET"
        check_padding_mode(info, "POOLING");

        return values;

--- a/src/onnx/parse_qlinearadd.cpp
+++ b/src/onnx/parse_qlinearadd.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/onnx/op_parser.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/common.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/onnx/checks.hpp>
+#include <migraphx/onnx/broadcast_qdq.hpp>
+#include <migraphx/instruction.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace onnx {
+
+/*
+ *********************************************************************************
+ *  Reference: see QLinearAdd in                                                 *
+ *  https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md  *
+ *********************************************************************************
+
+  com.microsoft.QLinearAdd
+  Performs element-wise binary addition on 8 bit data types (with Numpy-style broadcasting support).
+
+  C = (A_scale * (A - A_zero_point) + B_scale * (B - B_zero_point))/C_scale + C_zero_point
+
+  Version
+  This version of the operator has been available since version 1 of the 'com.microsoft' operator
+  set.
+
+  Inputs (7 - 8)
+  A : T
+  First operand.
+
+  A_scale : tensor(float)
+  Input A's scale. It's a scalar, which means a per-tensor/layer quantization.
+
+  A_zero_point (optional) : T
+  Input A zero point. Default value is 0 if it's not specified. It's a scalar, which means a
+  per-tensor/layer quantization.
+
+  B : T
+  Second operand.
+
+  B_scale : tensor(float)
+  Input B's scale. It's a scalar, which means a per-tensor/layer quantization.
+
+  B_zero_point (optional) : T
+  Input B zero point. Default value is 0 if it's not specified. It's a scalar, which means a
+  per-tensor/layer quantization.
+
+  C_scale : tensor(float)
+  Output scale. It's a scalar, which means a per-tensor/layer quantization.
+
+  C_zero_point (optional) : T
+
+  Output zero point. Default value is 0 if it's not specified. It's a scalar, which means a
+  per-tensor/layer quantization.
+
+  Outputs
+  C : T
+  Result, has same element type as two inputs
+
+  Type Constraints
+  T : tensor(uint8), tensor(int8)
+  Constrain input and output types to 8 bit signed and unsigned tensors.
+
+*/
+
+// Parser for the com.microsoft QLinearAdd operator: both operands are dequantized,
+// added, and the sum is quantized again with the output scale (and zero point when
+// one is supplied).
+struct parse_qlinearadd : op_parser<parse_qlinearadd>
+{
+    std::vector<op_desc> operators() const { return {{"QLinearAdd"}}; }
+
+    // Basic validation of the QLinearAdd inputs.
+    // Throws when fewer than 7 inputs are given, or when A (args[0]) and B (args[3])
+    // are not both int8 or both uint8.
+    void check_inputs(const std::vector<instruction_ref>& args) const
+    {
+        if(args.size() < 7)
+            MIGRAPHX_THROW("QLINEARADD: missing inputs");
+
+        const auto is_8bit = [](auto t) {
+            return t == migraphx::shape::int8_type or t == migraphx::shape::uint8_type;
+        };
+
+        const auto a_type = args[0]->get_shape().type();
+        const auto b_type = args[3]->get_shape().type();
+
+        if(not is_8bit(a_type))
+            MIGRAPHX_THROW("QLINEARADD: unsupported input type");
+        if(not is_8bit(b_type))
+            MIGRAPHX_THROW("QLINEARADD: unsupported input type");
+        if(a_type != b_type)
+            MIGRAPHX_THROW("QLINEARADD: mismatched input types");
+    }
+
+    // Emits dequantize(A) + dequantize(B) followed by a quantizelinear on the sum.
+    // Input layout: A, A_scale, A_zero_point, B, B_scale, B_zero_point, C_scale and an
+    // optional C_zero_point.
+    instruction_ref parse(const op_desc& /* opd */,
+                          const onnx_parser& /*parser*/,
+                          const onnx_parser::node_info& info,
+                          const std::vector<instruction_ref>& args) const
+    {
+        check_inputs(args);
+
+        // A: data, scale, zero point
+        auto deq_a = bcast_qdq_instr("dequantizelinear", args[0], args[1], args[2], info);
+
+        // B: data, scale, zero point
+        auto deq_b = bcast_qdq_instr("dequantizelinear", args[3], args[4], args[5], info);
+
+        // C = A + B
+        auto sum = info.add_common_op("add", deq_a, deq_b);
+
+        const auto& scale_c = args[6];
+
+        // without a zero point for C only the broadcast scale is passed on
+        if(args.size() != 8)
+        {
+            auto scale_c_bcast = bcast_scalar_instr(sum->get_shape(), scale_c, info);
+            return info.add_instruction(migraphx::make_op("quantizelinear"), sum, scale_c_bcast);
+        }
+
+        // the zero point for C is the last, optional, argument
+        return bcast_qdq_instr("quantizelinear", sum, scale_c, args[7], info);
+    }
+};
+
+} // namespace onnx
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/onnx/parse_qlinearconv.cpp
+++ b/src/onnx/parse_qlinearconv.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/onnx/op_parser.hpp>
+#include <migraphx/onnx/padding.hpp>
+#include <migraphx/onnx/conv.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/onnx/checks.hpp>
+#include <migraphx/onnx/broadcast_qdq.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/stringutils.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace onnx {
+
+/*
+ *********************************************************************************
+ *  Reference: see QLinearConv in                                                *
+ *  https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md  *
+ *********************************************************************************
+
+com.microsoft.QLinearConv
+
+Version
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+ATTRIBUTES:
+auto_pad : string
+channels_last : int
+dilations : list of ints
+group : int
+kernel_shape : list of ints
+pads : list of ints
+strides : list of ints
+
+INPUTS (8 - 9):
+x : T1
+x_scale : tensor(float)
+x_zero_point : T1
+w : T2
+w_scale : tensor(float)
+w_zero_point : T2
+y_scale : tensor(float)
+y_zero_point : T3
+B (optional) : T4
+
+OUTPUTS:
+y : T3
+
+Type Constraints:
+T1 : tensor(int8), tensor(uint8)
+T2 : tensor(int8), tensor(uint8)
+T3 : tensor(int8), tensor(uint8)
+T4 : tensor(int32)
+
+More details also at:
+https://xadupre.github.io/draft/onnx/onnx_doc_folder/onnx__QLinearConv.html
+
+*/
+
+struct parse_qlinearconv : op_parser<parse_qlinearconv>
+{
+    std::vector<op_desc> operators() const { return {{"QLinearConv"}}; }
+
+    // basic type checking for QLinearConv Operator
+    void check_inputs(const std::vector<instruction_ref>& inp_arg) const
+    {
+        if(inp_arg.size() < 8)
+            MIGRAPHX_THROW("QLINEARCONV: missing inputs");
+
+        const instruction_ref& in_x       = inp_arg[0];
+        const instruction_ref& in_scale_x = inp_arg[1];
+        const instruction_ref& in_w       = inp_arg[3];
+        const instruction_ref& in_scale_w = inp_arg[4];
+        const instruction_ref& in_scale_y = inp_arg[6];
+
+        auto sh_x   = in_x->get_shape();
+        auto sh_w   = in_w->get_shape();
+        auto type_x = sh_x.type();
+        auto type_w = sh_w.type();
+
+        assert(in_x->get_shape().ndim() > 2);
+
+        if(type_x != shape::int8_type and type_x != shape::uint8_type)
+            MIGRAPHX_THROW("QLINEARCONV: unsupported input type");
+        if(type_w != shape::int8_type and type_w != shape::uint8_type)
+            MIGRAPHX_THROW("QLINEARCONV: unsupported weight type");
+        if(in_scale_x->get_shape().type() != shape::float_type)
+            MIGRAPHX_THROW("QLINEARCONV x scale type should be float");
+        if(in_scale_w->get_shape().type() != shape::float_type)
+            MIGRAPHX_THROW("QLINEARCONV: wt scale type should be float");
+        if(in_scale_y->get_shape().type() != shape::float_type)
+            MIGRAPHX_THROW("QLINEARCONV: y scale type should be float");
+        if(inp_arg.size() > 8 and inp_arg[8]->get_shape().type() != shape::int32_type)
+            MIGRAPHX_THROW("QLINEARCONV y bias should be int32");
+    }
+
+    // process all attributes of QLinearConv Operator..
+    value process_attributes(const onnx_parser& parser,
+                             const onnx_parser::node_info& info,
+                             const std::vector<instruction_ref>& args) const
+    {
+        value values;
+
+        const auto& in_x = args[0];
+        const auto& wt   = args[3];
+
+        size_t kdims = in_x->get_shape().ndim() - 2;
+
+        check_padding_mode(info, "QLINEARCONV");
+
+        values["stride"]   = std::vector<int>(kdims, 1);
+        values["dilation"] = std::vector<int>(kdims, 1);
+        values["padding"]  = std::vector<int>(kdims, 0);
+        values["group"]    = 1;
+
+        if(contains(info.attributes, "group"))
+            values["group"] = parser.parse_value(info.attributes.at("group")).template at<int>();
+
+        if(contains(info.attributes, "strides"))
+        {
+            std::vector<int> st;
+            copy(info.attributes.at("strides").ints(), std::back_inserter(st));
+            check_attr_sizes(kdims, st.size(), "QLINEARCONV: inconsistent strides");
+            values["stride"] = st;
+        }
+
+        if(contains(info.attributes, "dilations"))
+        {
+            std::vector<int> dil;
+            copy(info.attributes.at("dilations").ints(), std::back_inserter(dil));
+            check_attr_sizes(kdims, dil.size(), "QLINEARCONV: inconsistent dilations");
+            values["dilation"] = dil;
+        }
+
+        if(contains(info.attributes, "pads"))
+        {
+            std::vector<int> pads;
+            copy(info.attributes.at("pads").ints(), std::back_inserter(pads));
+            check_attr_sizes(kdims, pads.size() / 2, "QLINEARCONV: inconsistent padding");
+            values["padding"] = pads;
+        }
+        else if(contains(info.attributes, "auto_pad"))
+        {
+            auto in_lens = in_x->get_shape().lens();
+            auto wt_lens = wt->get_shape().lens();
+            std::vector<std::size_t> k_lens(wt_lens.begin() + 2, wt_lens.end());
+            std::vector<int64_t> pads = values["padding"].to_vector<std::int64_t>();
+            cal_auto_padding_size(
+                info, values, k_lens, values["dilation"].to_vector<std::size_t>(), in_lens, pads);
+            values["padding"] = pads;
+        }
+
+        recalc_conv_attributes(values, kdims);
+
+        return values;
+    }
+
+    // Adds the optional bias input to the convolution result. The bias is broadcast along
+    // axis 1 to the lens of the convolution output, converted to the element type of the
+    // convolution output, and then summed with it.
+    instruction_ref add_bias_to_conv(const instruction_ref bias_arg,
+                                     const instruction_ref conv_instr,
+                                     const onnx_parser::node_info& info) const
+    {
+        const auto out_shape = conv_instr->get_shape();
+        const auto out_lens  = out_shape.lens();
+        const auto out_type  = out_shape.type();
+
+        // expand the bias to the full shape of the convolution output
+        auto bias_bcast = info.add_instruction(
+            migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", out_lens}}), bias_arg);
+
+        // match the element type of the convolution output before adding
+        auto bias_typed = info.add_instruction(
+            migraphx::make_op("convert", {{"target_type", out_type}}), bias_bcast);
+
+        return info.add_instruction(migraphx::make_op("add"), conv_instr, bias_typed);
+    }
+
+    // Lowers QLinearConv to: dequantizelinear(x), dequantizelinear(w) -> convolution
+    // (+ optional bias) -> quantizelinear.
+    // Inputs by position: 0 x, 1 x_scale, 2 x_zero_point, 3 w, 4 w_scale, 5 w_zero_point,
+    // 6 y_scale, 7 y_zero_point and, optionally, 8 bias.
+    instruction_ref parse(const op_desc& /* opd */,
+                          const onnx_parser& parser,
+                          const onnx_parser::node_info& info,
+                          const std::vector<instruction_ref>& args) const
+    {
+        check_inputs(args);
+
+        auto conv_attrs = process_attributes(parser, info, args);
+
+        // dequantize the activations with their scale and zero point
+        auto x_real = bcast_qdq_instr("dequantizelinear", args[0], args[1], args[2], info);
+
+        // dequantize the weights with their scale and zero point
+        auto w_real = bcast_qdq_instr("dequantizelinear", args[3], args[4], args[5], info);
+
+        auto result =
+            info.add_instruction(migraphx::make_op("convolution", conv_attrs), x_real, w_real);
+
+        // the bias is an optional ninth input
+        const bool has_bias = args.size() > 8;
+        if(has_bias)
+            result = add_bias_to_conv(args[8], result, info);
+
+        // quantize the result with the output scale and zero point
+        return bcast_qdq_instr("quantizelinear", result, args[6], args[7], info);
+    }
+};
+
+} // namespace onnx
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/onnx/parse_qlinearglavgpool.cpp
+++ b/src/onnx/parse_qlinearglavgpool.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/onnx/op_parser.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/op/pooling.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/onnx/checks.hpp>
+#include <migraphx/onnx/broadcast_qdq.hpp>
+#include <migraphx/instruction.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace onnx {
+
+/*
+ *********************************************************************************
+ *  Reference: see QLinearGlobalAveragePool in                                   *
+ *  github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md          *
+ *********************************************************************************
+
+QLinearGlobalAveragePool consumes an input tensor X and applies
+Average pooling across the values in the same channel. This is
+equivalent to AveragePool with kernel size equal to the spatial
+dimension of input tensor. Input is of type uint8_t or int8_t.
+
+Version
+This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
+
+Attributes
+channels_last : int
+
+Inputs
+X : T
+
+Input data tensor from the previous operator. Depending on channels_last, the dimensions for the
+image case are (N x C x H x W) or (N x H x W x C), where N is the batch size, C is the number of
+channels, and H and W are the height and the width of the data. For the non-image case, the
+dimensions are in the form of (N x C x D1 x D2 ... Dn) or (N x D1 x D2 ... Dn x C), where N is the
+batch size.
+
+x_scale : tensor(float)
+Scale of quantized input 'X'. It must be a scalar.
+
+x_zero_point : T
+Zero point tensor for input 'X'. It must be a scalar.
+
+y_scale : tensor(float)
+Scale of quantized output 'Y'. It must be a scalar.
+
+y_zero_point : T
+Zero point tensor for output 'Y'. It must be a scalar.
+
+Outputs
+Y : T
+Output data tensor from pooling across the input tensor. The output tensor has the same rank as the
+input, with the N and C dimensions keeping their values, while the other dimensions are all 1.
+
+Type Constraints
+T : tensor(uint8), tensor(int8)
+Constrain input and output types to signed/unsigned int8 tensors.
+
+*/
+
+struct parse_qlinearglobalaveragepool : op_parser<parse_qlinearglobalaveragepool>
+{
+    std::vector<op_desc> operators() const { return {{"QLinearGlobalAveragePool"}}; }
+
+    // Basic validation of the QLinearGlobalAveragePool inputs.
+    // Expects at least five inputs: X, x_scale, x_zero_point, y_scale, y_zero_point.
+    // Throws when inputs are missing, when X has two or fewer dimensions, when X is neither
+    // int8 nor uint8, or when either zero point has a different type than X.
+    void check_inputs(const std::vector<instruction_ref>& args) const
+    {
+        if(args.size() < 5)
+            MIGRAPHX_THROW("QLINEARGLOBALAVERAGEPOOL: missing inputs");
+
+        const auto& in_x      = args[0];
+        const auto& zero_pt_x = args[2];
+        const auto& zero_pt_y = args[4];
+
+        // at least one dimension beyond the first two is needed to pool over
+        if(in_x->get_shape().ndim() <= 2)
+            MIGRAPHX_THROW("QLINEARGLOBALAVERAGEPOOL: input dimensions too small");
+
+        auto type_x = in_x->get_shape().type();
+        if(type_x != migraphx::shape::int8_type and type_x != migraphx::shape::uint8_type)
+            MIGRAPHX_THROW("QLINEARGLOBALAVERAGEPOOL: unsupported input type");
+
+        if(type_x != zero_pt_x->get_shape().type())
+            MIGRAPHX_THROW("QLINEARGLOBALAVERAGEPOOL: mismatched type: input zero point");
+
+        if(type_x != zero_pt_y->get_shape().type())
+            MIGRAPHX_THROW("QLINEARGLOBALAVERAGEPOOL: mismatched type: output zero point");
+    }
+
+    // Lowers the operator to: dequantizelinear(X) -> average pooling whose window covers every
+    // dimension after the first two -> quantizelinear with the output scale and zero point.
+    // Throws when channels_last is non-zero, or when check_inputs rejects the inputs.
+    instruction_ref parse(const op_desc& /* opd */,
+                          const onnx_parser& parser,
+                          const onnx_parser::node_info& info,
+                          const std::vector<instruction_ref>& args) const
+    {
+        // channels_last is an optional attribute: treat a missing attribute as 0 rather than
+        // letting attributes.at() throw std::out_of_range on an otherwise valid node
+        int channels_last = 0;
+        if(contains(info.attributes, "channels_last"))
+        {
+            channels_last =
+                parser.parse_value(info.attributes.at("channels_last")).template at<int>();
+        }
+        if(channels_last != 0)
+            MIGRAPHX_THROW(
+                "QLINEARGLOBALAVERAGEPOOL: channels_last (N x D1..Dn x C) is not supported");
+
+        check_inputs(args);
+
+        // Input: X
+
+        const auto& in_x      = args[0];
+        const auto& scale_x   = args[1];
+        const auto& zero_pt_x = args[2];
+        auto dquant_x         = bcast_qdq_instr("dequantizelinear", in_x, scale_x, zero_pt_x, info);
+
+        // Output Y = globalaveragepool(X)
+
+        auto op   = migraphx::op::pooling{migraphx::op::pooling_mode::average};
+        auto lens = in_x->get_shape().lens();
+        // the pooling window spans every dimension after the first two
+        std::vector<size_t> lengths(lens.begin() + 2, lens.end());
+        op.lengths = lengths;
+        // NOTE(review): padding gets one zero per input dimension, not per pooled dimension;
+        // confirm this is what the pooling op expects for inputs that are not 4-D
+        op.padding = std::vector<size_t>(lens.size());
+        auto out_y = info.add_instruction(op, dquant_x);
+
+        const auto& scale_y   = args[3];
+        const auto& zero_pt_y = args[4];
+
+        auto out_quant_y = bcast_qdq_instr("quantizelinear", out_y, scale_y, zero_pt_y, info);
+
+        return out_quant_y;
+    }
+};
+
+} // namespace onnx
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/onnx/parse_qlinearmatmul.cpp
+++ b/src/onnx/parse_qlinearmatmul.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/onnx/op_parser.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/op/pooling.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/onnx/checks.hpp>
+#include <migraphx/onnx/broadcast_qdq.hpp>
+#include <migraphx/instruction.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace onnx {
+
+/*
+ *********************************************************************************
+ *  Reference: see QLinearMatMul in                                              *
+ *  https://onnx.ai/onnx/operators/onnx__QLinearMatMul.html                      *
+ *********************************************************************************
+
+Matrix product that behaves like numpy.matmul:
+
+https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.matmul.html. It consumes two
+quantized input tensors, their scales and zero points, scale and zero point of output, and computes
+the quantized output. The quantization formula is y = saturate((x / y_scale) + y_zero_point). For (x
+/ y_scale), it is rounding to nearest ties to even. Refer to https://en.wikipedia.org/wiki/Rounding
+for details. Scale and zero point must have same shape. They must be either scalar (per tensor) or
+N-D tensor (per row for ‘a’ and per column for ‘b’). Scalar refers to per tensor quantization
+whereas N-D refers to per row or per column quantization. If the input is 2D of shape [M, K] then
+zero point and scale tensor may be an M element vector [v_1, v_2, …, v_M] for per row quantization
+and K element vector of shape [v_1, v_2, …, v_K] for per column quantization. If the input is N-D
+tensor with shape [D1, D2, M, K] then zero point and scale tensor may have shape [D1, D2, M, 1] for
+per row quantization and shape [D1, D2, 1, K] for per column quantization. Production must never
+overflow, and accumulation may overflow if and only if in 32 bits.
+
+Inputs
+a (heterogeneous) - T1: N-dimensional quantized matrix a
+
+a_scale (heterogeneous) - tensor(float): scale of quantized input a
+
+a_zero_point (heterogeneous) - T1: zero point of quantized input a
+
+b (heterogeneous) - T2: N-dimensional quantized matrix b
+
+b_scale (heterogeneous) - tensor(float): scale of quantized input b
+
+b_zero_point (heterogeneous) - T2: zero point of quantized input b
+
+y_scale (heterogeneous) - tensor(float): scale of quantized output y
+
+y_zero_point (heterogeneous) - T3: zero point of quantized output y
+
+Outputs
+y (heterogeneous) - T3: Quantized matrix multiply results from a * b
+
+Type Constraints
+T1 in ( tensor(int8), tensor(uint8) ): Constrain input a and its zero point data type to 8-bit
+integer tensor.
+
+T2 in ( tensor(int8), tensor(uint8) ): Constrain input b and its zero point data type to 8-bit
+integer tensor.
+
+T3 in ( tensor(int8), tensor(uint8) ): Constrain output y and its zero point data type to 8-bit
+integer tensor.
+
+*/
+
+struct parse_qlinearmatmul : op_parser<parse_qlinearmatmul>
+{
+    std::vector<op_desc> operators() const { return {{"QLinearMatMul"}}; }
+
+    // Basic validation of the QLinearMatMul inputs.
+    // Throws when fewer than eight inputs are given, when `a` or `b` is not int8/uint8, when
+    // either has no dimensions, when the (possibly promoted) shapes cannot be multiplied, or
+    // when any scale / zero point of `a` or `b` is not a scalar.
+    void check_inputs(const std::vector<instruction_ref>& args) const
+    {
+        if(args.size() < 8)
+            MIGRAPHX_THROW("QLINEARMATMUL: missing inputs");
+
+        const auto shape_a = args[0]->get_shape();
+        const auto shape_b = args[3]->get_shape();
+
+        const auto is_8bit = [](auto t) {
+            return t == migraphx::shape::int8_type or t == migraphx::shape::uint8_type;
+        };
+        if(not is_8bit(shape_a.type()) or not is_8bit(shape_b.type()))
+            MIGRAPHX_THROW("QLINEARMATMUL: unsupported input type");
+
+        auto dims_a = shape_a.lens();
+        auto dims_b = shape_b.lens();
+
+        if(dims_a.empty() or dims_b.empty())
+            MIGRAPHX_THROW("QLINEARMATMUL: empty input");
+
+        // a 1-D operand is promoted to a matrix before the shape check:
+        // `a` gets a leading 1 (row vector), `b` gets a trailing 1 (column vector)
+        if(dims_a.size() == 1)
+            dims_a.insert(dims_a.begin(), 1);
+        if(dims_b.size() == 1)
+            dims_b.push_back(1);
+
+        // both operands have at least two dimensions from here on
+        const bool same_rank = dims_a.size() == dims_b.size();
+        // last dimension of `a` must equal the second to last dimension of `b`
+        const bool inner_ok = same_rank and dims_a.back() == dims_b[dims_b.size() - 2];
+        // every dimension in front of the trailing two must be identical
+        const bool leading_ok =
+            inner_ok and
+            std::equal(dims_a.begin(), dims_a.end() - 2, dims_b.begin(), dims_b.end() - 2);
+        if(not leading_ok)
+            MIGRAPHX_THROW("QLINEARMATMUL: mismatched input dimensions");
+
+        // only scalar scales and zero points are accepted for `a` and `b`
+        const auto not_scalar = [](auto arg) { return not arg->get_shape().scalar(); };
+        if(migraphx::any_of({args[1], args[2], args[4], args[5]}, not_scalar))
+            MIGRAPHX_THROW("QLINEARMATMUL: unsupported row/column quantization");
+    }
+
+    // Lowers QLinearMatMul to: dequantizelinear(a), dequantizelinear(b) -> dot -> quantizelinear.
+    // Inputs by position: 0 a, 1 a_scale, 2 a_zero_point, 3 b, 4 b_scale, 5 b_zero_point,
+    // 6 y_scale, 7 y_zero_point.
+    instruction_ref parse(const op_desc& /* opd */,
+                          const onnx_parser& /*parser*/,
+                          const onnx_parser::node_info& info,
+                          const std::vector<instruction_ref>& args) const
+    {
+        check_inputs(args);
+
+        // dequantize both operands with their own scale and zero point
+        auto mat_a = bcast_qdq_instr("dequantizelinear", args[0], args[1], args[2], info);
+        auto mat_b = bcast_qdq_instr("dequantizelinear", args[3], args[4], args[5], info);
+
+        const bool a_is_vector = args[0]->get_shape().ndim() == 1;
+        const bool b_is_vector = args[3]->get_shape().ndim() == 1;
+
+        // promote 1-D operands to matrices so that dot can consume them
+        if(a_is_vector)
+            mat_a = info.add_instruction(make_op("unsqueeze", {{"axes", {0}}}), mat_a);
+        if(b_is_vector)
+            mat_b = info.add_instruction(make_op("unsqueeze", {{"axes", {1}}}), mat_b);
+
+        // Y = A * B
+        auto product = info.add_instruction(migraphx::make_op("dot"), mat_a, mat_b);
+
+        // drop one inserted unit dimension at most, even if both operands were promoted
+        if(a_is_vector)
+        {
+            product = info.add_instruction(make_op("squeeze", {{"axes", {0}}}), product);
+        }
+        else if(b_is_vector)
+        {
+            product = info.add_instruction(make_op("squeeze", {{"axes", {1}}}), product);
+        }
+
+        // quantize the product with the output scale and zero point
+        return bcast_qdq_instr("quantizelinear", product, args[6], args[7], info);
+    }
+};
+
+} // namespace onnx
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/onnx/parse_reshape.cpp
+++ b/src/onnx/parse_reshape.cpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -45,16 +45,25 @@ struct parse_reshape : op_parser<parse_reshape>
        {
            literal s = parser.parse_value(info.attributes.at("shape"));
            s.visit([&](auto v) { copy(v, std::back_inserter(dims)); });
+            return info.add_instruction(make_op("reshape", {{"dims", dims}}), args[0]);
        }
-        if(args.size() == 2)
+        else
        {
+            // 2 inputs
            auto s = args[1]->eval();
-            check_arg_empty(s, "Reshape: non-constant shape input is not supported");
-            s.visit([&](auto v) { copy(v, std::back_inserter(dims)); });
+            if(s.empty())
+            {
+                // arg[1] not eval-able
+                auto alloc_ins = info.add_instruction(
+                    make_op("allocate", {{"buf_type", args[0]->get_shape().type()}}), args[1]);
+                return info.add_instruction(make_op("reshape"), args[0], alloc_ins);
+            }
+            else
+            {
+                s.visit([&](auto v) { copy(v, std::back_inserter(dims)); });
+                return info.add_instruction(make_op("reshape", {{"dims", dims}}), args[0]);
+            }
        }
-
-        auto cont = info.add_instruction(make_op("contiguous"), args[0]);
-        return info.add_instruction(make_op("reshape", {{"dims", dims}}), cont);
    }
 };


--- a/src/onnx/parse_resize.cpp
+++ b/src/onnx/parse_resize.cpp
@@ -181,6 +181,76 @@ static std::string get_nearest_mode(const onnx_parser::attribute_map& attr)
    return nearest_mode;
 }

+// Returns the values of the `scales` attribute as doubles, or an empty vector when the
+// attribute is not present.
+static std::vector<double> get_scales(const onnx_parser::attribute_map& attr)
+{
+    std::vector<double> result;
+
+    const auto found = attr.find("scales");
+    if(found != attr.end())
+        copy(found->second.floats(), std::back_inserter(result));
+
+    return result;
+}
+
+// Scans the inputs after the first one for an output size or a scales tensor.
+//  - An int64 input is read as the output size: it is stored in `out_lens` and `vec_scale` is
+//    derived from it as out / in per dimension.
+//  - Any other input whose first dimension equals the input rank is read as the scales and
+//    stored in `vec_scale`.
+// Inputs named "undefined", inputs equal to the first input and inputs with empty lens are
+// skipped. Throws when the output size has a different number of entries than `in_lens`; the
+// evaluated inputs are also passed through check_arg_empty.
+static void parse_args(const std::vector<instruction_ref>& args,
+                       const std::vector<size_t>& in_lens,
+                       const std::string& op_name,
+                       std::vector<double>& vec_scale,
+                       std::vector<std::size_t>& out_lens)
+{
+    const std::string prefix = "PARSE_" + op_name;
+
+    for(const auto& arg : args)
+    {
+        const bool skip_arg = arg->name() == "undefined" or arg == args.front();
+        if(skip_arg)
+            continue;
+
+        const auto arg_shape = arg->get_shape();
+        const auto arg_lens  = arg_shape.lens();
+        // skipped empty input
+        if(arg_lens.empty())
+            continue;
+
+        if(arg_shape.type() == shape::int64_type)
+        {
+            // output size
+            auto sizes = arg->eval();
+            check_arg_empty(sizes, prefix + ": dynamic output size is not supported!");
+            sizes.visit([&](const auto& ol) { out_lens.assign(ol.begin(), ol.end()); });
+
+            if(out_lens.size() != in_lens.size())
+                MIGRAPHX_THROW(prefix + ": specified output size does not match input size");
+
+            // derive the per-dimension scale from the requested output size
+            vec_scale.resize(in_lens.size());
+            std::transform(in_lens.begin(),
+                           in_lens.end(),
+                           out_lens.begin(),
+                           vec_scale.begin(),
+                           [](auto in_dim, auto out_dim) { return 1.0 * out_dim / in_dim; });
+            continue;
+        }
+
+        // scale input: only accepted when it holds one entry per input dimension
+        if(arg_lens[0] != in_lens.size())
+            continue;
+
+        auto scales = arg->eval();
+        check_arg_empty(scales, prefix + ": dynamic input scale is not supported!");
+        scales.visit([&](const auto& v) { vec_scale.assign(v.begin(), v.end()); });
+    }
+}
+
 struct parse_resize : op_parser<parse_resize>
 {
    std::vector<op_desc> operators() const { return {{"Resize"}, {"Upsample"}}; }
@@ -214,72 +284,30 @@ struct parse_resize : op_parser<parse_resize>
        std::vector<std::size_t> out_lens(in_lens.size());

        // scale
-        std::vector<double> vec_scale;
+        std::vector<double> vec_scale = get_scales(info.attributes);

-        for(const auto& arg : args)
+        // If `scales` was not an attribute, it must be an input
+        if(vec_scale.empty())
        {
-            if(arg->name() == "undefined" or arg == args.front())
-            {
-                continue;
-            }
-
-            // skipped empty input
-            auto lens = arg->get_shape().lens();
-            if(lens.empty())
-            {
-                continue;
-            }
-
-            auto type = arg->get_shape().type();
-            // output size
-            if(type == shape::int64_type)
-            {
-                auto arg_out_s = arg->eval();
-                check_arg_empty(arg_out_s,
-                                "PARSE_" + opd.op_name + ": dynamic output size is not supported!");
-                arg_out_s.visit([&](const auto& ol) { out_lens.assign(ol.begin(), ol.end()); });
-
-                if(out_lens.size() != in_lens.size())
-                {
-                    MIGRAPHX_THROW("PARSE_" + opd.op_name +
-                                   ": specified output size does not match input size");
-                }
+            // Depending on the args, it *must* populate the `vec_scale`, and might populate
+            // `out_lens`
+            parse_args(args, in_lens, opd.op_name, vec_scale, out_lens);
+        }

-                // compute the scale
-                vec_scale.resize(in_lens.size());
-                std::transform(in_lens.begin(),
-                               in_lens.end(),
-                               out_lens.begin(),
-                               vec_scale.begin(),
-                               [](auto iss, auto oss) { return 1.0 * oss / iss; });
-            }
-            else
-            {
+        if(in_lens.size() != vec_scale.size())
+        {
+            MIGRAPHX_THROW("PARSE_" + opd.op_name + ": ranks of input and scale are different!");
+        }

-                // scale input
-                if(lens[0] == in_lens.size())
-                {
-                    auto arg_scale = arg->eval();
-                    check_arg_empty(arg_scale,
-                                    "PARSE_" + opd.op_name +
-                                        ": dynamic input scale is not supported!");
-
-                    arg_scale.visit([&](const auto& v) { vec_scale.assign(v.begin(), v.end()); });
-                    if(in_lens.size() != vec_scale.size())
-                    {
-                        MIGRAPHX_THROW("PARSE_" + opd.op_name +
-                                       ": ranks of input and scale are different!");
-                    }
-
-                    std::transform(in_lens.begin(),
-                                   in_lens.end(),
-                                   vec_scale.begin(),
-                                   out_lens.begin(),
-                                   [&](auto idx, auto scale) {
-                                       return static_cast<std::size_t>(idx * scale);
-                                   });
-                }
-            }
+        // if the output was not calculated yet, we update it based on the scales
+        if(all_of(out_lens.cbegin(), out_lens.cend(), [](auto o) { return o == 0; }))
+        {
+            std::transform(
+                in_lens.begin(),
+                in_lens.end(),
+                vec_scale.begin(),
+                out_lens.begin(),
+                [&](auto idx, auto scale) { return static_cast<std::size_t>(idx * scale); });
        }

        shape out_s{in_s.type(), out_lens};
@@ -288,7 +316,6 @@ struct parse_resize : op_parser<parse_resize>

        // reshape input to one-dimension
        std::vector<int64_t> rsp_lens = {static_cast<int64_t>(in_s.elements())};
-        args[0]                       = info.make_contiguous(args[0]);
        auto rsp = info.add_instruction(make_op("reshape", {{"dims", rsp_lens}}), args[0]);

        if(mode == "nearest")

--- a/src/onnx/parse_shrink.cpp
+++ b/src/onnx/parse_shrink.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/onnx/op_parser.hpp>
+#include <migraphx/onnx/checks.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/make_op.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace onnx {
+
+struct parse_shrink : op_parser<parse_shrink>
+{
+    std::vector<op_desc> operators() const { return {{"Shrink"}}; }
+
+    // Lowers Shrink to elementwise ops:
+    //   y = (x < -lambd) * (x + bias) + (not(x < -lambd) and (x > lambd)) * (x - bias)
+    // The attributes `bias` and `lambd` default to 0.0 and 0.5 when absent. The result is
+    // converted back to the type of x if the arithmetic produced a different type.
+    instruction_ref parse(const op_desc&,
+                          const onnx_parser& parser,
+                          const onnx_parser::node_info& info,
+                          std::vector<instruction_ref> args) const
+    {
+        // reads an optional float attribute, using `fallback` when it is not present
+        const auto float_attr = [&](const std::string& key, float fallback) {
+            if(not contains(info.attributes, key))
+                return fallback;
+            return parser.parse_value(info.attributes.at(key)).at<float>();
+        };
+        const float bias  = float_attr("bias", 0.0);
+        const float lambd = float_attr("lambd", 0.5);
+
+        auto input           = args[0];
+        const auto in_type   = input->get_shape().type();
+        auto bias_lit        = info.add_literal(bias);
+        auto lower_bound_lit = info.add_literal(-lambd);
+        auto upper_bound_lit = info.add_literal(lambd);
+
+        // candidate outputs for the two non-zero regions
+        auto shifted_up   = info.add_common_op("add", input, bias_lit);
+        auto shifted_down = info.add_common_op("sub", input, bias_lit);
+
+        // region masks: below -lambd, and above lambd while not below -lambd
+        auto is_below     = info.add_common_op("less", input, lower_bound_lit);
+        auto is_not_below = info.add_common_op("not", is_below);
+        auto is_greater   = info.add_common_op("greater", input, upper_bound_lit);
+        auto is_above     = info.add_common_op("logical_and", is_not_below, is_greater);
+
+        // turn the masks into multipliers of the input's type
+        const auto to_in_type = make_op("convert", {{"target_type", in_type}});
+        auto below_mask       = info.add_instruction(to_in_type, is_below);
+        auto above_mask       = info.add_instruction(to_in_type, is_above);
+
+        auto below_part = info.add_common_op("mul", below_mask, shifted_up);
+        auto above_part = info.add_common_op("mul", above_mask, shifted_down);
+        auto result     = info.add_common_op("add", below_part, above_part);
+
+        if(result->get_shape().type() == in_type)
+            return result;
+        return info.add_instruction(make_op("convert", {{"target_type", in_type}}), result);
+    }
+};
+
+} // namespace onnx
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/onnx/parse_slice.cpp
+++ b/src/onnx/parse_slice.cpp
@@ -46,6 +46,9 @@ struct parse_slice : op_parser<parse_slice>

        void always_insert(instruction_ref arg) { op_args.insert(op_args.begin(), arg); }

+        /**
+         * Either insert argument into `this->op_args` or return the constant value of the argument
+         */
        std::vector<int64_t> insert(instruction_ref arg)
        {
            std::vector<int64_t> result;
@@ -137,23 +140,22 @@ struct parse_slice : op_parser<parse_slice>
        sd.always_insert(args.at(0));

        // If axes arg is not given, the default is all of them.
-        if(sd.op.axes.empty() and sd.op_args.size() < 3)
+        if(sd.op.axes.empty() and sd.op_args.size() <= 3)
        {
            std::vector<int64_t> axes(args[0]->get_shape().ndim());
            std::iota(axes.begin(), axes.end(), int64_t{0});
            sd.op.axes = axes;
        }

-        if(not sd.steps.empty())
+        if(std::any_of(sd.steps.begin(), sd.steps.end(), [](auto s) { return s != 1; }))
        {
            if(sd.op.starts.empty() or sd.op.ends.empty())
-                MIGRAPHX_THROW("PARSE_SLICE: steps and variable starts and ends is not supported");
+                MIGRAPHX_THROW(
+                    "PARSE_SLICE: steps and variable starts and/or ends is not supported");
            if(sd.op.axes.empty())
                MIGRAPHX_THROW("PARSE_SLICE: steps and variable axes is not supported");
        }

-        assert(sd.steps.empty() or sd.steps.size() == sd.op.axes.size());
-
        // If any axes have negative step, prepare to add a "reverse" op
        for(auto i : range(sd.steps.size()))
        {

--- a/src/onnx/parse_spacetodepth.cpp
+++ b/src/onnx/parse_spacetodepth.cpp
@@ -73,8 +73,7 @@ struct parse_spacetodepth : op_parser<parse_spacetodepth>
        std::vector<int64_t> perm = {0, 3, 5, 1, 2, 4};
        auto temp1 = info.add_instruction(make_op("reshape", {{"dims", trans_lens}}), args[0]);
        auto temp2 = info.add_instruction(make_op("transpose", {{"permutation", perm}}), temp1);
-        return info.add_instruction(make_op("reshape", {{"dims", res_lens}}),
-                                    info.make_contiguous(temp2));
+        return info.add_instruction(make_op("reshape", {{"dims", res_lens}}), temp2);
    }
 };


--- a/src/onnx/parse_split.cpp
+++ b/src/onnx/parse_split.cpp
@@ -68,13 +68,34 @@ struct parse_split : op_parser<parse_split>
        // no split attribute, input is equally divided
        else
        {
-            if((lens[tuned_axis] % info.num_outputs) != 0)
+            std::size_t num_outputs = info.num_outputs;
+            // the num_outputs attribute seems to be redundant since we already have
+            // node_info::num_outputs, but we can still perform an error check
+            if(contains(info.attributes, "num_outputs"))
            {
-                MIGRAPHX_THROW("PARSE_SPLIT: input cannot be equally divided into " +
-                               std::to_string(info.num_outputs) + " splits!");
+                num_outputs =
+                    parser.parse_value(info.attributes.at("num_outputs")).at<std::size_t>();
+                if(num_outputs != info.num_outputs)
+                {
+                    MIGRAPHX_THROW("PARSE_SPLIT: num_outputs attribute " +
+                                   std::to_string(num_outputs) +
+                                   " doesn't match actual number of outputs " +
+                                   std::to_string(info.num_outputs) + "!");
+                }
+            }
+
+            if(lens[tuned_axis] % num_outputs == 0)
+            {
+                std::size_t chunk_size = lens[tuned_axis] / num_outputs;
+                vec_splits.resize(num_outputs, chunk_size);
+            }
+            else
+            {
+                std::size_t chunk_size      = lens[tuned_axis] / num_outputs + 1;
+                std::size_t last_chunk_size = lens[tuned_axis] - chunk_size * (num_outputs - 1);
+                vec_splits.resize(num_outputs - 1, chunk_size);
+                vec_splits.push_back(last_chunk_size);
            }
-            auto dl = lens[tuned_axis] / info.num_outputs;
-            vec_splits.resize(info.num_outputs, dl);
        }

        if(std::accumulate(vec_splits.begin(), vec_splits.end(), int64_t(0)) !=

--- a/src/onnx/parse_trilu.cpp
+++ b/src/onnx/parse_trilu.cpp
@@ -56,9 +56,6 @@ struct parse_trilu : op_parser<parse_trilu>
            k = arg_k.at<int>();
        }

-        if(k < 0)
-            MIGRAPHX_THROW("PARSE_TRILU: negative k values not supported");
-
        if(contains(info.attributes, "upper"))
        {
            upper = static_cast<bool>(info.attributes.at("upper").i());
@@ -69,9 +66,12 @@ struct parse_trilu : op_parser<parse_trilu>
        // when creating the mask, if upper == 1,
        // the inner triangle will have values set to 0
        std::vector<bool> mask_mat(num_rows * num_cols, upper);
+        // if upper == 0, kth diagonal must also be masked
+        if(not upper)
+            k++;
        for(size_t i = 0; i < num_rows; i++)
        {
-            for(size_t j = 0; j < std::min(k, static_cast<int>(num_cols)); j++)
+            for(int j = 0; j < std::min(k, static_cast<int>(num_cols)); j++)
            {
                mask_mat[i * num_cols + j] = not upper;
            }

--- a/src/process.cpp
+++ b/src/process.cpp
@@ -26,13 +26,23 @@
 #include <migraphx/env.hpp>
 #include <functional>
 #include <iostream>
+#include <optional>
+
+#ifdef _WIN32
+// cppcheck-suppress definePrefix
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
+#else
 #include <unistd.h>
+#endif

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_CMD_EXECUTE)

+#ifndef _WIN32
+
 std::function<void(const char*)> redirect_to(std::ostream& os)
 {
    return [&](const char* x) { os << x; };
@@ -74,6 +84,155 @@ int exec(const std::string& cmd, std::function<void(process::writer)> std_in)
    });
 }

+#else
+
+constexpr std::size_t MIGRAPHX_PROCESS_BUFSIZE = 4096;
+
+// Anonymous pipe connecting this process to a child process. Owns both handles and closes
+// whichever of them is still open on destruction.
+class pipe
+{
+    public:
+    // The end of the pipe that stays with this (parent) process; the opposite end is the one
+    // meant to be inherited by the child.
+    enum class end
+    {
+        read,
+        write
+    };
+
+    // Creates the pipe. The handles are created with the inheritance setting given by
+    // inherit_handle, then the end selected by parent_end is made non-inheritable so that only
+    // the child's end crosses into the child process.
+    // Throws the GetLastError() code (a DWORD) on failure.
+    explicit pipe(bool inherit_handle = true, end parent_end = end::read)
+    {
+        SECURITY_ATTRIBUTES attrs;
+        attrs.nLength              = sizeof(SECURITY_ATTRIBUTES);
+        attrs.bInheritHandle       = inherit_handle ? TRUE : FALSE;
+        attrs.lpSecurityDescriptor = nullptr;
+
+        if(CreatePipe(&m_read, &m_write, &attrs, 0) == FALSE)
+            throw GetLastError();
+
+        // SetHandleInformation expects the handle value itself, not the address of the member
+        HANDLE parent_handle = (parent_end == end::read) ? m_read : m_write;
+        if(SetHandleInformation(parent_handle, HANDLE_FLAG_INHERIT, 0) == FALSE)
+        {
+            // The destructor does not run when the constructor throws, so release here
+            const DWORD error{GetLastError()};
+            close_handle(m_read);
+            close_handle(m_write);
+            throw error;
+        }
+    }
+
+    pipe(const pipe&)            = delete;
+    pipe& operator=(const pipe&) = delete;
+
+    // Transfers ownership; the moved-from pipe is left without handles so that its destructor
+    // does not close them a second time.
+    pipe(pipe&& other) noexcept : m_write(other.m_write), m_read(other.m_read)
+    {
+        other.m_write = nullptr;
+        other.m_read  = nullptr;
+    }
+
+    ~pipe()
+    {
+        close_handle(m_read);
+        close_handle(m_write);
+    }
+
+    // Reads up to length bytes into buffer.
+    // Returns nullopt when the read fails (which includes the writing side having closed the
+    // pipe); otherwise a pair of (more data pending in the current message, bytes read).
+    std::optional<std::pair<bool, DWORD>> read(LPVOID buffer, DWORD length) const
+    {
+        DWORD bytes_read{0};
+        if(ReadFile(m_read, buffer, length, &bytes_read, nullptr) == FALSE)
+        {
+            if(GetLastError() != ERROR_MORE_DATA)
+            {
+                return std::nullopt;
+            }
+            return {{true, bytes_read}};
+        }
+        return {{false, bytes_read}};
+    }
+
+    HANDLE get_read_handle() const { return m_read; }
+
+    // Closes the read end early; returns false if CloseHandle reports a failure.
+    bool close_read_handle() { return close_handle(m_read); }
+
+    // Writes length bytes from buffer; returns false if the write fails.
+    bool write(LPCVOID buffer, DWORD length) const
+    {
+        DWORD bytes_written{0};
+        // WriteFile only promises a nonzero value on success, so test against FALSE
+        return WriteFile(m_write, buffer, length, &bytes_written, nullptr) != FALSE;
+    }
+
+    HANDLE get_write_handle() const { return m_write; }
+
+    // Closes the write end early; returns false if CloseHandle reports a failure.
+    bool close_write_handle() { return close_handle(m_write); }
+
+    private:
+    // Closes handle if it is open and resets it to nullptr, so closing twice is harmless.
+    static bool close_handle(HANDLE& handle)
+    {
+        if(handle == nullptr)
+            return true;
+        const bool closed = CloseHandle(handle) != FALSE;
+        handle            = nullptr;
+        return closed;
+    }
+
+    HANDLE m_write = nullptr, m_read = nullptr;
+};
+
+// Launches cmd as a child process whose standard input is fed from `in` and whose standard
+// output and error are captured by `out`, calls f(in, out) to exchange data with it, then waits
+// for the child to finish.
+// Returns the child's exit code, or a Windows error code when one of the steps fails.
+template <typename F>
+int exec(const std::string& cmd, F f)
+{
+    try
+    {
+        if(enabled(MIGRAPHX_TRACE_CMD_EXECUTE{}))
+            std::cout << cmd << std::endl;
+
+        STARTUPINFO info;
+        PROCESS_INFORMATION process_info;
+
+        // The parent writes to the child's stdin and reads the child's stdout/stderr
+        pipe in{true, pipe::end::write};
+        pipe out{true, pipe::end::read};
+
+        ZeroMemory(&info, sizeof(STARTUPINFO));
+        info.cb         = sizeof(STARTUPINFO);
+        info.hStdError  = out.get_write_handle();
+        info.hStdOutput = out.get_write_handle();
+        info.hStdInput  = in.get_read_handle();
+        info.dwFlags |= STARTF_USESTDHANDLES;
+
+        ZeroMemory(&process_info, sizeof(process_info));
+
+        // CreateProcess is allowed to modify the command line, so hand it a writable copy
+        std::string command_line = cmd;
+        if(CreateProcess(nullptr,
+                         command_line.data(),
+                         nullptr,
+                         nullptr,
+                         TRUE,
+                         0,
+                         nullptr,
+                         nullptr,
+                         &info,
+                         &process_info) == FALSE)
+        {
+            return static_cast<int>(GetLastError());
+        }
+
+        // Drop the parent's copies of the child's ends: while they stay open, reads on `out`
+        // can never observe the end of the stream.
+        out.close_write_handle();
+        in.close_read_handle();
+
+        f(in, out);
+
+        // Nothing more to send: let the child see end-of-file on its standard input
+        in.close_write_handle();
+
+        // Discard any output f did not consume so the child cannot stall on a full pipe
+        char discard[MIGRAPHX_PROCESS_BUFSIZE];
+        for(;;)
+        {
+            const auto leftover = out.read(discard, MIGRAPHX_PROCESS_BUFSIZE);
+            if(not leftover or leftover->second == 0)
+                break;
+        }
+
+        WaitForSingleObject(process_info.hProcess, INFINITE);
+
+        DWORD status{};
+        // Do not report success when the exit code could not be obtained
+        if(GetExitCodeProcess(process_info.hProcess, &status) == FALSE)
+            status = GetLastError();
+
+        CloseHandle(process_info.hProcess);
+        CloseHandle(process_info.hThread);
+
+        return static_cast<int>(status);
+    }
+    // cppcheck-suppress catchExceptionByValue
+    catch(DWORD last_error)
+    {
+        return static_cast<int>(last_error);
+    }
+}
+
+// Runs cmd and copies everything the child writes to its standard output/error onto this
+// process's standard output handle.
+int exec(const std::string& cmd)
+{
+    TCHAR buffer[MIGRAPHX_PROCESS_BUFSIZE];
+    HANDLE std_out{GetStdHandle(STD_OUTPUT_HANDLE)};
+    return (std_out == nullptr or std_out == INVALID_HANDLE_VALUE)
+               ? GetLastError()
+               : exec(cmd, [&](const pipe&, const pipe& out) {
+                     for(;;)
+                     {
+                         const auto result = out.read(buffer, MIGRAPHX_PROCESS_BUFSIZE);
+                         // A failed read (the child closed its end) or an empty read ends the
+                         // stream; every successful read is forwarded before reading again.
+                         if(not result or result->second == 0)
+                             break;
+                         DWORD written;
+                         if(WriteFile(std_out, buffer, result->second, &written, nullptr) ==
+                            FALSE)
+                             break;
+                     }
+                 });
+}
+
+// Runs cmd and lets std_in feed the child's standard input through the writer it is given.
+int exec(const std::string& cmd, std::function<void(process::writer)> std_in)
+{
+    return exec(cmd, [&](const pipe& in, const pipe&) {
+        std_in([&](const char* buffer, std::size_t n) {
+            in.write(buffer, static_cast<DWORD>(n));
+        });
+    });
+}
+
+#endif
+
 struct process_impl
 {
    std::string command{};
@@ -119,7 +278,14 @@ process& process::cwd(const fs::path& p)
    return *this;
 }

-void process::exec() { impl->check_exec(impl->get_command(), redirect_to(std::cout)); }
+// Runs the configured command through check_exec, selecting the platform-specific call.
+void process::exec()
+{
+#ifdef _WIN32
+    // No redirect callback is passed on Windows; presumably the single-argument exec overload
+    // forwards the child's output itself.
+    impl->check_exec(impl->get_command());
+#else
+    // Elsewhere the child's output is redirected to std::cout
+    impl->check_exec(impl->get_command(), redirect_to(std::cout));
+#endif
+}

 void process::write(std::function<void(process::writer)> pipe_in)
 {

--- a/src/program.cpp
+++ b/src/program.cpp
@@ -936,7 +936,7 @@ void program::perf_report(std::ostream& os,
    os << std::endl;

    os << "Batch size: " << batch << std::endl;
-    os << "Rate: " << rate * batch << "/sec" << std::endl;
+    os << "Rate: " << rate * batch << " inferences/sec" << std::endl;
    os << "Total time: " << total_time << "ms" << std::endl;
    os << "Total instructions time: " << total_instruction_time << "ms" << std::endl;
    os << "Overhead time: " << overhead_time << "ms"

--- a/src/py/CMakeLists.txt
+++ b/src/py/CMakeLists.txt
@@ -22,27 +22,24 @@
 # THE SOFTWARE.
 #####################################################################################

-option(MIGRAPHX_ENABLE_PYTHON "Enable python bindings" ON)
 add_library(migraphx_py py_loader.cpp)
 migraphx_generate_export_header(migraphx_py)
 target_include_directories(migraphx_py PRIVATE include)
 target_link_libraries(migraphx_py PUBLIC migraphx)
 rocm_install_targets(TARGETS migraphx_py INCLUDE include)
-if(MIGRAPHX_ENABLE_PYTHON)
-    include(PythonModules)

+include(PythonModules)

-    foreach(PYTHON_VERSION ${PYTHON_VERSIONS})
-        py_add_module(migraphx_pybind_${PYTHON_VERSION} migraphx_py.cpp PYTHON_VERSION ${PYTHON_VERSION} PYTHON_MODULE migraphx)
-        target_link_libraries(migraphx_pybind_${PYTHON_VERSION} PRIVATE migraphx migraphx_tf migraphx_onnx migraphx_all_targets)
-        rocm_install_targets(TARGETS migraphx_pybind_${PYTHON_VERSION})
-        add_dependencies(migraphx_py migraphx_pybind_${PYTHON_VERSION})
-        
-        add_library(migraphx_py_${PYTHON_VERSION} py.cpp)
-        target_include_directories(migraphx_py_${PYTHON_VERSION} PRIVATE include)
-        target_link_libraries(migraphx_py_${PYTHON_VERSION} PUBLIC migraphx)
-        target_link_libraries(migraphx_py_${PYTHON_VERSION} PRIVATE pybind11::pybind11 python${PYTHON_VERSION}::runtime)
-        rocm_install_targets(TARGETS migraphx_py_${PYTHON_VERSION})
-        add_dependencies(migraphx_py migraphx_py_${PYTHON_VERSION})
-    endforeach()
-endif()
+# Create the binding targets once for every entry in PYTHON_VERSIONS.
+foreach(PYTHON_VERSION ${PYTHON_VERSIONS})
+    # Module target built from migraphx_py.cpp (PYTHON_MODULE migraphx)
+    py_add_module(
+        migraphx_pybind_${PYTHON_VERSION} migraphx_py.cpp
+        PYTHON_VERSION ${PYTHON_VERSION}
+        PYTHON_MODULE migraphx)
+    target_link_libraries(
+        migraphx_pybind_${PYTHON_VERSION}
+        PRIVATE migraphx migraphx_tf migraphx_onnx migraphx_all_targets)
+    rocm_install_targets(TARGETS migraphx_pybind_${PYTHON_VERSION})
+    add_dependencies(migraphx_py migraphx_pybind_${PYTHON_VERSION})
+
+    # Per-version library built from py.cpp
+    add_library(migraphx_py_${PYTHON_VERSION} py.cpp)
+    target_include_directories(migraphx_py_${PYTHON_VERSION} PRIVATE include)
+    target_link_libraries(
+        migraphx_py_${PYTHON_VERSION}
+        PUBLIC migraphx
+        PRIVATE pybind11::pybind11 python${PYTHON_VERSION}::runtime)
+    rocm_install_targets(TARGETS migraphx_py_${PYTHON_VERSION})
+    add_dependencies(migraphx_py migraphx_py_${PYTHON_VERSION})
+endforeach()
--- a/src/py/migraphx_py.cpp
+++ b/src/py/migraphx_py.cpp
@@ -472,7 +472,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
               map_dyn_input_dims,
           bool skip_unknown_operators,
           bool print_program_on_error,
-           int64_t max_loop_iterations) {
+           int64_t max_loop_iterations,
+           int64_t limit_max_iterations) {
            migraphx::onnx_options options;
            options.default_dim_value      = default_dim_value;
            options.default_dyn_dim_value  = default_dyn_dim_value;
@@ -481,6 +482,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
            options.skip_unknown_operators = skip_unknown_operators;
            options.print_program_on_error = print_program_on_error;
            options.max_loop_iterations    = max_loop_iterations;
+            options.limit_max_iterations   = limit_max_iterations;
            return migraphx::parse_onnx(filename, options);
        },
        "Parse onnx file",
@@ -492,7 +494,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
            std::unordered_map<std::string, std::vector<migraphx::shape::dynamic_dimension>>(),
        py::arg("skip_unknown_operators") = false,
        py::arg("print_program_on_error") = false,
-        py::arg("max_loop_iterations")    = 10);
+        py::arg("max_loop_iterations")    = 10,
+        py::arg("limit_max_iterations")   = std::numeric_limits<uint16_t>::max());

    m.def(
        "parse_onnx_buffer",