Merge branch 'develop' into dyn_resize_gather

0662a9a3 · Brian Pickrell · GitHub · b74d3a8f · 35e5298e · 0662a9a3
Unverified Commit 0662a9a3 authored Nov 09, 2023 by Brian Pickrell Committed by GitHub Nov 09, 2023
20 changed files
--- a/src/driver/main.cpp
+++ b/src/driver/main.cpp
@@ -59,6 +59,13 @@ namespace migraphx {
 namespace driver {
 inline namespace MIGRAPHX_INLINE_NS {

+/// Builds the version banner string "MIGraphX Version: <major>.<minor>.<patch>.<tweak>".
+/// The major/minor/patch macros are converted with std::to_string; the tweak component is
+/// appended by adjacent string-literal concatenation, so this relies on
+/// MIGRAPHX_VERSION_TWEAK expanding to a string literal.
+inline std::string get_version()
+{
+    return "MIGraphX Version: " + std::to_string(MIGRAPHX_VERSION_MAJOR) + "." +
+           std::to_string(MIGRAPHX_VERSION_MINOR) + "." + std::to_string(MIGRAPHX_VERSION_PATCH) +
+           "." MIGRAPHX_VERSION_TWEAK;
+}
+
 struct loader
 {
    std::string model;
@@ -597,17 +604,6 @@ struct verify : command<verify>
    }
 };

-struct version : command<version>
-{
-    void parse(const argument_parser&) {}
-    void run() const
-    {
-        std::cout << "MIGraphX Version: " << MIGRAPHX_VERSION_MAJOR << "." << MIGRAPHX_VERSION_MINOR
-                  << "." << MIGRAPHX_VERSION_PATCH << "."
-                  << MIGRAPHX_STRINGIZE(MIGRAPHX_VERSION_TWEAK) << std::endl;
-    }
-};
-
 struct compile : command<compile>
 {
    compiler c;
@@ -760,16 +756,14 @@ struct main_command
    }
    void parse(argument_parser& ap)
    {
-        std::string version_str = "MIGraphX Version: " + std::to_string(MIGRAPHX_VERSION_MAJOR) +
-                                  "." + std::to_string(MIGRAPHX_VERSION_MINOR) + "." +
-                                  std::to_string(MIGRAPHX_VERSION_PATCH) + "." +
-                                  MIGRAPHX_STRINGIZE(MIGRAPHX_VERSION_TWEAK);
+        std::string version_str = get_version();
        ap(wrong_commands, {}, ap.metavar("<command>"), ap.append());
        ap(nullptr, {"-h", "--help"}, ap.help("Show help"), ap.show_help(get_command_help()));
        ap(nullptr,
           {"-v", "--version"},
           ap.help("Show MIGraphX version"),
           ap.show_help(version_str));
+        ap(nullptr, {"--ort-sha"}, ap.help("Show MIGraphX onnx runtime SHA"));

        // Trim command off of exe name
        ap.set_exe_name(ap.get_exe_name().substr(0, ap.get_exe_name().size() - 5));
@@ -812,7 +806,6 @@ using namespace migraphx::driver; // NOLINT
 int main(int argc, const char* argv[])
 {
    std::vector<std::string> args(argv + 1, argv + argc);
-
    // no argument, print the help infomration by default
    if(args.empty())
    {
@@ -822,15 +815,27 @@ int main(int argc, const char* argv[])
    auto&& m = get_commands();
    auto cmd = args.front();

-    if(cmd == "ort-sha")
+    if(cmd == "--ort-sha")
    {
        std::cout << MIGRAPHX_ORT_SHA1 << std::endl;
        return 0;
    }
+    if(cmd == "-v" or cmd == "--version")
+    {
+        std::cout << get_version() << std::endl;
+        return 0;
+    }

    if(m.count(cmd) > 0)
    {
-        m.at(cmd)(argv[0], {args.begin() + 1, args.end()});
+        std::string driver_invocation =
+            std::string(argv[0]) + " " + migraphx::to_string_range(args, " ");
+        std::cout << "Running [ " << get_version() << " ]: " << driver_invocation << std::endl;
+
+        m.at(cmd)(argv[0],
+                  {args.begin() + 1, args.end()}); // run driver command found in commands map
+
+        std::cout << "[ " << get_version() << " ] Complete: " << driver_invocation << std::endl;
    }
    else
    {

--- a/src/include/migraphx/onnx.hpp
+++ b/src/include/migraphx/onnx.hpp
@@ -48,8 +48,12 @@ struct onnx_options
    bool skip_unknown_operators = false;
    /// Print program if an error occurs
    bool print_program_on_error = false;
-    /// Max iter num for the loop operator
+    /// Max iter num for the loop operator if trip count is not set
    int64_t max_loop_iterations = 10;
+    /// Max iter limit for the loop operator.
+    /// Since the loop will become a tensor of max iter size, a huge number can cause overflow
+    /// during shape computations.
+    int64_t limit_max_iterations = std::numeric_limits<uint16_t>::max();
    /// Use dynamic output for operators when available
    bool use_dyn_output = false;
 };

--- a/src/include/migraphx/op/allocate.hpp
+++ b/src/include/migraphx/op/allocate.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -37,20 +37,22 @@ namespace op {
 * Static allocate:
 * No inputs: `allocate()`
 * `this.s` attribute set to the static output shape of the buffer.
+ * `this.s` attribute can be set to a dynamic output shape; however this will allocate the maximum
+ * buffer size for that case
 *
 * Dynamic allocate:
 * One input: `allocate(output_dims)`
 * `output_dims` are the output buffer dimensions and has a static shape.
- * Either `this.s` or `this.buf_type` must be set to calculate the dynamic output shape at compute
- * time. If `this.buf_type` is set, the compute_shape() of allocate at compile time will have
- * dynamic_dimensions from {0, max_int} with rank = output_dims.ndim(). If `this.s` is set then the
- * compute_shape() will output `this.s`; `this.s` should be a dynamic shape.
+ * Either `this.s` or `this.buf_type` (but not both) must be set to calculate the dynamic output
+ * shape at compute time. If `this.buf_type` is set, the compute_shape() of allocate at compile time
+ * will have dynamic_dimensions from {0, max_int} with rank = output_dims.ndim(). If `this.s` is set
+ * then the compute_shape() will output `this.s`; `this.s` should be a dynamic shape.
 */
 struct allocate
 {
-    shape s{};
+    optional<shape> s;
    // for dynamic allocate to set the buffer type
-    shape::type_t buf_type = shape::half_type;
+    optional<shape::type_t> buf_type;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
@@ -62,8 +64,12 @@ struct allocate

    shape compute_shape(const std::vector<shape>& inputs) const
    {
-        if(s != shape())
+        if(s.has_value())
        {
+            if(buf_type.has_value())
+            {
+                MIGRAPHX_THROW("ALLOCATE: shape and buf_type attributes both set");
+            }
            if(inputs.size() == 1)
            {
                migraphx::check_shapes{inputs, *this, false}.only_dims(1);
@@ -72,16 +78,20 @@ struct allocate
            {
                migraphx::check_shapes{inputs, *this, false}.has(0);
            }
-            return s;
+            return s.value();
        }
        else
        {
+            if(not buf_type.has_value())
+            {
+                MIGRAPHX_THROW("ALLOCATE: shape and buf_type attributes both not set");
+            }
            migraphx::check_shapes{inputs, *this, false}.has(1).only_dims(1);
            const auto& out_dims = inputs.at(0);
            std::size_t max_val = std::numeric_limits<std::size_t>::max();
            std::vector<shape::dynamic_dimension> dyn_dims(out_dims.lens().at(0),
                                                           shape::dynamic_dimension{0, max_val});
-            return {buf_type, dyn_dims};
+            return {buf_type.value(), dyn_dims};
        }
    }
    argument compute(const shape& output_shape, const std::vector<argument>& args) const
@@ -94,7 +104,11 @@ struct allocate
        {
            std::vector<std::size_t> output_dims(output_shape.ndim());
            args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); });
-            return argument{shape{buf_type, output_dims}};
+            if(s)
+            {
+                return argument{shape{s->type(), output_dims}};
+            }
+            return argument{shape{buf_type.value(), output_dims}};
        }
    }
 };

--- a/src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,25 +21,32 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
-#define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
+#ifndef MIGRAPHX_GUARD_OPERATORS_ISINF_HPP
+#define MIGRAPHX_GUARD_OPERATORS_ISINF_HPP

-#include <migraphx/program.hpp>
-#include <migraphx/gpu/context.hpp>
+#include <migraphx/op/unary.hpp>
+#include <migraphx/config.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
+namespace op {

-namespace gpu {
-
-struct MIGRAPHX_GPU_EXPORT pack_int8_args
+struct isinf : unary<isinf>
 {
-    std::string name() const { return "gpu::pack_int8_args"; }
-    void apply(module& m) const;
-    shape pack_int8_shape(const shape& s) const;
+    auto apply() const
+    {
+        return [&](auto x) { return std::isinf(x); };
+    }
+
+    std::string name() const { return "isinf"; }
+
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        return unary<isinf>::compute_shape(std::move(inputs)).with_type(shape::bool_type);
+    }
 };

-} // namespace gpu
+} // namespace op
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx


--- a/src/include/migraphx/op/multinomial.hpp
+++ b/src/include/migraphx/op/multinomial.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,11 +21,52 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
+
+/**
+ * Multinomial or categorical distribution.  Performs a sampling of random input
+ *         and returns, for each sample, the index of the category (bucket) it
+ *         falls into.  This does not require the standard multinomial
+ *         distribution but instead takes a probability distribution, i.e. cumulative
+ *         distribution function (CDF), as its first input.
+ *
+ *      Inputs:   args[0] - a tensor of probabilities for each category.  Values are
+ *                          cumulative distribution function (CDF)
+ *                          totals as provided by operation prefix_scan_sum.  Values are
+ *                          cumulative probabilities (i.e. start with any set of numbers > 0
+ *                          and then apply prefix_scan_sum).  Values do not need to be
+ *                          normalized to sum to 1; this is done in runtime computation.
+ *
+ *                          This input has Rank 2.  Dimension 0 is batch #, so that there can be
+ *                          a different CDF for each iteration in the batch.  The size of dimension
+ *                          1 is the number of categories.
+ *
+ *                args[1] - a tensor of random numbers.  The last dimension is the sample
+ *                          size, i.e. the number of
+ *                          random samples in each iteration of the batch.  Nominally
+ *                          has two dimensions where the first dimension is batch size, but
+ *                          any reshaping such that the total
+ *                          number of elements is (batch_size * sample_size) is legal.
+ *
+ *                          Values as created by a std::mt19937 like this:
+ *
+ *                           size_t sample_size = 100000;
+ *                           float seed         = 0.0f;
+ *                           std::mt19937 gen(seed);
+ *                           std::uniform_real_distribution<> dis(0.0, 1.0);
+ *                           std::vector<float> rand_samples(sample_size);
+ *                           std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return
+ *                                dis(gen); });
+ *
+ *        Output:   A 2D tensor holding the sampled category index for each random input value.
+ *                  Dimensions are (first dimension of args[0], last dimension of args[1]).
+ *
+*/
 #ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
 #define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP

-#include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <migraphx/par_for.hpp>
 #include <migraphx/reflect.hpp>
 #include <random>
@@ -47,22 +88,35 @@ struct multinomial
    std::string name() const { return "multinomial"; }
    shape compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this}.has(2).only_dims(2);
-        size_t sample_size = inputs.back().lens().back();
+        check_shapes{inputs, *this, true}.has(2).only_dims(2);

-        if(not contains({shape::int32_type, shape::int64_type}, dtype))
-            MIGRAPHX_THROW(
-                "Multinomial: Invalid output type. Valid types are int32_type and int64_type.");
+        if(inputs.back().ndim() < 1)
+            MIGRAPHX_THROW("Multinomial: Second input shape (sample) has no dimensions");
+        if(dtype == shape::bool_type)
+            MIGRAPHX_THROW("Multinomial: boolean output type invalid.");

-        return {dtype, {inputs.front().lens().front(), sample_size}};
+        // Output takes one dimension from each of the two input shapes.  If they are both fixed,
+        // return a static shape
+        if((not inputs.front().dynamic()) or (inputs.front().dyn_dims().front().is_fixed()))
+        {
+            if((not inputs.back().dynamic()) or (inputs.back().dyn_dims().back().is_fixed()))
+            {
+                size_t batch = {inputs.front().max_lens().front()};
+                size_t sample_size{inputs.back().max_lens().back()};
+                return {dtype, {batch, sample_size}};
+            }
+        }
+        return {dtype,
+                {inputs.front().to_dynamic().dyn_dims().front(),
+                 inputs.back().to_dynamic().dyn_dims().back()}};
    }

-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        argument result{output_shape};
-        size_t batch_size  = output_shape.lens().front();
+        argument result{dyn_out.computed_shape};
+        size_t batch_size  = dyn_out.computed_shape.lens().front();
        size_t class_size  = args[0].get_shape().lens().back();
-        size_t sample_size = output_shape.lens().back();
+        size_t sample_size = dyn_out.computed_shape.lens().back();

        visit_all(args[0], args[1])([&](auto cdf, auto dist) {
            result.visit([&](auto output) {
@@ -70,13 +124,16 @@ struct multinomial
                    auto idx       = args[1].get_shape().multi(i);
                    auto cdf_begin = cdf.begin() + (idx[0] * class_size);
                    auto cdf_end   = cdf_begin + class_size;
+
+                    // std::upper_bound returns an iterator to the bucket the random value
+                    // dist[i] belongs in, once it is scaled by the CDF total (last CDF entry)
                    auto sample_iter =
                        std::upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end)));
+                    // convert iterator to an integer index
                    output[i] = std::distance(cdf_begin, sample_iter);
                });
            });
        });
-
        return result;
    }
 };

--- a/src/include/migraphx/op/round.hpp
+++ b/src/include/migraphx/op/round.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,24 +21,28 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#ifndef MIGRAPHX_GUARD_OPERATORS_ROUND_HPP
-#define MIGRAPHX_GUARD_OPERATORS_ROUND_HPP
+#ifndef MIGRAPHX_GUARD_OPERATORS_NEARBYINT_HPP
+#define MIGRAPHX_GUARD_OPERATORS_NEARBYINT_HPP

 #include <migraphx/op/unary.hpp>
 #include <migraphx/config.hpp>
+#include <fenv.h>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
-
-struct round : unary<round>
+/// Element-wise rounding to an integral value using std::nearbyint, with the
+/// floating-point rounding mode forced to FE_TONEAREST for the duration of each call.
+struct nearbyint : unary<nearbyint>
 {
    auto apply() const
    {
-        return [](auto x) { return std::round(x); };
+        return [](auto x) {
+            // Save the caller's rounding mode so it can be restored after rounding.
+            const auto rounding_mode = fegetround();
+            fesetround(FE_TONEAREST);
+            const auto rounded = std::nearbyint(x);
+            // Restore before returning; a statement placed after `return` would never run
+            // and would leave the thread's rounding mode changed.
+            fesetround(rounding_mode);
+            return rounded;
+        };
    }
 };
-
 } // namespace op
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/include/migraphx/op/normalize_attribute.hpp
+++ b/src/include/migraphx/op/normalize_attribute.hpp
@@ -40,6 +40,8 @@ namespace op {
 * 2. use_rank (default) vs use_len:
 *  `use_rank` sets the max value/index of the attribute as the rank of lens.
 *  `use_lens` sets the max value/index as the corresponding value in lens at the axes index.
+ *      Uses the dynamic_dimension.max value for dynamic shapes. Returns the original vector
+ *      (no normalization) if any of dynamic_dimension[axes] are not fixed.
 * 3. `clip_min` vs. `not_clip_min` (default):
 *  Clip values less than the minimum to the minimum or not.
 * 4. `include_min` vs. `exclude_min` (default):

--- a/src/include/migraphx/op/prefix_scan_op.hpp
+++ b/src/include/migraphx/op/prefix_scan_op.hpp
@@ -22,6 +22,12 @@
 * THE SOFTWARE.
 */

+/**
+ * Parent struct for prefix scan ops.  A prefix scan is a mathematical entity useful
+ * in parallelizing various computations.  Given a list of numbers, a prefix scan
+ * op returns an equal size list of running totals of the values.  Other operations
+ * besides addition can be supported by child ops.
+ */
 #ifndef MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
 #define MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP


--- a/src/include/migraphx/op/quantizelinear.hpp
+++ b/src/include/migraphx/op/quantizelinear.hpp
@@ -30,11 +30,11 @@
 #include <migraphx/par_for.hpp>
 #include <migraphx/value.hpp>
 #include <cmath>
+#include <fenv.h>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
-
 struct quantizelinear
 {
    std::string name() const { return "quantizelinear"; }
@@ -71,26 +71,26 @@ struct quantizelinear
        {
            y_zero_point = args.at(2);
        }
-
        argument result{output_shape};
+        auto rounding_mode = fegetround();
+        fesetround(FE_TONEAREST);
        visit_all(result, y_zero_point)([&](auto output, auto zero_pts) {
            visit_all(x, y_scale)([&](auto input, auto scales) {
                using quant_type = typename decltype(output)::value_type;
                auto min_value   = std::numeric_limits<quant_type>::min();
                auto max_value   = std::numeric_limits<quant_type>::max();
                par_for(output_shape.elements(), [&](auto i) {
-                    int64_t quantized = static_cast<int64_t>(std::round(input[i] / scales[i])) +
+                    int64_t quantized = static_cast<int64_t>(std::nearbyint(input[i] / scales[i])) +
                                        static_cast<int64_t>(zero_pts[i]);
                    output[i] = std::max(static_cast<int64_t>(min_value),
                                         std::min(static_cast<int64_t>(max_value), quantized));
                });
            });
        });
-
+        fesetround(rounding_mode);
        return result;
    }
 };
-
 } // namespace op
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/include/migraphx/op/random_uniform.hpp
+++ b/src/include/migraphx/op/random_uniform.hpp
@@ -65,11 +65,10 @@ struct random_uniform
        return inputs.at(1);
    }

-    argument compute(const shape&, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
        // Output goes into the passed buffer, not the shape output.
-        auto result = args[1];
-
+        argument result{dyn_out.computed_shape};
        uint64_t local_seed = args[0].at<uint64_t>(0);
        std::mt19937 gen(local_seed);


--- a/src/include/migraphx/op/slice.hpp
+++ b/src/include/migraphx/op/slice.hpp
@@ -38,6 +38,18 @@ namespace op {

 /**
 * Slice operator that accepts variable axes, starts and ends.
+ * All of `starts`, `ends`, and `axes` must be supplied by either
+ * their attribute or an input (but not both).
+ *
+ * Valid calls:
+ * slice(input); axes, starts, ends set
+ * slice(input, starts); axes, ends set
+ * slice(input, ends); starts, axes set
+ * slice(input, axes); starts, ends set
+ * slice(input, starts, ends); axes set
+ * slice(input, starts, axes); ends set
+ * slice(input, ends, axes); starts set
+ * slice(input, starts, ends, axes); none set
 *
 * Attributes:
 * axes: constant axes to slice over (optional)
@@ -46,8 +58,8 @@ namespace op {
 *
 * Parameters:
 * data: the input tensor to slice (dynamic or static shape)
- * input_starts: starting indicies of slice (optional, static shape)
- * input_ends: ending indicies of slice (optional, static shape)
+ * input_starts: starting indices of slice (optional, static shape)
+ * input_ends: ending indices of slice (optional, static shape)
 * input_axes: axes to slice over (optional, static shape)
 */
 struct slice
@@ -56,6 +68,18 @@ struct slice
    std::vector<int64_t> starts{};
    std::vector<int64_t> ends{};

+    /**
+     * Named arrays for the set attribute possibilities.
+     */
+    static constexpr std::array<bool, 3> all_set     = {true, true, true};
+    static constexpr std::array<bool, 3> ends_axes   = {false, true, true};
+    static constexpr std::array<bool, 3> starts_axes = {true, false, true};
+    static constexpr std::array<bool, 3> starts_ends = {true, true, false};
+    static constexpr std::array<bool, 3> axes_only   = {false, false, true};
+    static constexpr std::array<bool, 3> ends_only   = {false, true, false};
+    static constexpr std::array<bool, 3> starts_only = {true, false, false};
+    static constexpr std::array<bool, 3> none_set    = {false, false, false};
+
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
@@ -63,24 +87,26 @@ struct slice
    }

    /**
-     * Ensure that attribute vectors axes, starts, and ends are all the same size and values are
-     * within limits.
+     * Ensure that attribute axes is within limits.
+     * Will attempt to normalize starts and ends; but will use the dynamic_dimension.max
+     * values for dynamic shapes. This makes it so you have to renormalize for
+     * non-fixed dynamic_dimensions.
     */
    value attributes() const
    {
-        value normalize     = value::object{};
-        normalize["axes"]   = value::array{normalize_attribute::include_min};
-        normalize["starts"] = value::array{normalize_attribute::clip_max,
-                                           normalize_attribute::clip_min,
-                                           normalize_attribute::include_max,
-                                           normalize_attribute::use_len,
-                                           normalize_attribute::include_min};
-        normalize["ends"]   = value::array{normalize_attribute::clip_max,
-                                         normalize_attribute::clip_min,
-                                         normalize_attribute::include_max,
-                                         normalize_attribute::use_len,
-                                         normalize_attribute::include_min};
-        return {{"normalize_axes", normalize}};
+        value normalize_axes     = value::object{};
+        normalize_axes["axes"]   = value::array{normalize_attribute::include_min};
+        normalize_axes["starts"] = value::array{normalize_attribute::clip_max,
+                                                normalize_attribute::clip_min,
+                                                normalize_attribute::include_max,
+                                                normalize_attribute::use_len,
+                                                normalize_attribute::include_min};
+        normalize_axes["ends"]   = value::array{normalize_attribute::clip_max,
+                                              normalize_attribute::clip_min,
+                                              normalize_attribute::include_max,
+                                              normalize_attribute::use_len,
+                                              normalize_attribute::include_min};
+        return {{"normalize_axes", normalize_axes}};
    }

    std::string name() const { return "slice"; }
@@ -88,7 +114,7 @@ struct slice
    /**
     * Computes the slice output shape dimensions for given starts, ends,and axes.
     * Templated to also handle tensor views.
-     * Possibily different type between [in_starts, in_ends] and [in_axes] if in_axes is this
+     * Possibly different type between [in_starts, in_ends] and [in_axes] if in_axes is this
     * object's axes attribute. Assumes in_starts and in_ends are normalized; in_axes are valid.
     */
    template <class A, class B>
@@ -104,62 +130,160 @@ struct slice
        return new_lens;
    }

-    shape normalize_compute_shape(std::vector<shape> inputs) const
+    /// Report which attributes are non-empty.
+    /// Returns flags ordered {starts, ends, axes}; an entry is true when that attribute
+    /// vector holds at least one element.
+    std::array<bool, 3> get_set_attributes() const
    {
-        check_shapes{inputs, *this, true}.has(1, 3, 4);
-        auto input_shape = inputs[0];
-        if(inputs.size() == 1)
+        // Query emptiness in place; copying the attribute vectors is unnecessary.
+        return {not this->starts.empty(), not this->ends.empty(), not this->axes.empty()};
+    }
+
+    /// Helper function for normalize_compute_shape()
+    shape compute_two_or_more(std::vector<shape> inputs) const
+    {
+        auto input_shape    = inputs[0];
+        auto set_attributes = get_set_attributes();
+        // check that inputs [1, end) are all 1D, have the same
+        // dimension, and are static
+        check_shapes{inputs.begin() + 1,
+                     inputs.end(),
+                     std::string("SLICE: inputs (starts, ends, and input_axes)"),
+                     false}
+            .only_dims(1)
+            .same_dims();
+        auto dds = input_shape.to_dynamic().dyn_dims();
+        if(inputs.size() == 2)
        {
-            auto t = input_shape.type();
-            if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
-                   return not input_shape.dyn_dims()[axis].is_fixed();
-               }))
+            if(set_attributes == ends_axes)
            {
-                MIGRAPHX_THROW("SLICE: slicing is not allowed on non-fixed dynamic input axis ");
+                // attr ends and axes set; inputs are (data, input_starts)
+                if(inputs[1].lens().at(0) != axes.size())
+                {
+                    MIGRAPHX_THROW("SLICE: 2 input and attributes mismatch");
+                }
+                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
+                    dds.at(axis) = {0, dds.at(axis).max};
+                });
            }
-            if(input_shape.dynamic())
+            else if(set_attributes == starts_axes)
            {
-                return shape{t,
-                             lens_calc(input_shape.min_lens(), starts, ends, axes),
-                             lens_calc(input_shape.max_lens(), starts, ends, axes),
-                             {}};
+                // attr starts and axes set; inputs are (data, input_ends)
+                if(inputs[1].lens().at(0) != axes.size())
+                {
+                    MIGRAPHX_THROW("SLICE: 2 input and attributes mismatch");
+                }
+                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
+                    dds.at(axis) = {0, dds.at(axis).max};
+                });
+            }
+            else if(set_attributes == starts_ends)
+            {
+                // attr starts and ends set; inputs are (data, input_axes)
+                if(inputs[1].lens().at(0) != starts.size())
+                {
+                    MIGRAPHX_THROW("SLICE: 2 input and attributes mismatch");
+                }
+                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
+                    return shape::dynamic_dimension{0, dd.max};
+                });
            }
            else
            {
-                return shape{
-                    t, lens_calc(input_shape.lens(), starts, ends, axes), input_shape.strides()};
+                MIGRAPHX_THROW("SLICE: Invalid 2 input and attributes configuration");
            }
        }
-        else
+        else if(inputs.size() == 3)
        {
-            // check that starts, ends, and optionally input_axes are all 1D, have the same
-            // dimension, and are static
-            check_shapes{inputs.begin() + 1,
-                         inputs.end(),
-                         std::string("SLICE: inputs (starts, ends, and input_axes)"),
-                         false}
-                .only_dims(1)
-                .same_dims();
-            auto dds = input_shape.to_dynamic().dyn_dims();
-            if(inputs.size() == 3)
+            if(set_attributes == axes_only)
            {
+                // attr axes set; inputs are (data, input_starts, input_ends)
                if(inputs[1].lens().at(0) != axes.size())
                {
-                    MIGRAPHX_THROW("SLICE: inputs starts and ends do not have the same dimension "
-                                   "as the axes attribute");
+                    MIGRAPHX_THROW("SLICE: 3 input and attributes mismatch");
                }
                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
                    dds.at(axis) = {0, dds.at(axis).max};
                });
            }
-            else
+            else if(set_attributes == ends_only)
+            {
+                // attr ends set; inputs are (data, input_starts, input_axes)
+                if(inputs[1].lens().at(0) != ends.size())
+                {
+                    MIGRAPHX_THROW("SLICE: 3 input and attributes mismatch");
+                }
+                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
+                    return shape::dynamic_dimension{0, dd.max};
+                });
+            }
+            else if(set_attributes == starts_only)
+
            {
-                // if axes is an input, then all the output dimensions could be 0 to the max value
+                // attr starts set; inputs are (data, input_ends, input_axes)
+                if(inputs[1].lens().at(0) != starts.size())
+                {
+                    MIGRAPHX_THROW("SLICE: 3 input and attributes mismatch");
+                }
                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
                    return shape::dynamic_dimension{0, dd.max};
                });
            }
-            return shape{input_shape.type(), dds};
+            else
+            {
+                MIGRAPHX_THROW("Invalid 3 input and attributes configuration");
+            }
+        }
+        else
+        {
+            // all 4 inputs (data, input_starts, input_ends, input_axes)
+            std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
+                return shape::dynamic_dimension{0, dd.max};
+            });
+        }
+        return shape{input_shape.type(), dds};
+    }
+
+    // uses the normalize_axes flag to normalize axes, starts, and ends
+    shape normalize_compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this, true}.has(1, 2, 3, 4);
+        if(inputs.size() == 1)
+        {
+            auto input_shape    = inputs[0];
+            auto set_attributes = get_set_attributes();
+            if(set_attributes != all_set)
+            {
+                MIGRAPHX_THROW("SLICE 1_arg: Invalid 1 input and attributes configuration");
+            }
+            // NOTE: make sure to update how normalization works here if this type of slicing is
+            // changed to be allowed
+            if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
+                   return not input_shape.dyn_dims()[axis].is_fixed();
+               }))
+            {
+                MIGRAPHX_THROW(
+                    "SLICE 1_arg: slicing is not allowed on non-fixed dynamic input axis ");
+            }
+            if(input_shape.dynamic())
+            {
+                return shape{
+                    input_shape.type(),
+                    lens_calc(input_shape.min_lens(), this->starts, this->ends, this->axes),
+                    lens_calc(input_shape.max_lens(), this->starts, this->ends, this->axes),
+                    {}};
+            }
+            else
+            {
+                return shape{input_shape.type(),
+                             lens_calc(input_shape.lens(), this->starts, this->ends, this->axes),
+                             input_shape.strides()};
+            }
+        }
+        else
+        {
+            return compute_two_or_more(inputs);
        }
    }

@@ -194,14 +318,14 @@ struct slice

    /**
     * Calculates the starting offset for the sliced tensor (for aliasing).
-     * Used when the starts and/or the axes are inputs.
+     * Used for 2-4 inputs to `slice`.
     *
     * \param s static input shape
     * \param input_starts starting indices of slice
     * \param ax_vec axes to slice on
     */
-    template <class IndView, class Axes>
-    auto compute_offset(const shape& s, const IndView& input_starts, const Axes& ax_vec) const
+    template <class T>
+    auto compute_offset(const shape& s, const T& input_starts, const T& ax_vec) const
    {
        auto ret = 0;
        for(std::size_t i = 0; i < ax_vec.size(); ++i)
@@ -212,106 +336,168 @@ struct slice
        return ret * s.type_size();
    }

-    std::unordered_map<std::string, std::vector<int64_t>>
-    normalize_inputs(const shape& input_shape,
-                     const std::vector<int64_t>& input_starts,
-                     const std::vector<int64_t>& input_ends) const
-    {
-        auto attrs = this->attributes().at("normalize_axes");
-        return {{"input_starts",
-                 normalize_indices(input_starts,
-                                   this->axes,
-                                   input_shape,
-                                   attrs.at("starts"),
-                                   "Slice variable input_starts")},
-                {"input_ends",
-                 normalize_indices(input_ends,
-                                   this->axes,
-                                   input_shape,
-                                   attrs.at("ends"),
-                                   "Slice variable input_ends")}};
-    }
-
    /**
-     * Three input version of the normalize_inputs.
-     * This one also checks that the input_axes are valid.
+     * If given, normalize the inputs. Otherwise get from operator attributes.
+     * Return the values in a map.
+     *
+     * Parameters
+     * input_shape: static shape of the input
+     * input_starts: optional
+     * input_ends: optional
+     * input_axes: optional
     */
    std::unordered_map<std::string, std::vector<int64_t>>
-    normalize_inputs(shape input_shape,
-                     const std::vector<int64_t>& input_starts,
-                     const std::vector<int64_t>& input_ends,
-                     const std::vector<int64_t>& input_axes) const
+    normalize_starts_ends_axes(shape input_shape,
+                               const optional<std::vector<int64_t>>& input_starts,
+                               const optional<std::vector<int64_t>>& input_ends,
+                               const optional<std::vector<int64_t>>& input_axes) const
    {
-        auto attrs = this->attributes().at("normalize_axes");
-        auto norm_axes =
-            normalize_axes(input_axes, input_shape, attrs.at("axes"), "Slice variable input_axes");
-        return {{"input_starts",
-                 normalize_indices(input_starts,
-                                   norm_axes,
-                                   input_shape,
-                                   attrs.at("starts"),
-                                   "Slice variable input_starts")},
-                {"input_ends",
-                 normalize_indices(input_ends,
-                                   norm_axes,
-                                   input_shape,
-                                   attrs.at("ends"),
-                                   "Slice variable input ends")},
-                {"input_axes", norm_axes}};
+        auto axes_attrs = this->attributes().at("normalize_axes");
+        std::vector<int64_t> norm_starts;
+        std::vector<int64_t> norm_ends;
+        std::vector<int64_t> norm_axes;
+        if(input_axes)
+        {
+            norm_axes = normalize_axes(input_axes.value(),
+                                       input_shape,
+                                       axes_attrs.at("axes"),
+                                       "Slice variable input_axes");
+        }
+        else
+        {
+            norm_axes = this->axes;
+        }
+        if(input_starts)
+        {
+            norm_starts = normalize_indices(input_starts.value(),
+                                            norm_axes,
+                                            input_shape,
+                                            axes_attrs.at("starts"),
+                                            "Slice variable input_starts");
+        }
+        else
+        {
+            norm_starts = this->starts;
+        }
+        if(input_ends)
+        {
+            norm_ends = normalize_indices(input_ends.value(),
+                                          norm_axes,
+                                          input_shape,
+                                          axes_attrs.at("ends"),
+                                          "Slice variable input ends");
+        }
+        else
+        {
+            norm_ends = this->ends;
+        }
+        return {{"norm_starts", norm_starts}, {"norm_ends", norm_ends}, {"norm_axes", norm_axes}};
    }

    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
        auto input       = args[0];
        auto input_shape = input.get_shape();
-        switch(args.size())
+        if(args.size() == 1)
        {
-        case 1: {
            std::size_t offset = compute_offset(input_shape);
            return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
        }
-        case 3: {
-            shape calc_shape;
-            std::size_t offset = 0;
-            visit_all(args[1], args[2])([&](auto input_starts, auto input_ends) {
-                auto norm_inputs = normalize_inputs(input_shape,
-                                                    input_starts.template to_vector<int64_t>(),
-                                                    input_ends.template to_vector<int64_t>());
-                offset = compute_offset(input_shape, norm_inputs.at("input_starts"), this->axes);
-                calc_shape = {input_shape.type(),
-                              lens_calc(input_shape.lens(),
-                                        norm_inputs.at("input_starts"),
-                                        norm_inputs.at("input_ends"),
-                                        this->axes),
-                              input_shape.strides()};
-            });
-            return {calc_shape, [=] { return input.data() + offset; }};
-        }
-        case 4: {
-            shape calc_shape;
-            std::size_t offset = 0;
-            visit_all(args[1], args[2], args[3])(
-                [&](auto input_starts, auto input_ends, auto input_axes) {
-                    auto norm_inputs = normalize_inputs(input_shape,
-                                                        input_starts.template to_vector<int64_t>(),
-                                                        input_ends.template to_vector<int64_t>(),
-                                                        input_axes.template to_vector<int64_t>());
-                    offset           = compute_offset(
-                        input_shape, norm_inputs.at("input_starts"), norm_inputs.at("input_axes"));
-                    calc_shape = shape{input_shape.type(),
-                                       lens_calc(input_shape.lens(),
-                                                 norm_inputs.at("input_starts"),
-                                                 norm_inputs.at("input_ends"),
-                                                 norm_inputs.at("input_axes")),
-                                       input_shape.strides()};
+        else
+        {
+            // Note that we re-normalize both the attributes and inputs because of the non-fixed
+            // dynamic input shape case. It's possible to only re-normalize if slicing over
+            // non-fixed dynamic_dimensions.
+            auto set_attributes = get_set_attributes();
+            std::unordered_map<std::string, std::vector<int64_t>> norm_inputs;
+            if(set_attributes == ends_axes)
+            {
+                // attr ends and axes set; inputs are (data, input_starts)
+                args[1].visit([&](auto input_starts) {
+                    norm_inputs =
+                        normalize_starts_ends_axes(input_shape,
+                                                   input_starts.template to_vector<int64_t>(),
+                                                   this->ends,
+                                                   this->axes);
+                });
+            }
+            else if(set_attributes == starts_axes)
+            {
+                // attr starts and axes set; inputs are (data, input_ends)
+                args[1].visit([&](auto input_ends) {
+                    norm_inputs =
+                        normalize_starts_ends_axes(input_shape,
+                                                   this->starts,
+                                                   input_ends.template to_vector<int64_t>(),
+                                                   this->axes);
+                });
+            }
+            else if(set_attributes == starts_ends)
+            {
+                // attr starts and ends set; inputs are (data, input_axes)
+                args[1].visit([&](auto input_axes) {
+                    norm_inputs =
+                        normalize_starts_ends_axes(input_shape,
+                                                   this->starts,
+                                                   this->ends,
+                                                   input_axes.template to_vector<int64_t>());
                });
+            }
+            else if(set_attributes == axes_only)
+            {
+                // attr axes set; inputs are (data, input_starts, input_ends)
+                visit_all(args[1], args[2])([&](auto input_starts, auto input_ends) {
+                    norm_inputs =
+                        normalize_starts_ends_axes(input_shape,
+                                                   input_starts.template to_vector<int64_t>(),
+                                                   input_ends.template to_vector<int64_t>(),
+                                                   this->axes);
+                });
+            }
+            else if(set_attributes == ends_only)
+            {
+                // attr ends set; inputs are (data, input_starts, input_axes)
+                visit_all(args[1], args[2])([&](auto input_starts, auto input_axes) {
+                    norm_inputs =
+                        normalize_starts_ends_axes(input_shape,
+                                                   input_starts.template to_vector<int64_t>(),
+                                                   this->ends,
+                                                   input_axes.template to_vector<int64_t>());
+                });
+            }
+            else if(set_attributes == starts_only)
+            {
+                // attr starts set; inputs are (data, input_ends, input_axes)
+                visit_all(args[1], args[2])([&](auto input_ends, auto input_axes) {
+                    norm_inputs =
+                        normalize_starts_ends_axes(input_shape,
+                                                   this->starts,
+                                                   input_ends.template to_vector<int64_t>(),
+                                                   input_axes.template to_vector<int64_t>());
+                });
+            }
+            else
+            {
+                // no attr set, all inputs
+                visit_all(args[1], args[2], args[3])(
+                    [&](auto input_starts, auto input_ends, auto input_axes) {
+                        norm_inputs =
+                            normalize_starts_ends_axes(input_shape,
+                                                       input_starts.template to_vector<int64_t>(),
+                                                       input_ends.template to_vector<int64_t>(),
+                                                       input_axes.template to_vector<int64_t>());
+                    });
+            }
+            auto offset = compute_offset(
+                input_shape, norm_inputs.at("norm_starts"), norm_inputs.at("norm_axes"));
+            shape calc_shape = shape{input_shape.type(),
+                                     lens_calc(input_shape.lens(),
+                                               norm_inputs.at("norm_starts"),
+                                               norm_inputs.at("norm_ends"),
+                                               norm_inputs.at("norm_axes")),
+                                     input_shape.strides()};
            return {calc_shape, [=] { return input.data() + offset; }};
        }
-        default: {
-            // Should never get here; covering in case some code change occurs
-            MIGRAPHX_THROW("SLICE: invalid number of inputs");
-        }
-        }
    }

    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }

--- a/src/include/migraphx/operators.hpp
+++ b/src/include/migraphx/operators.hpp
@@ -84,6 +84,7 @@
 #include <migraphx/op/mod.hpp>
 #include <migraphx/op/mul.hpp>
 #include <migraphx/op/multibroadcast.hpp>
+#include <migraphx/op/nearbyint.hpp>
 #include <migraphx/op/neg.hpp>
 #include <migraphx/op/nonmaxsuppression.hpp>
 #include <migraphx/op/nonzero.hpp>
@@ -110,7 +111,6 @@
 #include <migraphx/op/rnn_variable_seq_lens.hpp>
 #include <migraphx/op/rnn_var_sl_last_output.hpp>
 #include <migraphx/op/roialign.hpp>
-#include <migraphx/op/round.hpp>
 #include <migraphx/op/rsqrt.hpp>
 #include <migraphx/op/scalar.hpp>
 #include <migraphx/op/scatter_add.hpp>

--- a/src/include/migraphx/streamutils.hpp
+++ b/src/include/migraphx/streamutils.hpp
@@ -30,6 +30,7 @@
 #include <migraphx/rank.hpp>
 #include <migraphx/requires.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/optional.hpp>
 #include <vector>

 namespace migraphx {
@@ -68,6 +69,19 @@ auto stream_write_value_impl(rank<1>, std::ostream& os, const T& x) -> decltype(
    os << x;
 }

+template <class T>
+auto stream_write_value_impl(rank<1>, std::ostream& os, const optional<T>& x)
+{
+    if(x.has_value())
+    {
+        os << *x;
+    }
+    else
+    {
+        os << "nullopt";
+    }
+}
+
 template <class T>
 void stream_write_value_impl(rank<1>, std::ostream& os, const std::vector<T>& r)
 {

--- a/src/normalize_attributes.cpp
+++ b/src/normalize_attributes.cpp
@@ -66,15 +66,15 @@ auto tune_attribute(const std::vector<int64_t>& vec,
    {
        if(input_shape.dynamic())
        {
+            // return the unchanged `vec` if the dynamic_dimensions at `axes` are not fixed
+            if(std::any_of(axes.begin(), axes.end(), [&](auto ax) {
+                   return not input_shape.dyn_dims().at(ax).is_fixed();
+               }))
+            {
+                return vec;
+            }
            std::transform(axes.begin(), axes.end(), max_vals.begin(), [&](auto i) {
-                const auto& dd = input_shape.dyn_dims().at(i);
-                if(not dd.is_fixed())
-                {
-                    MIGRAPHX_THROW(
-                        "NORMALIZE_ATTR: 'use_lens' on a non-fixed dynamic dimension, axis=" +
-                        std::to_string(i));
-                }
-                return dd.max;
+                return input_shape.dyn_dims().at(i).max;
            });
        }
        else

--- a/src/onnx/include/migraphx/onnx/onnx_parser.hpp
+++ b/src/onnx/include/migraphx/onnx/onnx_parser.hpp
@@ -97,10 +97,11 @@ struct onnx_parser
    shape::dynamic_dimension default_dyn_dim_value = {1, 1};
    std::unordered_map<std::string, std::vector<std::size_t>> map_input_dims;
    std::unordered_map<std::string, std::vector<shape::dynamic_dimension>> map_dyn_input_dims;
-    bool use_dyn_output         = false;
-    bool skip_unknown_operators = false;
-    int64_t max_loop_iterations = 10;
-    int64_t opset_version       = 13;
+    bool use_dyn_output          = false;
+    bool skip_unknown_operators  = false;
+    int64_t max_loop_iterations  = 10;
+    int64_t limit_max_iterations = std::numeric_limits<uint16_t>::max();
+    int64_t opset_version        = 13;

    std::unordered_map<std::string, op_func> ops;


--- a/src/onnx/onnx.cpp
+++ b/src/onnx/onnx.cpp
@@ -67,6 +67,7 @@ program parse_onnx_from(const onnx_options& options, Ts&&... xs)
    }
    parser.skip_unknown_operators = options.skip_unknown_operators;
    parser.max_loop_iterations    = options.max_loop_iterations;
+    parser.limit_max_iterations   = options.limit_max_iterations;
    parser.use_dyn_output         = options.use_dyn_output;

    if(options.print_program_on_error)

--- a/src/onnx/parse_clip.cpp
+++ b/src/onnx/parse_clip.cpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal

--- a/src/onnx/parse_generic_op.cpp
+++ b/src/onnx/parse_generic_op.cpp
@@ -60,7 +60,7 @@ struct parse_generic_op : op_parser<parse_generic_op>
                {"Neg", "neg"},
                {"Reciprocal", "recip"},
                {"Relu", "relu"},
-                {"Round", "round"},
+                {"Round", "nearbyint"},
                {"Sigmoid", "sigmoid"},
                {"Sign", "sign"},
                {"Sin", "sin"},

--- a/src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,43 +21,67 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
-#define MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
-
-#include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
-#include <utility>
+#include <migraphx/onnx/op_parser.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/instruction.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-struct context;
+namespace onnx {

-struct hip_int8_gemm_pack_a
+struct parse_isinf : op_parser<parse_isinf>
 {
-    std::string name() const { return "gpu::int8_gemm_pack_a"; }
-    shape compute_shape(const std::vector<shape>& inputs) const;
-    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const;
-    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
-    {
-        return shapes.size() - 1;
-    }
-};
+    std::vector<op_desc> operators() const { return {{"IsInf", "isinf"}}; }

-struct hip_int8_gemm_pack_b
-{
-    std::string name() const { return "gpu::int8_gemm_pack_b"; }
-    shape compute_shape(const std::vector<shape>& inputs) const;
-    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const;
-    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    instruction_ref parse(const op_desc& /*opd*/,
+                          const onnx_parser& parser,
+                          onnx_parser::node_info info,
+                          const std::vector<instruction_ref>& args) const
    {
-        return shapes.size() - 1;
+        bool detect_negative = true;
+        bool detect_positive = true;
+        if(contains(info.attributes, "detect_negative"))
+        {
+            detect_negative = static_cast<bool>(
+                parser.parse_value(info.attributes.at("detect_negative")).at<int>());
+        }
+
+        if(contains(info.attributes, "detect_positive"))
+        {
+            detect_positive = static_cast<bool>(
+                parser.parse_value(info.attributes.at("detect_positive")).at<int>());
+        }
+
+        auto x_shape = args[0]->get_shape();
+        if(not detect_negative and not detect_positive)
+        {
+            return info.add_instruction(
+                make_op("multibroadcast", {{"out_lens", x_shape.lens()}}),
+                info.add_literal(migraphx::literal{migraphx::shape{shape::bool_type}, {false}}));
+        }
+
+        auto is_inf = info.add_instruction(make_op("isinf"), args[0]);
+        if(detect_negative and detect_positive)
+        {
+            return is_inf;
+        }
+
+        auto zero_l = info.add_literal(migraphx::literal{migraphx::shape{x_shape.type()}, {0}});
+        auto mb_zero =
+            info.add_instruction(make_op("multibroadcast", {{"out_lens", x_shape.lens()}}), zero_l);
+
+        auto cond = info.add_broadcastable_binary_op(
+            detect_negative ? "less" : "greater", args[0], mb_zero);
+        if(cond->get_shape().type() != shape::bool_type)
+        {
+            cond =
+                info.add_instruction(make_op("convert", {{"target_type", shape::bool_type}}), cond);
+        }
+        return info.add_instruction(make_op("logical_and"), is_inf, cond);
    }
 };

-} // namespace gpu
+} // namespace onnx
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
-
-#endif
--- a/src/onnx/parse_loop.cpp
+++ b/src/onnx/parse_loop.cpp
@@ -58,6 +58,16 @@ struct parse_loop : op_parser<parse_loop>
            }
        }

+        // cap max_iter because loop uses static shapes with max_iter size and huge numbers
+        // here can cause overflow
+        if(max_iterations > parser.limit_max_iterations)
+        {
+            std::cerr << "WARNING: PARSE_LOOP max_iterations exceeds the maximum loop "
+                         "iterations limit, it will be changed from "
+                      << max_iterations << " to " << parser.limit_max_iterations << ".\n";
+            max_iterations = parser.limit_max_iterations;
+        }
+
        // condition input is empty
        if(args.at(1)->name() == "undefined")
        {