Merge branch 'develop' into enable_navi_32_ci

0c98c38e · Ted Themistokleous · GitHub · 1612d8f3 · 64b306ab · 0c98c38e
Unverified Commit 0c98c38e authored Sep 12, 2023 by Ted Themistokleous Committed by GitHub Sep 12, 2023
20 changed files
--- a/src/include/migraphx/op/gather.hpp
+++ b/src/include/migraphx/op/gather.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -125,13 +125,12 @@ struct gather
                    auto out_lens  = data.get_shape().lens();
                    out_lens[axis] = indices.get_shape().elements();
                    migraphx::shape out_comp_shape{data.get_shape().type(), out_lens};
-                    shape_for_each(out_comp_shape, [&](const auto& out_idx) {
-                        auto data_idx  = out_idx;
-                        auto in_index  = indices[data_idx[axis]];
-                        in_index       = (in_index < 0) ? in_index + axis_dim_size : in_index;
-                        data_idx[axis] = in_index;
-                        output[out_comp_shape.index(out_idx.begin(), out_idx.end())] =
-                            data(data_idx.begin(), data_idx.end());
+                    shape_for_each(out_comp_shape, [&](const auto& out_idx_v, size_t out_idx) {
+                        auto data_idx   = out_idx_v;
+                        auto in_index   = indices[data_idx[axis]];
+                        in_index        = (in_index < 0) ? in_index + axis_dim_size : in_index;
+                        data_idx[axis]  = in_index;
+                        output[out_idx] = data(data_idx.begin(), data_idx.end());
                    });
                }
            });

--- a/src/include/migraphx/op/nonmaxsuppression.hpp
+++ b/src/include/migraphx/op/nonmaxsuppression.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -258,7 +258,7 @@ struct nonmaxsuppression
        selected_boxes_inside_class.reserve(max_output_shape.elements());
        // iterate over batches and classes
        shape comp_s{shape::double_type, {num_batches, num_classes}};
-        shape_for_each(comp_s, [&](auto idx) {
+        shape_for_each(comp_s, [&](const auto& idx) {
            auto batch_idx = idx[0];
            auto class_idx = idx[1];
            // index offset for this class

--- a/src/include/migraphx/op/nonzero.hpp
+++ b/src/include/migraphx/op/nonzero.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -56,10 +56,10 @@ struct nonzero
        std::vector<std::vector<std::size_t>> vec_idx;
        auto s = args.front().get_shape();
        args.front().visit([&](auto v) {
-            shape_for_each(s, [&](auto idx) {
-                if(not float_equal(v[s.index(idx)], 0))
+            shape_for_each(s, [&](const auto& idx_v, size_t idx) {
+                if(not float_equal(v[idx], 0))
                {
-                    vec_idx.push_back(idx);
+                    vec_idx.push_back(idx_v);
                }
            });
        });

--- a/src/include/migraphx/op/pooling.hpp
+++ b/src/include/migraphx/op/pooling.hpp
@@ -29,6 +29,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/argument.hpp>
+#include <migraphx/pad_calc.hpp>
 #include <migraphx/par_for.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/dyn_output.hpp>
@@ -40,10 +41,20 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+// The Pooling operator mostly follows the specifications for the ONNX pooling op.
+// It assumes an NCHW layout, extended to support any number of spatial dimensions
+// from 1 on up; dimensions are <batch index, channels, spatial dimensions...>
+//
 struct pooling
 {
+    //  Class members mode, ceil_mode, padding_mode have similar names but refer to separate
+    //  concepts.
    pooling_mode mode = {pooling_mode::average};

+    // If the input rank is not 4, then padding, stride, and lengths must all be specified
+    // since the defaults have 2 dimensions.  Exception: padding is not required if
+    // padding_mode != default_
+
    // Padding along each spatial input dimension
    // Can be ndim or 2*ndim values where ndim is size of lengths
    // ndim values means pad the same before and after each dimension
@@ -63,13 +74,14 @@ struct pooling

    // ceiling mode is a flag affecting output size
    // or equivalently, placements of the pooling kernel.
-    // When true, round the size upwards, possibly
-    // including partial placements where the kernel extends beyond the edge
-    // of input and even padding.  When false, round down so that all
+    // When true, round the size upwards.  When false, round down so that all
    // kernel placements fit but some input values may be dropped.
    bool ceil_mode = false;
    int lp_order   = 2;

+    // Mode for auto padding.  default_ indicates no auto padding.
+    padding_mode_t padding_mode = padding_mode_t::default_;
+
    // Global pooling with dynamic shape input
    bool dyn_global = false;

@@ -84,6 +96,7 @@ struct pooling
    {
        return pack(f(self.mode, "mode"),
                    f(self.padding, "padding"),
+                    f(self.padding_mode, "padding_mode"),
                    f(self.stride, "stride"),
                    f(self.lengths, "lengths"),
                    f(self.ceil_mode, "ceil_mode"),
@@ -97,7 +110,8 @@ struct pooling
    {
        if(dyn_global)
            return;
-        if((padding.size() != stride.size() and (padding.size()) != stride.size() * 2) or
+        if((padding_mode != default_ and padding.size() != stride.size() and
+            (padding.size()) != stride.size() * 2) or
           stride.size() != lengths.size())
        {
            MIGRAPHX_THROW("POOLING: inconsistent attribute sizes");
@@ -137,8 +151,19 @@ struct pooling
            std::size_t padding_factor = 2 * padding[i];
            if(padding.size() == 2 * kdims)
                padding_factor = padding[i] + padding[i + kdims];
-            assert(input_lens[i + 2] + padding_factor >= lengths[i]);
-            std::size_t dim_size = input_lens[i + 2] + padding_factor - lengths[i];
+            std::size_t dim_size;
+            if(input_lens[i + 2] + padding_factor < lengths[i])
+            {
+                if(padding_mode == default_)
+                    MIGRAPHX_THROW("POOLING: not enough padding for the given kernel size");
+                // lengths can be legitimately larger only if we're doing auto padding
+                // with a dynamic shape, in which case given padding is ignored.  Set a dummy value.
+                dim_size = 2;
+            }
+            else
+            {
+                dim_size = input_lens[i + 2] + padding_factor - lengths[i];
+            }
            std::size_t len =
                (ceil_mode)
                    ? dim_size / stride[i] +
@@ -151,17 +176,13 @@ struct pooling

    shape normalize_compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this, true}.has(1);
+        check_shapes{inputs, *this, true}.has(1).min_ndims(3);
        check_attribute_size();

        const shape& input = inputs.at(0);
-        auto padding_size  = padding.size();
+        auto stride_size   = stride.size();
        size_t kdims       = input.ndim() - 2;
-        if(input.ndim() < 3)
-        {
-            MIGRAPHX_THROW("POOLING: input must have 3 or more dimensions and be nonempty");
-        }
-        if(input.ndim() * 2 != padding_size + 4 and input.ndim() != padding_size + 2)
+        if(input.ndim() != stride_size + 2)
        {
            MIGRAPHX_THROW("POOLING: input and attribute size mismatch!");
        }
@@ -179,6 +200,28 @@ struct pooling
                }
                return {input.type(), output_dyn_dims};
            }
+            else if(padding_mode != default_)
+            {
+                const size_t num_spatial_dims = inputs[0].ndim() - 2;
+                const shape& x_shape          = inputs[0];
+                // same as convolution::dynamic_compute_shape()
+
+                for(std::size_t i = 0; i < num_spatial_dims; ++i)
+                {
+                    auto ceil_div = [](std::size_t x, std::size_t y) { return (x + y - 1) / y; };
+                    auto s        = stride[i];
+
+                    auto x = x_shape.dyn_dims()[i + 2];
+                    std::set<std::size_t> optimals{};
+                    std::transform(x.optimals.begin(),
+                                   x.optimals.end(),
+                                   std::inserter(optimals, optimals.begin()),
+                                   [&](auto o) { return ceil_div(o, s); });
+                    output_dyn_dims.push_back(
+                        shape::dynamic_dimension{ceil_div(x.min, s), ceil_div(x.max, s), optimals});
+                }
+                return {input.type(), output_dyn_dims};
+            }
            else
            {
                // does not compute optimals
@@ -267,6 +310,7 @@ struct pooling
                      Out& output,
                      const In& input,
                      const std::vector<std::size_t>& kernel_dims,
+                      const std::vector<std::size_t>& padding_vals,
                      Op op) const
    {
        auto in_s    = input.get_shape();
@@ -283,9 +327,9 @@ struct pooling
            // For each spatial dimension, find starting and ending index of pooling kernel
            for(std::size_t dim = 2; dim < n_dim; ++dim)
            {
-                auto d_2 = dim - 2;
-                int start =
-                    static_cast<int>(idx_o[dim] * stride[d_2]) - static_cast<int>(padding[d_2]);
+                auto d_2  = dim - 2;
+                int start = static_cast<int>(idx_o[dim] * stride[d_2]) -
+                            static_cast<int>(padding_vals[d_2]);
                int end;
                // NOLINT
                if(count_include_pad and ceil_mode and (mode != pooling_mode::max))
@@ -297,7 +341,7 @@ struct pooling

                    // Check if this kernel extends beyond the padding at end of dimension
                    end = std::min(start + kernel_dims[d_2],
-                                   in_lens[dim] + static_cast<int>(padding[d_2]));
+                                   in_lens[dim] + static_cast<int>(padding_vals[d_2]));
                }
                else
                {
@@ -316,11 +360,12 @@ struct pooling
            }

            shape win_shape{output_shape.type(), win_size};
+
            auto pool_size    = win_shape.elements();
            double output_val = op.template init<Type>();

            // for each element in the window...
-            shape_for_each(win_shape, [&](auto idx_w) {
+            shape_for_each(win_shape, [&](const auto& idx_w) {
                // the coordinates of this element
                auto idx = idx_o;

@@ -354,30 +399,65 @@ struct pooling

    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        argument result{dyn_out.computed_shape};
+        argument result;
        auto input_lens = args[0].get_shape().lens();
        std::vector<std::size_t> kernel_dims;
+        shape output_shape;
+        // If we have to auto-calculate padding, it will be passed to calc_pooling() as an argument
+        // instead of the member variable padding.
+        std::vector<std::size_t> temp_padding(padding);
        if(dyn_global)
        {
+            // for dynamic GlobalPooling, there's no padding
            kernel_dims.insert(kernel_dims.end(), input_lens.begin() + 2, input_lens.end());
+            output_shape = dyn_out.computed_shape;
+            result       = dyn_out.computed_shape;
        }
-        else
+        else if((padding_mode != op::padding_mode_t::default_))
        {
+            // if padding_mode is set, input was a dynamic size.  Calculate padded size now.
+
+            // kernel_lens is the same as kernel_dims, but prepended with the 2 non-
+            // spatial dimensions.  For size computations, it's used like the weights
+            // tensor for convolutions.
+            std::vector<std::size_t> kernel_lens;
+            kernel_lens.insert(kernel_lens.end(), input_lens.begin(), input_lens.begin() + 2);
+            kernel_lens.insert(kernel_lens.end(), lengths.begin(), lengths.end());
            kernel_dims = this->lengths;
+
+            auto type = args[0].get_shape().type();
+            // dilation not currently supported for pooling, so default to all 1's
+            temp_padding = calc_dyn_auto_pad(
+                input_lens, kernel_lens, stride, {1, 1}, bool(padding_mode == op::same_upper));
+
+            output_shape = compute_padded_pool_shape(
+                args[0].get_shape(), shape(type, kernel_dims), temp_padding, stride, {1, 1});
+
+            result = argument(output_shape);
+        }
+        else // fixed/static input
+        {
+            kernel_dims  = this->lengths;
+            output_shape = dyn_out.computed_shape;
+            result       = dyn_out.computed_shape;
        }
+
+        // Perform the computation and populate result
        visit_all(result, args[0])([&](auto output, auto input) {
            using type = typename decltype(output)::value_type;
            switch(mode)
            {
            case migraphx::op::pooling_mode::average:
-                calc_pooling<type>(dyn_out.computed_shape, output, input, kernel_dims, avg_pool{});
+                calc_pooling<type>(
+                    output_shape, output, input, kernel_dims, temp_padding, avg_pool{});
                break;
            case migraphx::op::pooling_mode::max:
-                calc_pooling<type>(dyn_out.computed_shape, output, input, kernel_dims, max_pool{});
+                calc_pooling<type>(
+                    output_shape, output, input, kernel_dims, temp_padding, max_pool{});
                break;
            case migraphx::op::pooling_mode::lpnorm:
                calc_pooling<type>(
-                    dyn_out.computed_shape, output, input, kernel_dims, lpnorm_pool{lp_order});
+                    output_shape, output, input, kernel_dims, temp_padding, lpnorm_pool{lp_order});
                break;
            }
        });

--- a/src/include/migraphx/op/reduce_op.hpp
+++ b/src/include/migraphx/op/reduce_op.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -163,7 +163,7 @@ struct reduce_op : op_name<Derived>
        auto& self        = static_cast<const Derived&>(*this);
        auto data_idx     = out_idx;
        accumulator val   = self.init();
-        shape_for_each(batch_shape, [&](auto b_idx) {
+        shape_for_each(batch_shape, [&](const auto& b_idx) {
            this->tune_dims(tuned_axes, b_idx, data_idx);
            accumulator x = input(data_idx.begin(), data_idx.end());
            val           = self.op()(accumulator{self.input()(x)}, val);

--- a/src/include/migraphx/op/reverse.hpp
+++ b/src/include/migraphx/op/reverse.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -70,13 +70,13 @@ struct reverse
        argument result{s};
        auto lens = s.lens();
        visit_all(result, args.front())([&](auto output, auto input) {
-            shape_for_each(s, [&](const auto& out_idx) {
-                auto in_idx = out_idx;
+            shape_for_each(s, [&](const auto& out_idx_v, size_t out_idx) {
+                auto in_idx = out_idx_v;
                for(const auto& axis : axes)
                {
-                    in_idx[axis] = lens[axis] - 1 - out_idx[axis];
+                    in_idx[axis] = lens[axis] - 1 - out_idx_v[axis];
                }
-                output[s.index(out_idx)] = input[s.index(in_idx)];
+                output[out_idx] = input[s.index(in_idx)];
            });
        });


--- a/src/include/migraphx/op/roialign.hpp
+++ b/src/include/migraphx/op/roialign.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -113,10 +113,9 @@ struct roialign
    {
        std::vector<pos_weight> results(bin_grid_size[0] * bin_grid_size[1] * output_height *
                                        output_width);
-        shape_for_each(comp_s, [&](auto idx) {
-            std::array<std::size_t, 2> p = {idx[0], idx[1]};
-            std::array<std::size_t, 2> i = {idx[2], idx[3]};
-            auto index                   = comp_s.index(idx);
+        shape_for_each(comp_s, [&](const auto& idx_v, size_t index) {
+            std::array<std::size_t, 2> p = {idx_v[0], idx_v[1]};
+            std::array<std::size_t, 2> i = {idx_v[2], idx_v[3]};

            std::array<float, 2> xy{};
            std::array<int64_t, 2> low{};
@@ -255,7 +254,7 @@ struct roialign
                std::vector<std::size_t> comp_lens1 = {channels, out_dims[0], out_dims[1]};
                shape comp_s1{migraphx::shape::float_type, comp_lens1};
                std::vector<int64_t> vec_index(channels, 0);
-                shape_for_each(comp_s1, [&](auto idx) {
+                shape_for_each(comp_s1, [&](const auto& idx) {
                    auto c  = idx[0];
                    auto ph = idx[1];
                    auto pw = idx[2];

--- a/src/include/migraphx/op/scatter.hpp
+++ b/src/include/migraphx/op/scatter.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal

--- a/src/include/migraphx/op/slice.hpp
+++ b/src/include/migraphx/op/slice.hpp
@@ -27,19 +27,34 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
-#include <migraphx/dyn_output.hpp>
 #include <migraphx/value.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
+#include <migraphx/normalize_attributes.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+/**
+ * Slice operator that accepts variable axes, starts and ends.
+ *
+ * Attributes:
+ * axes: constant axes to slice over (optional)
+ * starts: constant slice starting indices (optional)
+ * ends: constant slice ending indices (optional)
+ *
+ * Parameters:
+ * data: the input tensor to slice (dynamic or static shape)
+ * input_starts: starting indices of slice (optional, static shape)
+ * input_ends: ending indices of slice (optional, static shape)
+ * input_axes: axes to slice over (optional, static shape)
+ */
 struct slice
 {
-    std::vector<int64_t> axes;
-    std::vector<int64_t> starts;
-    std::vector<int64_t> ends;
+    std::vector<int64_t> axes{};
+    std::vector<int64_t> starts{};
+    std::vector<int64_t> ends{};

    template <class Self, class F>
    static auto reflect(Self& self, F f)
@@ -48,8 +63,8 @@ struct slice
    }

    /**
-     * Ensure that attribute vectors axes, starts, and ends are all the same size and values are in
-     * limits.
+     * Ensure that attribute vectors axes, starts, and ends are all the same size and values are
+     * within limits.
     */
    value attributes() const
    {
@@ -70,6 +85,90 @@ struct slice

    std::string name() const { return "slice"; }

+    /**
+     * Computes the slice output shape dimensions for given starts, ends, and axes.
+     * Templated to also handle tensor views.
+     * The type of [in_starts, in_ends] may differ from that of [in_axes] when in_axes is this
+     * object's axes attribute. Assumes in_starts and in_ends are normalized; in_axes are valid.
+     */
+    template <class A, class B>
+    std::vector<std::size_t>
+    lens_calc(const std::vector<std::size_t>& lengths, A in_starts, A in_ends, B in_axes) const
+    {
+        auto new_lens = lengths;
+        for(std::size_t i = 0; i < in_axes.size(); ++i)
+        {
+            auto axis      = in_axes[i];
+            new_lens[axis] = in_ends[i] - in_starts[i];
+        }
+        return new_lens;
+    }
+
+    shape normalize_compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this, true}.has(1, 3, 4);
+        auto input_shape = inputs[0];
+        if(inputs.size() == 1)
+        {
+            auto t = input_shape.type();
+            if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
+                   return not input_shape.dyn_dims()[axis].is_fixed();
+               }))
+            {
+                MIGRAPHX_THROW("SLICE: slicing is not allowed on non-fixed dynamic input axis ");
+            }
+            if(input_shape.dynamic())
+            {
+                return shape{t,
+                             lens_calc(input_shape.min_lens(), starts, ends, axes),
+                             lens_calc(input_shape.max_lens(), starts, ends, axes),
+                             {}};
+            }
+            else
+            {
+                return shape{
+                    t, lens_calc(input_shape.lens(), starts, ends, axes), input_shape.strides()};
+            }
+        }
+        else
+        {
+            // check that starts, ends, and optionally input_axes are all 1D, have the same
+            // dimension, and are static
+            check_shapes{inputs.begin() + 1,
+                         inputs.end(),
+                         std::string("SLICE: inputs (starts, ends, and input_axes)"),
+                         false}
+                .only_dims(1)
+                .same_dims();
+            auto dds = input_shape.to_dynamic().dyn_dims();
+            if(inputs.size() == 3)
+            {
+                if(inputs[1].lens().at(0) != axes.size())
+                {
+                    MIGRAPHX_THROW("SLICE: inputs starts and ends do not have the same dimension "
+                                   "as the axes attribute");
+                }
+                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
+                    dds.at(axis) = {0, dds.at(axis).max};
+                });
+            }
+            else
+            {
+                // if axes is an input, then all the output dimensions could be 0 to the max value
+                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
+                    return shape::dynamic_dimension{0, dd.max};
+                });
+            }
+            return shape{input_shape.type(), dds};
+        }
+    }
+
+    /**
+     * Calculates the starting offset for the sliced tensor.
+     * Used in compute when data is the only input and all other information is in the attributes.
+     *
+     * \param s static input shape
+     */
    auto compute_offset(const shape& s) const
    {
        const std::vector<std::size_t>& lens    = s.lens();
@@ -90,80 +189,131 @@ struct slice
                offset += starts[axis] * strides[axis];
            }
        }
-        return offset;
+        return offset * s.type_size();
    }

-    shape normalize_compute_shape(std::vector<shape> inputs) const
+    /**
+     * Calculates the starting offset for the sliced tensor (for aliasing).
+     * Used when the starts and/or the axes are inputs.
+     *
+     * \param s static input shape
+     * \param input_starts starting indices of slice
+     * \param ax_vec axes to slice on
+     */
+    template <class IndView, class Axes>
+    auto compute_offset(const shape& s, const IndView& input_starts, const Axes& ax_vec) const
    {
-        check_shapes{inputs, *this, true}.has(1);
-        auto input_shape = inputs[0];
-        auto t           = input_shape.type();
-
-        // TODO:  When support for dynamic shapes is added to normalize_attributes,
-        //  remove this restriction.
-        if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
-               return not input_shape.dyn_dims()[axis].is_fixed();
-           }))
+        auto ret = 0;
+        for(std::size_t i = 0; i < ax_vec.size(); ++i)
        {
-            MIGRAPHX_THROW("SLICE: slicing is not allowed on non-fixed dynamic input axis ");
+            auto axis = ax_vec[i];
+            ret += input_starts[i] * s.strides().at(axis);
        }
+        return ret * s.type_size();
+    }
+
+    std::unordered_map<std::string, std::vector<int64_t>>
+    normalize_inputs(const shape& input_shape,
+                     const std::vector<int64_t>& input_starts,
+                     const std::vector<int64_t>& input_ends) const
+    {
+        auto attrs = this->attributes().at("normalize_axes");
+        return {{"input_starts",
+                 normalize_indices(input_starts,
+                                   this->axes,
+                                   input_shape,
+                                   attrs.at("starts"),
+                                   "Slice variable input_starts")},
+                {"input_ends",
+                 normalize_indices(input_ends,
+                                   this->axes,
+                                   input_shape,
+                                   attrs.at("ends"),
+                                   "Slice variable input_ends")}};
+    }
+
+    /**
+     * Three input version of the normalize_inputs.
+     * This one also checks that the input_axes are valid.
+     */
+    std::unordered_map<std::string, std::vector<int64_t>>
+    normalize_inputs(shape input_shape,
+                     const std::vector<int64_t>& input_starts,
+                     const std::vector<int64_t>& input_ends,
+                     const std::vector<int64_t>& input_axes) const
+    {
+        auto attrs = this->attributes().at("normalize_axes");
+        auto norm_axes =
+            normalize_axes(input_axes, input_shape, attrs.at("axes"), "Slice variable input_axes");
+        return {{"input_starts",
+                 normalize_indices(input_starts,
+                                   norm_axes,
+                                   input_shape,
+                                   attrs.at("starts"),
+                                   "Slice variable input_starts")},
+                {"input_ends",
+                 normalize_indices(input_ends,
+                                   norm_axes,
+                                   input_shape,
+                                   attrs.at("ends"),
+                                   "Slice variable input ends")},
+                {"input_axes", norm_axes}};
+    }

-        // For a static shape, old_lens will be adjusted to a new size
-        // for those axes that are sliced.
-        // For dynamic shape, the adjusted old_lens become the new max values,
-        // while updating the old mins and optimals if possible.
-        std::vector<std::size_t> new_mins;
-        std::vector<std::size_t> old_lens;
-        std::vector<std::size_t> old_strides;
-        // Doesn't handle optimals
-        if(input_shape.dynamic())
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
+    {
+        auto input       = args[0];
+        auto input_shape = input.get_shape();
+        switch(args.size())
        {
-            old_lens = input_shape.max_lens();
-            new_mins = input_shape.min_lens();
+        case 1: {
+            std::size_t offset = compute_offset(input_shape);
+            return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
        }
-        else
-        {
-            old_lens = input_shape.lens();
-            // For static shape (including during eval step after a dynamic input) the strides are
-            // indexed into the pre-slice array, so they are larger than the apparent size of the
-            // resulting shape.
-            old_strides = input_shape.strides();
+        case 3: {
+            shape calc_shape;
+            std::size_t offset = 0;
+            visit_all(args[1], args[2])([&](auto input_starts, auto input_ends) {
+                auto norm_inputs = normalize_inputs(input_shape,
+                                                    input_starts.template to_vector<int64_t>(),
+                                                    input_ends.template to_vector<int64_t>());
+                offset = compute_offset(input_shape, norm_inputs.at("input_starts"), this->axes);
+                calc_shape = {input_shape.type(),
+                              lens_calc(input_shape.lens(),
+                                        norm_inputs.at("input_starts"),
+                                        norm_inputs.at("input_ends"),
+                                        this->axes),
+                              input_shape.strides()};
+            });
+            return {calc_shape, [=] { return input.data() + offset; }};
        }
-
-        std::vector<std::size_t> new_lens = old_lens;
-        for(std::size_t i = 0; i < axes.size(); i++)
-        {
-            auto axis            = axes[i];
-            size_t sliced_length = ends[i] - starts[i];
-            // A Numpy indexing convention: a slice size larger than the actual dimension
-            // is legal and the "ends" value is clipped to the axis size
-            new_lens[axis] = std::min(new_lens[axis], sliced_length);
-            if(input_shape.dynamic())
-            {
-                // TODO: when non-fixed shape slicing is allowed, this will be different than
-                // sliced_length, making use of TBD start/end values.
-                std::size_t sliced_min_length = ends[i] - starts[i];
-                // if the slice size is smaller than maxes but larger than mins
-                new_mins[axis] = std::min(sliced_min_length, new_mins[axis]);
-            }
+        case 4: {
+            shape calc_shape;
+            std::size_t offset = 0;
+            visit_all(args[1], args[2], args[3])(
+                [&](auto input_starts, auto input_ends, auto input_axes) {
+                    auto norm_inputs = normalize_inputs(input_shape,
+                                                        input_starts.template to_vector<int64_t>(),
+                                                        input_ends.template to_vector<int64_t>(),
+                                                        input_axes.template to_vector<int64_t>());
+                    offset           = compute_offset(
+                        input_shape, norm_inputs.at("input_starts"), norm_inputs.at("input_axes"));
+                    calc_shape = shape{input_shape.type(),
+                                       lens_calc(input_shape.lens(),
+                                                 norm_inputs.at("input_starts"),
+                                                 norm_inputs.at("input_ends"),
+                                                 norm_inputs.at("input_axes")),
+                                       input_shape.strides()};
+                });
+            return {calc_shape, [=] { return input.data() + offset; }};
        }
-        if(input_shape.dynamic())
-        {
-            return shape{t, new_mins, new_lens, {}};
+        default: {
+            // Should never get here; covering in case some code change occurs
+            MIGRAPHX_THROW("SLICE: invalid number of inputs");
        }
-        else
-        {
-            return shape{t, new_lens, old_strides};
        }
    }

-    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
-    {
-        auto input = args[0];
-
-        auto offset = compute_offset(input.get_shape()) * dyn_out.computed_shape.type_size();
-        return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
-    }
    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };


--- a/src/include/migraphx/pad_calc.hpp
+++ b/src/include/migraphx/pad_calc.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -62,6 +62,14 @@ shape compute_padded_shape(const shape& input,
                           const std::vector<std::size_t>& stride,
                           const std::vector<std::size_t>& dilation);

+// Used for dynamic auto padding of pooling operators where padding needs to be computed at
+// evaluation time.
+shape compute_padded_pool_shape(const shape& input,
+                                const shape& kernel,
+                                const std::vector<std::size_t>& padding,
+                                const std::vector<std::size_t>& stride,
+                                const std::vector<std::size_t>& dilation);
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx


--- a/src/include/migraphx/ranges.hpp
+++ b/src/include/migraphx/ranges.hpp
@@ -205,7 +205,7 @@ void transform(Range1&& r1, Range2&& r2, Iterator it, F f)
 }

 template <class Range>
-auto reverse(Range& r)
+auto reverse(Range&& r)
 {
    return range(std::make_reverse_iterator(r.end()), std::make_reverse_iterator(r.begin()));
 }

--- a/src/include/migraphx/shape.hpp
+++ b/src/include/migraphx/shape.hpp
@@ -263,7 +263,7 @@ struct MIGRAPHX_EXPORT shape
    /// no padding
    bool packed() const;

-    /// Returns true is the shape has been transposed. That is the strides are not in descending
+    /// Returns true if the shape has been transposed. That is the strides are not in descending
    /// order
    bool transposed() const;


--- a/src/include/migraphx/shape_for_each.hpp
+++ b/src/include/migraphx/shape_for_each.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -37,11 +37,11 @@ inline namespace MIGRAPHX_INLINE_NS {
 template <class F>
 void shape_for_each(const migraphx::shape& s, F f)
 {
-    // Ensure calls to f use const ref to vector
-    auto call = [&f](const std::vector<std::size_t>& i) { f(i); };
    std::vector<std::size_t> indices(s.lens().size());
+    const auto& index_const_ref = indices;
    shape ss{s.type(), s.lens()};
-    for(std::size_t i = 0; i < ss.elements(); i++)
+    size_t max = ss.elements();
+    for(std::size_t i = 0; i < max; i++)
    {
        std::transform(ss.strides().begin(),
                       ss.strides().end(),
@@ -51,9 +51,13 @@ void shape_for_each(const migraphx::shape& s, F f)
                           assert(len > 0 and stride > 0);
                           return (i / stride) % len;
                       });
-        call(indices);
+        if constexpr(std::is_invocable<F, decltype(index_const_ref), decltype(i)>{})
+            f(index_const_ref, i);
+        else
+            f(index_const_ref);
    }
 }
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx


--- a/src/include/migraphx/simplify_reshapes.hpp
+++ b/src/include/migraphx/simplify_reshapes.hpp
@@ -38,6 +38,7 @@ struct module;
 */
 struct MIGRAPHX_EXPORT simplify_reshapes
 {
+    size_t depth = 4;
    std::string name() const { return "simplify_reshapes"; }
    void apply(module& m) const;
 };

--- a/src/include/migraphx/stringutils.hpp
+++ b/src/include/migraphx/stringutils.hpp
@@ -86,7 +86,7 @@ inline std::string join_strings(Strings strings, const std::string& delim)
 inline std::vector<std::string> split_string(const std::string& s, char delim)
 {
    std::vector<std::string> elems;
-    std::stringstream ss(s + ' ');
+    std::stringstream ss(s + delim);
    std::string item;
    while(std::getline(ss, item, delim))
    {
@@ -149,6 +149,10 @@ interpolate_string(const std::string& input, F f, std::string start = "${", std:
        result.append(it, next_start);
        if(next_start == input.end())
            break;
+        if(next_end == input.end())
+        {
+            throw std::runtime_error("Unbalanced brackets");
+        }
        auto r = f(next_start + start.size(), next_end);
        result.append(r.begin(), r.end());
        it = next_end + end.size();

--- a/src/memory_coloring.cpp
+++ b/src/memory_coloring.cpp
@@ -23,9 +23,9 @@
 */
 #include <migraphx/memory_coloring.hpp>
 #include <migraphx/module.hpp>
-#include <migraphx/operators.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/iterator_for.hpp>
+#include <migraphx/make_op.hpp>
 #include <migraphx/functional.hpp>
 #include <migraphx/algorithm.hpp>
 #include <migraphx/ranges.hpp>
@@ -382,7 +382,8 @@ void memory_coloring::apply(module& m) const
        auto s             = ins->get_shape();
        std::size_t offset = seg.first * alignment;
        assert(offset < n);
-        m.replace_instruction(ins, op::load{s, offset}, mem);
+        m.replace_instruction(
+            ins, make_op("load", {{"shape", to_value(s)}, {"offset", offset}}), mem);
    }

    // Replace zero allocation
@@ -391,7 +392,8 @@ void memory_coloring::apply(module& m) const
        if(ins->name() != allocation_op)
            continue;
        assert(ins->get_shape().bytes() == 0);
-        m.replace_instruction(ins, op::load{ins->get_shape(), 0}, mem);
+        m.replace_instruction(
+            ins, make_op("load", {{"shape", to_value(ins->get_shape())}, {"offset", 0}}), mem);
    }

    // Remove scratch parameter if its not used

--- a/src/normalize_attributes.cpp
+++ b/src/normalize_attributes.cpp
@@ -26,7 +26,7 @@
 #include <migraphx/normalize_attributes.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
-
+#include <migraphx/op/common.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -49,6 +49,10 @@ auto tune_attribute(const std::vector<int64_t>& vec,
                    Message m)
 {
    std::vector<int64_t> result(vec);
+    if(result.empty())
+    {
+        return result;
+    };
    int64_t n_rank                                 = input_shape.ndim();
    std::vector<op::normalize_attribute> vec_attrs = val.to_vector<op::normalize_attribute>();
    if(contains(vec_attrs, op::normalize_attribute::use_output))
@@ -188,20 +192,27 @@ bool normalize_attributes(operation& op, const shape& input_shape)
    auto val   = op.to_value();
    if(attrs.contains("normalize_padding"))
    {
-        auto padding       = val.at(attrs.at("normalize_padding").to<std::string>());
-        auto padding_size  = padding.size();
-        auto padding_start = 2;
-
-        if(padding_size == 2 * (input_shape.ndim() - padding_start))
-            tuned = true;
-        else if(padding_size != (input_shape.ndim() - padding_start))
-            MIGRAPHX_THROW("inconsistent padding size");
-        else
+        bool use_auto_padding =
+            (val.contains("padding_mode") and
+             (val.at("padding_mode").to<int>() != migraphx::op::padding_mode_t::default_));
+        if(not use_auto_padding)
        {
-            auto result    = tune_pad_attribute(padding);
-            val["padding"] = result;
-            op.from_value(val);
-            tuned = true;
+            auto padding       = val.at(attrs.at("normalize_padding").to<std::string>());
+            auto padding_size  = padding.size();
+            auto padding_start = 2;
+            if(padding_size == 2 * (input_shape.ndim() - padding_start))
+                tuned = true;
+            else if(padding_size != (input_shape.ndim() - padding_start))
+            {
+                MIGRAPHX_THROW("normalize_attributes: inconsistent padding vector size ");
+            }
+            else
+            {
+                auto result    = tune_pad_attribute(padding);
+                val["padding"] = result;
+                op.from_value(val);
+                tuned = true;
+            }
        }
    }
    if(not attrs.contains("normalize_axes"))
@@ -251,5 +262,22 @@ bool normalize_attributes(operation& op, const shape& input_shape)
    return tuned;
 }

+std::vector<int64_t> normalize_axes(const std::vector<int64_t>& axes,
+                                    const shape& input_shape,
+                                    const value& attr_val,
+                                    const std::string& prefix)
+{
+    return tune_attribute(axes, {}, attr_val, input_shape, [&] { return prefix; });
+}
+
+std::vector<int64_t> normalize_indices(const std::vector<int64_t>& indices,
+                                       const std::vector<int64_t>& axes,
+                                       const shape& input_shape,
+                                       const value& attr_val,
+                                       const std::string& prefix)
+{
+    return tune_attribute(indices, axes, attr_val, input_shape, [&] { return prefix; });
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
--- a/src/onnx/parse_pooling.cpp
+++ b/src/onnx/parse_pooling.cpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -151,26 +151,6 @@ struct parse_pooling : op_parser<parse_pooling>
                kdims, paddings.size() / 2, "PARSE_POOLING: inconsistent explicit paddings");
        }

-        if(contains(info.attributes, "auto_pad"))
-        {
-            if(in_shape.dynamic())
-            {
-                MIGRAPHX_THROW(
-                    "PARSE_POOLING: Auto padding pooling with dynamic input shape not supported");
-            }
-            else
-            {
-                values["padding"].clear();
-                // return paddings could be empty, then setting to 0 for no padding
-                cal_auto_padding_size(info,
-                                      values,
-                                      values["lengths"].to_vector<std::size_t>(),
-                                      {1, 1},
-                                      in_shape.lens(),
-                                      paddings);
-            }
-        }
-
        if(paddings.size() != 2 * kdims)
        {
            paddings.resize(kdims * 2);
@@ -192,6 +172,36 @@ struct parse_pooling : op_parser<parse_pooling>
        // used to calculate the supposed output shape
        std::vector<int64_t> orig_padding = paddings;

+        // TODO:  add parsing for dilations
+        if(contains(info.attributes, "auto_pad") and
+           to_upper(info.attributes["auto_pad"].s()) != "NOTSET")
+        {
+            auto auto_pad = to_upper(info.attributes["auto_pad"].s());
+            // don't use the given padding sizes, if any
+            // values["padding"].clear();
+            if(in_shape.dynamic())
+            {
+                // set padding_mode to trigger auto padding at runtime
+                bool is_same_upper     = (auto_pad.find("SAME_UPPER") != std::string::npos);
+                values["padding_mode"] = is_same_upper ? to_value(op::padding_mode_t::same_upper)
+                                                       : to_value(op::padding_mode_t::same_lower);
+            }
+            else
+            {
+                // Calculate auto padding
+                // dilations (argument 4) not supported; default to all 1's
+                cal_auto_padding_size(info,
+                                      values,
+                                      values["lengths"].to_vector<std::size_t>(),
+                                      std::vector<size_t>(in_shape.ndim() - 2, 1),
+                                      in_shape.lens(),
+                                      paddings);
+                values["padding"] = paddings;
+                // default padding_mode indicates that padding sizes are not calculated dynamically
+                values["padding_mode"] = migraphx::op::padding_mode_t::default_;
+            }
+        }
+
        std::vector<int64_t> slice_start;
        std::vector<int64_t> slice_end;
        tune_padding_size(values, paddings, count_include_pad, slice_start);
@@ -208,8 +218,9 @@ struct parse_pooling : op_parser<parse_pooling>
            orig_padding.insert(orig_padding.begin(), 2, 0);
            op::pad pad{orig_padding, 0.0f};
            shape padded_shape = pad.compute_shape({l0->get_shape()});
-            auto out_lens      = make_op("pooling", values).compute_shape({padded_shape}).lens();

+            // make an op just to get its output shape
+            auto out_lens = make_op("pooling", values).compute_shape({padded_shape}).lens();
            // compute slice_end information
            slice_end.resize(slice_start.size());
            std::transform(out_lens.begin() + 2,

--- a/src/onnx/parse_resize.cpp
+++ b/src/onnx/parse_resize.cpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -97,22 +97,19 @@ const auto& get_original_idx_op(const std::string& mode)
 static std::vector<int>
 calc_neighbor_points(const std::vector<std::vector<std::vector<std::size_t>>>& vvv_ind,
                     int i_dim,
-                     const std::vector<std::vector<std::size_t>>& vec_dims,
+                     std::vector<std::vector<std::size_t>> vec_dims,
                     const shape& in_s)
 {
    if(i_dim == vvv_ind.size())
    {
-        std::vector<int> vec_ind;
-        vec_ind.resize(vec_dims.size());
+        std::vector<int> vec_ind(vec_dims.size());
        std::transform(vec_dims.begin(), vec_dims.end(), vec_ind.begin(), [&](auto idx) {
            return static_cast<int>(in_s.index(idx));
        });
-
        return vec_ind;
    }

-    const auto& vv_ind = vvv_ind[i_dim];
-    const auto& vv_lo  = vv_ind.at(0);
+    const auto& vv_lo = vvv_ind[i_dim][0];
    std::vector<std::vector<std::size_t>> vec_dims1;
    for(std::size_t start = 0; start < vec_dims.size(); start += vv_lo.size())
    {
@@ -126,8 +123,8 @@ calc_neighbor_points(const std::vector<std::vector<std::vector<std::size_t>>>& v
                       });
    }

-    const auto& vv_hi = vv_ind.at(1);
-    for(std::size_t start = 0; start < vec_dims.size(); start += vv_lo.size())
+    const auto& vv_hi = vvv_ind[i_dim][1];
+    for(std::size_t start = 0; start < vec_dims.size(); start += vv_hi.size())
    {
        std::transform(vv_hi.begin(),
                       vv_hi.end(),
@@ -138,8 +135,8 @@ calc_neighbor_points(const std::vector<std::vector<std::vector<std::size_t>>>& v
                           return dim;
                       });
    }
-
-    return calc_neighbor_points(vvv_ind, i_dim + 1, vec_dims1, in_s);
+    vec_dims.clear();
+    return calc_neighbor_points(vvv_ind, i_dim + 1, std::move(vec_dims1), in_s);
 }

 static std::string get_coord_trans_mode(const onnx_parser::attribute_map& attr)
@@ -240,7 +237,7 @@ struct parse_resize : op_parser<parse_resize>
                auto arg_out_s = arg->eval();
                check_arg_empty(arg_out_s,
                                "PARSE_" + opd.op_name + ": dynamic output size is not supported!");
-                arg_out_s.visit([&](auto ol) { out_lens.assign(ol.begin(), ol.end()); });
+                arg_out_s.visit([&](const auto& ol) { out_lens.assign(ol.begin(), ol.end()); });

                if(out_lens.size() != in_lens.size())
                {
@@ -267,7 +264,7 @@ struct parse_resize : op_parser<parse_resize>
                                    "PARSE_" + opd.op_name +
                                        ": dynamic input scale is not supported!");

-                    arg_scale.visit([&](auto v) { vec_scale.assign(v.begin(), v.end()); });
+                    arg_scale.visit([&](const auto& v) { vec_scale.assign(v.begin(), v.end()); });
                    if(in_lens.size() != vec_scale.size())
                    {
                        MIGRAPHX_THROW("PARSE_" + opd.op_name +
@@ -300,15 +297,15 @@ struct parse_resize : op_parser<parse_resize>

            // map out_idx to in_idx
            auto nearest_op = get_nearest_op(nearest_mode);
-            shape_for_each(out_s, [&](auto idx) {
-                auto in_idx = idx;
+            shape_for_each(out_s, [&](const auto& out_idx_v, size_t out_idx) {
+                std::vector<size_t> in_idx(out_idx_v.size());
                for(auto ii = 0; ii < in_lens.size(); ++ii)
                {
-                    auto idx_val = idx_op(in_lens[ii], out_lens[ii], idx[ii], vec_scale[ii]);
+                    auto idx_val = idx_op(in_lens[ii], out_lens[ii], out_idx_v[ii], vec_scale[ii]);
                    in_idx[ii]   = nearest_op(in_lens[ii], idx_val);
                }

-                ind[out_s.index(idx)] = static_cast<int64_t>(in_s.index(in_idx));
+                ind[out_idx] = static_cast<int64_t>(in_s.index(in_idx));
            });

            shape ind_s{shape::int32_type, out_lens};
@@ -323,24 +320,21 @@ struct parse_resize : op_parser<parse_resize>

            // get the number of dimensions
            std::size_t n_dim = out_lens.size();
-            std::vector<std::vector<std::size_t>> vv_ind(2, std::vector<std::size_t>(out_elements));
-            std::vector<std::vector<std::vector<std::size_t>>> vvv_ind(n_dim, vv_ind);
+            auto vvv_ind = std::vector(n_dim, std::vector(2, std::vector<size_t>(out_elements)));
            std::vector<std::vector<float>> delta(n_dim, std::vector<float>(out_elements));

-            shape_for_each(out_s, [&](auto idx) {
-                auto in_idx  = idx;
-                auto out_idx = out_s.index(idx);
+            shape_for_each(out_s, [&](const auto& out_idx_v, size_t out_idx) {
                for(auto ii = 0; ii < in_lens.size(); ++ii)
                {
-                    auto idx_val = idx_op(in_lens[ii], out_lens[ii], idx[ii], vec_scale[ii]);
+                    auto idx_val = idx_op(in_lens[ii], out_lens[ii], out_idx_v[ii], vec_scale[ii]);
                    vvv_ind[ii][0][out_idx] = nearest_floor(in_lens[ii], idx_val);
                    vvv_ind[ii][1][out_idx] = nearest_ceil(in_lens[ii], idx_val);
                    delta[ii][out_idx]      = idx_val - vvv_ind[ii][0][out_idx];
                }
            });

-            std::vector<std::vector<std::size_t>> vec_dims(out_elements);
-            auto ind      = calc_neighbor_points(vvv_ind, 0, vec_dims, in_s);
+            auto ind = calc_neighbor_points(
+                vvv_ind, 0, std::vector<std::vector<std::size_t>>(out_elements), in_s);
            auto ind_lens = out_lens;
            ind_lens[0] *= (std::size_t{1} << n_dim);
            shape ind_s{shape::int32_type, ind_lens};

--- a/src/onnx/parse_slice.cpp
+++ b/src/onnx/parse_slice.cpp
@@ -34,16 +34,65 @@ namespace onnx {

 struct parse_slice : op_parser<parse_slice>
 {
+
    std::vector<op_desc> operators() const { return {{"Slice"}}; }

+    struct slice_desc
+    {
+        op::slice op;
+        std::vector<instruction_ref> op_args;
+        std::vector<int64_t> steps;
+        std::vector<int64_t> raxes;
+
+        void always_insert(instruction_ref arg) { op_args.insert(op_args.begin(), arg); }
+
+        std::vector<int64_t> insert(instruction_ref arg)
+        {
+            std::vector<int64_t> result;
+            migraphx::argument arg_value = arg->eval();
+            if(arg_value.empty())
+            {
+                op_args.insert(op_args.begin(), arg);
+            }
+            else
+            {
+                arg_value.visit([&](auto s) { result.assign(s.begin(), s.end()); });
+            }
+            return result;
+        }
+    };
+
    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& parser,
-                          onnx_parser::node_info info,
-                          std::vector<instruction_ref> args) const
+                          const onnx_parser::node_info& info,
+                          const std::vector<instruction_ref>& args) const
    {
-        op::slice op;
+        auto sd  = construct_slice_desc(parser, info, args);
+        auto ins = info.add_instruction(sd.op, sd.op_args);
+        if(not sd.raxes.empty())
+        {
+            ins = info.add_instruction(make_op("reverse", {{"axes", sd.raxes}}), ins);
+        }
+        // If any steps are other than default 1, add a "step" op
+        if(std::any_of(sd.steps.begin(), sd.steps.end(), [](auto s) { return std::abs(s) != 1; }))
+        {
+            std::vector<int64_t> nsteps;
+            std::transform(sd.steps.begin(),
+                           sd.steps.end(),
+                           std::back_inserter(nsteps),
+                           [](auto s) { return std::abs(s); });
+            return ins = info.add_instruction(
+                       make_op("step", {{"axes", sd.op.axes}, {"steps", nsteps}}), ins);
+        }
+        else
+            return ins;
+    }

-        std::vector<int64_t> steps;
+    slice_desc construct_slice_desc(const onnx_parser& parser,
+                                    onnx_parser::node_info info,
+                                    std::vector<instruction_ref> args) const
+    {
+        slice_desc sd;

        // slice can have up to 5 inputs, we first check the 5th one
        // to decide whether MIGRAPHX can handle this slice.
@@ -51,89 +100,73 @@ struct parse_slice : op_parser<parse_slice>
        {
            migraphx::argument step_arg = args.back()->eval();
            check_arg_empty(step_arg, "PARSE_SLICE: cannot handle variable steps for slice");
-            step_arg.visit([&](auto s) { steps.assign(s.begin(), s.end()); });
+            step_arg.visit([&](auto s) { sd.steps.assign(s.begin(), s.end()); });
        }

        if(args.size() >= 4)
        {
-            migraphx::argument axes_arg = args.at(3)->eval();
-            check_arg_empty(axes_arg, "PARSE_SLICE: cannot handle variable axes for slice");
-            axes_arg.visit([&](auto s) { op.axes.assign(s.begin(), s.end()); });
+            sd.op.axes = sd.insert(args.at(3));
        }
        else if(contains(info.attributes, "axes"))
        {
            literal s = parser.parse_value(info.attributes.at("axes"));
-            s.visit([&](auto v) { copy(v, std::back_inserter(op.axes)); });
+            s.visit([&](auto v) { copy(v, std::back_inserter(sd.op.axes)); });
        }

        if(args.size() >= 3)
        {
-            migraphx::argument end_arg = args.at(2)->eval();
-            check_arg_empty(end_arg, "PARSE_SLICE: cannot handle variable ends for slice");
-            end_arg.visit([&](auto s) { op.ends.assign(s.begin(), s.end()); });
+            sd.op.ends = sd.insert(args.at(2));
        }
        else if(contains(info.attributes, "ends"))
        {
            literal s = parser.parse_value(info.attributes.at("ends"));
-            s.visit([&](auto v) { copy(v, std::back_inserter(op.ends)); });
+            s.visit([&](auto v) { copy(v, std::back_inserter(sd.op.ends)); });
        }

        if(args.size() >= 2)
        {
-            migraphx::argument start_arg = args.at(1)->eval();
-            check_arg_empty(start_arg, "PARSE_SLICE: cannot handle variable starts for slice");
-            start_arg.visit([&](auto s) { op.starts.assign(s.begin(), s.end()); });
+            sd.op.starts = sd.insert(args.at(1));
        }
        else if(contains(info.attributes, "starts"))
        {
            literal s = parser.parse_value(info.attributes.at("starts"));
-            s.visit([&](auto v) { copy(v, std::back_inserter(op.starts)); });
+            s.visit([&](auto v) { copy(v, std::back_inserter(sd.op.starts)); });
        }

+        // data input argument
+        sd.always_insert(args.at(0));
+
        // If axes arg is not given, the default is all of them.
-        if(op.axes.empty())
+        if(sd.op.axes.empty() and sd.op_args.size() < 3)
        {
            std::vector<int64_t> axes(args[0]->get_shape().ndim());
            std::iota(axes.begin(), axes.end(), int64_t{0});
-            op.axes = axes;
+            sd.op.axes = axes;
        }

-        std::vector<int64_t> raxes;
+        if(not sd.steps.empty())
+        {
+            if(sd.op.starts.empty() or sd.op.ends.empty())
+                MIGRAPHX_THROW("PARSE_SLICE: steps and variable starts and ends is not supported");
+            if(sd.op.axes.empty())
+                MIGRAPHX_THROW("PARSE_SLICE: steps and variable axes is not supported");
+        }

-        assert(steps.empty() or steps.size() == op.axes.size());
-        assert(op.axes.size() == op.starts.size());
-        assert(op.axes.size() == op.ends.size());
+        assert(sd.steps.empty() or sd.steps.size() == sd.op.axes.size());

        // If any axes have negative step, prepare to add a "reverse" op
-        for(auto i : range(steps.size()))
+        for(auto i : range(sd.steps.size()))
        {
-            if(steps[i] >= 0)
+            if(sd.steps[i] >= 0)
                continue;
-            op.starts[i] += 1;
-            if(op.starts[i] == 0)
-                op.starts[i] = INT_MAX;
-            op.ends[i] += 1;
-            raxes.push_back(op.axes[i]);
-            std::swap(op.starts[i], op.ends[i]);
-        }
-
-        auto ins = info.add_instruction(op, args[0]);
-        if(not raxes.empty())
-        {
-            ins = info.add_instruction(make_op("reverse", {{"axes", raxes}}), ins);
+            sd.op.starts[i] += 1;
+            if(sd.op.starts[i] == 0)
+                sd.op.starts[i] = INT_MAX;
+            sd.op.ends[i] += 1;
+            sd.raxes.push_back(sd.op.axes[i]);
+            std::swap(sd.op.starts[i], sd.op.ends[i]);
        }
-        // If any steps are other than default 1, add a "steps" op
-        if(std::any_of(steps.begin(), steps.end(), [](auto s) { return std::abs(s) != 1; }))
-        {
-            std::vector<int64_t> nsteps;
-            std::transform(steps.begin(), steps.end(), std::back_inserter(nsteps), [](auto s) {
-                return std::abs(s);
-            });
-            return ins = info.add_instruction(
-                       make_op("step", {{"axes", op.axes}, {"steps", nsteps}}), ins);
-        }
-        else
-            return ins;
+        return sd;
    }
 };