manual_merge

ac04f3cc · Khalique Ahmed · d39c3343 · d8011adf · ac04f3cc · ac04f3cc
Commit ac04f3cc authored Nov 10, 2023 by Khalique Ahmed
20 changed files
--- a/src/include/migraphx/op/random_seed.hpp
+++ b/src/include/migraphx/op/random_seed.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef MIGRAPHX_GUARD_OPERATORS_RANDOM_SEED_HPP
+#define MIGRAPHX_GUARD_OPERATORS_RANDOM_SEED_HPP
+
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/argument.hpp>
+#include <random>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+
+/**
+ * Generates a random seed for use by random number generators.  Generating the seed at
+ * runtime (from std::random_device) is intended to give a different random sequence on
+ * every execution.  This operation takes no inputs; its only attribute, `dtype`, selects
+ * the element type of the output (uint64 by default).  The output is a tensor holding a
+ * single value.
+ */
+struct random_seed
+{
+    // Element type used to build the output shape.
+    shape::type_t dtype = shape::type_t::uint64_type;
+
+    // Exposes `dtype` under the attribute name "dtype".
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.dtype, "dtype"));
+    }
+
+    std::string name() const { return "random_seed"; }
+
+    // Checks the inputs with has(0) and builds the output shape from `dtype` alone.
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        check_shapes{inputs, *this}.has(0);
+        return shape{dtype};
+    }
+
+    // Constructs the output from `output_shape` and stores a single draw from
+    // std::random_device in its first element.  The input arguments are ignored.
+    argument compute(const shape& output_shape, const std::vector<argument>&) const
+    {
+        argument seed_holder(output_shape);
+
+        seed_holder.visit([&](auto view) {
+            const auto entropy = std::random_device{}();
+            view.front()       = entropy;
+        });
+        return seed_holder;
+    }
+};
+
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/include/migraphx/op/random_uniform.hpp
+++ b/src/include/migraphx/op/random_uniform.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * Random Uniform distribution operator.  Given a shape, populate it with random
+ * values.  Calls to random_uniform using the same randomization seed as a literal
+ * input will always generate the same pseudo-random sequence.
+ *
+ *      Inputs:   (1) randomization seed (any type is allowed)
+ *                (2) output buffer argument to be populated.
+ *
+ *      Attributes:  none
+ *
+ *      Output:   Returns the buffer from input #2.
+ *
+ */
+#ifndef MIGRAPHX_GUARD_OPERATORS_RANDOM_UNIFORM_HPP
+#define MIGRAPHX_GUARD_OPERATORS_RANDOM_UNIFORM_HPP
+
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/argument.hpp>
+#include <random>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+
+/**
+ * random_uniform populates the passed shape with random numbers, in a uniform
+ * distribution.  Range for floating-point data types is [0, 1);
+ * for integer types it is [0, <max value for the type>]
+ */
+struct random_uniform
+{
+    // The random_uniform operation needs the random number generator seed
+    // to be passed as a runtime input (input #1); it is not an attribute.
+
+    std::string name() const { return "random_uniform"; }
+
+    // Checks the inputs with has(2) (seed, output buffer) and returns the shape of the
+    // second input as the output shape.
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this, true}.has(2);
+
+        return inputs.at(1);
+    }
+
+    // Fills an argument of the computed output shape with uniformly distributed
+    // pseudo-random values.  The generator is seeded from element 0 of args[0], so equal
+    // seeds produce equal sequences.
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
+    {
+        // The values are generated into an argument constructed from the computed output
+        // shape; only the seed (args[0]) is read from the inputs here.
+        // NOTE(review): output_alias() below reports input #1, yet args[1] is never written
+        // in this function -- confirm which of the two is the intended behavior.
+        argument result{dyn_out.computed_shape};
+        uint64_t local_seed = args[0].at<uint64_t>(0);
+        // std::mt19937 seeds from `value mod 2^32`, so only the low 32 bits of the seed
+        // take part in seeding.
+        std::mt19937 gen(local_seed);
+
+        result.visit([&](auto output) {
+            using type = typename decltype(output)::value_type;
+            if constexpr(std::is_integral<type>{})
+            {
+#ifdef _MSC_VER
+                // According to the C++ specification, the effect is undefined if the result type
+                // for the generator is not one of short, int, long, long long, unsigned short,
+                // unsigned int, unsigned long, or unsigned long long. See
+                // https://en.cppreference.com/w/cpp/numeric/random/uniform_int_distribution.
+                if constexpr(sizeof(type) == 1)
+                {
+                    // Draw as int, but keep the same [0, max] range that the
+                    // default-constructed distribution in the branch below uses, so that
+                    // signed 1-byte types never receive negative values.
+                    std::uniform_int_distribution<int> dis{0, std::numeric_limits<type>::max()};
+                    std::generate(output.begin(), output.end(), [&] { return dis(gen); });
+                }
+                else
+#endif
+                {
+                    // default range for all integer types is
+                    // [0, std::numeric_limits<type>::max()].
+                    // Todo:  enable different ranges
+                    std::uniform_int_distribution<type> dis;
+                    std::generate(output.begin(), output.end(), [&] { return dis(gen); });
+                }
+            }
+            else
+            {
+                // default real distribution type is double with range [0, 1);
+                // each value is converted to the output element type on assignment.
+                std::uniform_real_distribution<> dis;
+                std::generate(output.begin(), output.end(), [&] { return dis(gen); });
+            }
+        });
+        return result;
+    }
+
+    // Reports input #1 (the output buffer argument) as the alias of the output.
+    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 1; }
+};
+
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/include/migraphx/op/reduce_op.hpp
+++ b/src/include/migraphx/op/reduce_op.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -163,7 +163,7 @@ struct reduce_op : op_name<Derived>
        auto& self        = static_cast<const Derived&>(*this);
        auto data_idx     = out_idx;
        accumulator val   = self.init();
-        shape_for_each(batch_shape, [&](auto b_idx) {
+        shape_for_each(batch_shape, [&](const auto& b_idx) {
            this->tune_dims(tuned_axes, b_idx, data_idx);
            accumulator x = input(data_idx.begin(), data_idx.end());
            val           = self.op()(accumulator{self.input()(x)}, val);

--- a/src/include/migraphx/op/reshape.hpp
+++ b/src/include/migraphx/op/reshape.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -29,12 +29,29 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/dyn_output.hpp>
-#include <migraphx/optional.hpp>
+
+#include <algorithm>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+/**
+ * 1 input version:
+ * reshape(input_data)
+ * this.dims = output_dims
+ * Makes a copy of input_data to the output shape.
+ *
+ * 2 input version:
+ * reshape(input_data, output_buffer)
+ * this.dims = unset
+ * Copies input_data to output_buffer; output_buffer already has the output shape.
+ * This version will not fail gracefully if the input shape and output_buffer shape are
+ * incompatible. There's a throw that will catch when the number of elements do not match at
+ * runtime. This version should only be used for dynamic reshapes (output dimensions only known at
+ * runtime). If output_buffer has a static shape during compile/parse, you can use the 1 input
+ * version.
+ */
 struct reshape
 {
    std::vector<int64_t> dims;
@@ -45,8 +62,6 @@ struct reshape
        return pack(f(self.dims, "dims"));
    }

-    value attributes() const { return {{"require_std_shape", true}}; }
-
    std::string name() const { return "reshape"; }

    shape dyn_compute_shape(shape s0) const
@@ -110,27 +125,9 @@ struct reshape
        return it;
    }

-    template <class DimIterator, class StrideIterator>
-    static auto can_strides_merge(DimIterator dim_start,
-                                  DimIterator dim_last,
-                                  StrideIterator stride_start,
-                                  StrideIterator stride_last)
-    {
-        assert(std::distance(dim_start, dim_last) == std::distance(stride_start, stride_last));
-        auto cstride = *std::prev(stride_last);
-        return std::equal(std::make_reverse_iterator(dim_last),
-                          std::make_reverse_iterator(dim_start + 1),
-                          std::make_reverse_iterator(stride_last - 1),
-                          std::make_reverse_iterator(stride_start),
-                          [&](auto dim, auto stride) {
-                              cstride *= dim;
-                              return stride == cstride;
-                          });
-    }
-
-    // This will reshape the dimesions of the input shape to use the lens of
-    // `rdims`. If this can't be done without changing memory layout then it
-    // will return nullopt
+    // This will attempt to alias the dimensions of the input shape to the lens of
+    // `rdims`. Unlike reshape_lazy, reshape may change the memory layout by copying, so the
+    // cases that previously returned nullopt (layout could not be aliased) no longer fail.
    static optional<shape> reshape_dims(const shape& input, const std::vector<std::size_t>& rdims)
    {
        if(input.standard())
@@ -155,13 +152,8 @@ struct reshape
            {
                auto start = idims.begin() + i;
                auto it    = compute_end_dim(start, idims.end(), rdim);
-                if(it == start)
-                    return nullopt;
                auto n = it - start;
                assert((i + n) <= istrides.size());
-                if(not can_strides_merge(
-                       start, it + 1, istrides.begin() + i, istrides.begin() + i + n + 1))
-                    return nullopt;
                i += n;
                rstrides.push_back(istrides[i]);
            }
@@ -170,8 +162,7 @@ struct reshape
            {
                auto start = rdims.begin() + i;
                auto it    = compute_end_dim(start, rdims.end(), idim);
-                if(it == start)
-                    return nullopt;
+
                auto n = it - start;
                assert((r + n) <= rdims.size());
                auto stride = istrides[i] * idim;
@@ -191,15 +182,11 @@ struct reshape
            auto stride = rstrides.back();
            for(auto d : range(rdims.begin() + rstrides.size(), rdims.end()))
            {
-                if(d != 1)
-                    return nullopt;
+                (void)d;
                rstrides.push_back(stride);
            }
        }

-        if(rdims.size() != rstrides.size())
-            return nullopt;
-
        return shape{input.type(), rdims, rstrides};
    }

@@ -233,41 +220,68 @@ struct reshape
        }

        auto s = reshape_dims(inputs.front(), rdims);
-        if(not s.has_value())
-            MIGRAPHX_THROW("Reshape on axis that is not packed.");

        if(s->elements() != inputs.front().elements())
-            MIGRAPHX_THROW("Reshape: Wrong number of elements for reshape: reshape has " +
+            MIGRAPHX_THROW("reshape: Wrong number of elements for reshape: reshape has " +
                           std::to_string(s->elements()) + " elements whereas the input has " +
                           std::to_string(inputs.front().elements()));

-        assert(s->bytes() == inputs.front().bytes());
        return *s;
    }

    shape compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this, true}.has(1);
+        check_shapes{inputs, *this, true}.has(1, 2);
+
        auto n_neg_dims = std::count(dims.begin(), dims.end(), -1);
        if(n_neg_dims > 1)
-            MIGRAPHX_THROW("Reshape: Dimensions for reshape can only have one -1 dim");
-        auto s0 = inputs[0];
-        if(s0.dynamic())
+            MIGRAPHX_THROW("reshape: Dimensions for reshape can only have one -1 dim");
+
+        auto s0 = inputs.front();
+        if(inputs.size() == 1)
        {
-            return dyn_compute_shape(s0);
+            if(s0.dynamic())
+            {
+                return dyn_compute_shape(s0);
+            }
+            else
+            {
+                return static_compute_shape(inputs, n_neg_dims);
+            }
        }
        else
        {
-            return static_compute_shape(inputs, n_neg_dims);
+            return inputs.back();
        }
    }

    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        return args[0].reshape(dyn_out.computed_shape);
-    }
+        assert(dyn_out.computed_shape.standard());
+        if(args.size() == 1)
+        {
+            argument result{dyn_out.computed_shape};

-    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
+            visit_all(result, args[0])([&](auto output, auto input) {
+                std::copy(input.begin(), input.end(), output.begin());
+            });
+            return result;
+        }
+        else
+        {
+            // 2 arg
+            if(args[0].get_shape().elements() != args[1].get_shape().elements())
+            {
+                MIGRAPHX_THROW("Reshape: Number of elements must match at runtime. Input: " +
+                               std::to_string(args[0].get_shape().elements()) +
+                               " Output buffer: " + std::to_string(args[1].get_shape().elements()));
+            }
+            visit_all(args[1], args[0])([&](auto output, auto input) {
+                std::copy(input.begin(), input.end(), output.begin());
+            });
+            return args[1];
+        }
+    }
 };

 } // namespace op

--- a/src/include/migraphx/op/reshape_lazy.hpp
+++ b/src/include/migraphx/op/reshape_lazy.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_OPERATORS_RESHAPE_LAZY_HPP
+#define MIGRAPHX_GUARD_OPERATORS_RESHAPE_LAZY_HPP
+
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/argument.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/value.hpp>
+#include <migraphx/dyn_output.hpp>
+#include <migraphx/optional.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+
+struct reshape_lazy
+{
+    // Requested output dimensions.  In static_compute_shape a 0 entry copies the input
+    // dimension at the same position and a -1 entry (at most one is accepted by
+    // compute_shape) is computed from the remaining element count.
+    std::vector<int64_t> dims;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.dims, "dims"));
+    }
+
+    value attributes() const { return {{"require_std_shape", true}}; }
+
+    std::string name() const { return "reshape_lazy"; }
+
+    // Output shape for a dynamic input.  Exactly one input dimension may be non-fixed, the
+    // entry of `dims` at that position must be 0 or -1, and the fixed dimensions must
+    // multiply to the same element count on both sides.  Throws otherwise.
+    shape dyn_compute_shape(shape s0) const
+    {
+        auto dyn_dims = s0.dyn_dims();
+        // `dims` and the input dimensions are walked in lockstep below, so a rank
+        // mismatch would index past the end of the shorter one.
+        if(dims.size() != dyn_dims.size())
+        {
+            MIGRAPHX_THROW("reshape_lazy: dims must have the same rank as the dynamic input. "
+                           "Input: " +
+                           std::to_string(dyn_dims.size()) +
+                           " Output: " + std::to_string(dims.size()));
+        }
+        auto num_not_fixed = std::count_if(
+            dyn_dims.cbegin(), dyn_dims.cend(), [](auto dd) { return not dd.is_fixed(); });
+        if(num_not_fixed != 1)
+        {
+            MIGRAPHX_THROW("reshape_lazy: Only supports one non-fixed dynamic_dimension");
+        }
+        // track number of fixed elements in input and output
+        std::size_t num_dims_ele = 1;
+        std::size_t num_dd_ele   = 1;
+        for(std::size_t i = 0; i < dyn_dims.size(); ++i)
+        {
+            if(dyn_dims[i].is_fixed())
+            {
+                num_dims_ele *= dims[i];
+                num_dd_ele *= dyn_dims[i].min;
+            }
+            else
+            {
+                if(dims[i] != 0 and dims[i] != -1)
+                {
+                    MIGRAPHX_THROW(
+                        "reshape_lazy: Non-fixed dynamic_dimension doesn't match with 0 or -1 "
+                        "output dimension");
+                }
+            }
+        }
+        if(num_dims_ele != num_dd_ele)
+        {
+            MIGRAPHX_THROW("reshape_lazy: Number of fixed elements must match. Input: " +
+                           std::to_string(num_dd_ele) + " Output: " + std::to_string(num_dims_ele));
+        }
+        // construct output dynamic shape from dims attribute: the non-fixed input
+        // dimension is passed through, every other entry becomes a fixed {dim, dim}
+        std::vector<shape::dynamic_dimension> output_dyn_dims(dims.size());
+        std::transform(dims.cbegin(),
+                       dims.cend(),
+                       dyn_dims.cbegin(),
+                       output_dyn_dims.begin(),
+                       [](std::size_t dim, auto dyn_dim) {
+                           if(not dyn_dim.is_fixed())
+                               return dyn_dim;
+                           return shape::dynamic_dimension{dim, dim};
+                       });
+        return {s0.type(), output_dyn_dims};
+    }
+
+    // Multiplies the elements of [start, last) in order until the running product reaches
+    // `dim`.  Returns the iterator to the element at which the product equals `dim`
+    // exactly, or `start` when no prefix multiplies to exactly `dim`.  Callers treat a
+    // result equal to `start` as failure.
+    template <class Iterator>
+    static auto compute_end_dim(Iterator start, Iterator last, std::size_t dim)
+    {
+        std::size_t x = 1;
+        auto it       = std::find_if(start, last, [&](auto i) {
+            x *= i;
+            return x >= dim;
+        });
+        if(x != dim)
+            return start;
+        return it;
+    }
+
+    // Returns true when the dimensions in [dim_start, dim_last) can be merged into a
+    // single dimension: walking from the innermost entry outwards, every outer stride must
+    // equal the innermost stride times the product of all dimensions inside it.
+    // Both ranges must have the same, non-zero length.
+    template <class DimIterator, class StrideIterator>
+    static auto can_strides_merge(DimIterator dim_start,
+                                  DimIterator dim_last,
+                                  StrideIterator stride_start,
+                                  StrideIterator stride_last)
+    {
+        assert(std::distance(dim_start, dim_last) == std::distance(stride_start, stride_last));
+        auto cstride = *std::prev(stride_last);
+        return std::equal(std::make_reverse_iterator(dim_last),
+                          std::make_reverse_iterator(dim_start + 1),
+                          std::make_reverse_iterator(stride_last - 1),
+                          std::make_reverse_iterator(stride_start),
+                          [&](auto dim, auto stride) {
+                              cstride *= dim;
+                              return stride == cstride;
+                          });
+    }
+
+    // This will attempt to alias the dimensions of the input shape to the lens of
+    // `rdims`. If this can't be done without changing memory layout then it
+    // will return nullopt
+    static optional<shape> reshape_lazy_dims(const shape& input,
+                                             const std::vector<std::size_t>& rdims)
+    {
+        if(input.standard())
+            return shape{input.type(), rdims};
+
+        const auto& idims    = input.lens();
+        const auto& istrides = input.strides();
+
+        // `i` walks the input dims, `r` walks the requested output dims
+        std::vector<std::size_t> rstrides;
+        std::size_t i = 0;
+        std::size_t r = 0;
+        while(i < idims.size() and r < rdims.size())
+        {
+            auto idim = idims[i];
+            auto rdim = rdims[r];
+            if(rdim == idim)
+            {
+                rstrides.push_back(istrides[i]);
+            }
+            // squeeze: several input dims are merged into one output dim
+            else if(rdim > idim)
+            {
+                auto start = idims.begin() + i;
+                auto it    = compute_end_dim(start, idims.end(), rdim);
+                if(it == start)
+                    return nullopt;
+                auto n = it - start;
+                assert((i + n) <= istrides.size());
+                if(not can_strides_merge(
+                       start, it + 1, istrides.begin() + i, istrides.begin() + i + n + 1))
+                    return nullopt;
+                i += n;
+                rstrides.push_back(istrides[i]);
+            }
+            // unsqueeze: one input dim is split into several output dims
+            else // if(rdim < idim)
+            {
+                // The run of output dims that splits `idim` starts at the current output
+                // position `r`; `i` indexes the input dims and may differ from `r`.
+                auto start = rdims.begin() + r;
+                auto it    = compute_end_dim(start, rdims.end(), idim);
+                if(it == start)
+                    return nullopt;
+                auto n = it - start;
+                assert((r + n) <= rdims.size());
+                auto stride = istrides[i] * idim;
+                std::for_each(start, it + 1, [&](auto dim) {
+                    stride /= dim;
+                    rstrides.push_back(stride);
+                });
+                r += n;
+            }
+            i++;
+            r++;
+        }
+
+        // Handle trailing 1s
+        if(rstrides.size() < rdims.size() and not rstrides.empty())
+        {
+            auto stride = rstrides.back();
+            for(auto d : range(rdims.begin() + rstrides.size(), rdims.end()))
+            {
+                if(d != 1)
+                    return nullopt;
+                rstrides.push_back(stride);
+            }
+        }
+
+        if(rdims.size() != rstrides.size())
+            return nullopt;
+
+        return shape{input.type(), rdims, rstrides};
+    }
+
+    // Output shape for a static input.  Resolves the 0 and -1 entries of `dims`, then asks
+    // reshape_lazy_dims for a shape with matching strides.  Throws when the layout cannot
+    // be kept or when the element counts differ.
+    shape static_compute_shape(std::vector<shape> inputs, std::size_t n_neg_dims) const
+    {
+        check_shapes{inputs, *this}.has(1);
+        auto&& idims = inputs.front().lens();
+        std::vector<std::size_t> rdims(dims.begin(), dims.end());
+
+        for(std::size_t i = 0; i < dims.size(); i++)
+        {
+            if(dims[i] == 0)
+            {
+                // 0 copies the input dimension, which must exist at this position
+                if(i >= idims.size())
+                    MIGRAPHX_THROW("reshape_lazy: 0 dim at index " + std::to_string(i) +
+                                   " has no matching input dimension");
+                rdims[i] = idims[i];
+            }
+
+            // rdims holds std::size_t, so -1 would wrap around to the maximum value and
+            // make the later computation incorrect; use 1 as a placeholder for now
+            if(dims[i] == -1)
+                rdims[i] = 1;
+        }
+
+        if(n_neg_dims > 0)
+        {
+            // Accumulate in std::size_t: an `int` initial value would make std::accumulate
+            // carry the running product in `int` and truncate large element counts.
+            const std::size_t known_elements = std::accumulate(
+                rdims.begin(), rdims.end(), std::size_t{1}, std::multiplies<std::size_t>());
+            size_t missing_dim = inputs.front().elements() / known_elements;
+            for(std::size_t i = 0; i < rdims.size(); i++)
+            {
+                if(dims[i] == -1)
+                    rdims[i] = missing_dim;
+            }
+        }
+
+        auto s = reshape_lazy_dims(inputs.front(), rdims);
+        if(not s.has_value())
+            MIGRAPHX_THROW("reshape_lazy on axis that is not packed.");
+
+        if(s->elements() != inputs.front().elements())
+            MIGRAPHX_THROW(
+                "reshape_lazy: Wrong number of elements for reshape_lazy: reshape_lazy has " +
+                std::to_string(s->elements()) + " elements whereas the input has " +
+                std::to_string(inputs.front().elements()));
+
+        assert(s->bytes() == inputs.front().bytes());
+        return *s;
+    }
+
+    // Validates `dims` (at most one -1) and dispatches to the dynamic or static shape
+    // computation depending on the single input's shape.
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this, true}.has(1);
+        auto n_neg_dims = std::count(dims.begin(), dims.end(), -1);
+        if(n_neg_dims > 1)
+            MIGRAPHX_THROW("reshape_lazy: Dimensions for reshape_lazy can only have one -1 dim");
+        auto s0 = inputs[0];
+        if(s0.dynamic())
+        {
+            return dyn_compute_shape(s0);
+        }
+        else
+        {
+            return static_compute_shape(inputs, n_neg_dims);
+        }
+    }
+
+    // Returns the first argument reshaped to the computed output shape.
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
+    {
+        return args[0].reshape(dyn_out.computed_shape);
+    }
+
+    // Reports input #0 (the data input) as the alias of the output.
+    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
+};
+
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/include/migraphx/op/reverse.hpp
+++ b/src/include/migraphx/op/reverse.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -70,13 +70,13 @@ struct reverse
        argument result{s};
        auto lens = s.lens();
        visit_all(result, args.front())([&](auto output, auto input) {
-            shape_for_each(s, [&](const auto& out_idx) {
-                auto in_idx = out_idx;
+            shape_for_each(s, [&](const auto& out_idx_v, size_t out_idx) {
+                auto in_idx = out_idx_v;
                for(const auto& axis : axes)
                {
-                    in_idx[axis] = lens[axis] - 1 - out_idx[axis];
+                    in_idx[axis] = lens[axis] - 1 - out_idx_v[axis];
                }
-                output[s.index(out_idx)] = input[s.index(in_idx)];
+                output[out_idx] = input[s.index(in_idx)];
            });
        });


--- a/src/include/migraphx/op/roialign.hpp
+++ b/src/include/migraphx/op/roialign.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -33,6 +33,7 @@
 #include <migraphx/dfor.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/shape_for_each.hpp>
+#include <array>
 #include <cmath>
 #include <numeric>
 #include <utility>
@@ -113,10 +114,9 @@ struct roialign
    {
        std::vector<pos_weight> results(bin_grid_size[0] * bin_grid_size[1] * output_height *
                                        output_width);
-        shape_for_each(comp_s, [&](auto idx) {
-            std::array<std::size_t, 2> p = {idx[0], idx[1]};
-            std::array<std::size_t, 2> i = {idx[2], idx[3]};
-            auto index                   = comp_s.index(idx);
+        shape_for_each(comp_s, [&](const auto& idx_v, size_t index) {
+            std::array<std::size_t, 2> p = {idx_v[0], idx_v[1]};
+            std::array<std::size_t, 2> i = {idx_v[2], idx_v[3]};

            std::array<float, 2> xy{};
            std::array<int64_t, 2> low{};
@@ -125,7 +125,7 @@ struct roialign
            {
                xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] +
                         (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii];
-                xy[ii] = (coord_trans_mode == "output_half_pixel") ? (xy[ii] - 0.5f) : xy[ii];
+                xy[ii] = (coord_trans_mode == "half_pixel") ? (xy[ii] - 0.5f) : xy[ii];
                if(xy[ii] < -1.0 or xy[ii] > dims[ii])
                {
                    results[index] = pos_weight{};
@@ -255,7 +255,7 @@ struct roialign
                std::vector<std::size_t> comp_lens1 = {channels, out_dims[0], out_dims[1]};
                shape comp_s1{migraphx::shape::float_type, comp_lens1};
                std::vector<int64_t> vec_index(channels, 0);
-                shape_for_each(comp_s1, [&](auto idx) {
+                shape_for_each(comp_s1, [&](const auto& idx) {
                    auto c  = idx[0];
                    auto ph = idx[1];
                    auto pw = idx[2];

--- a/src/include/migraphx/op/scatter.hpp
+++ b/src/include/migraphx/op/scatter.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -66,7 +66,7 @@ struct scatter : op_name<Derived>

    shape normalize_compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this}.has(3).standard();
+        check_shapes{inputs, *this}.has(3);
        // If non-packed, this converts to a packed output while preserving permutation of tensor
        return inputs.front().with_lens(inputs.front().lens());
    }

--- a/src/include/migraphx/op/slice.hpp
+++ b/src/include/migraphx/op/slice.hpp
@@ -27,19 +27,58 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
-#include <migraphx/dyn_output.hpp>
 #include <migraphx/value.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
+#include <migraphx/normalize_attributes.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+/**
+ * Slice operator that accepts variable axes, starts and ends.
+ * All of `starts`, `ends`, and `axes` must be supplied by either
+ * their attribute or an input (but not both).
+ *
+ * Valid calls:
+ * slice(input); axes, starts, ends set
+ * slice(input, starts); axes, ends set
+ * slice(input, ends); starts, axes set
+ * slice(input, axes); starts, ends set
+ * slice(input, starts, ends); axes set
+ * slice(input, starts, axes); ends set
+ * slice(input, ends, axes); starts set
+ * slice(input, starts, ends, axes); none set
+ *
+ * Attributes:
+ * axes: constant axes to slice over (optional)
+ * starts: constant slice starting indices (optional)
+ * ends: constant slice ending indices (optional)
+ *
+ * Parameters:
+ * data: the input tensor to slice (dynamic or static shape)
+ * input_starts: starting indices of slice (optional, static shape)
+ * input_ends: ending indices of slice (optional, static shape)
+ * input_axes: axes to slice over (optional, static shape)
+ */
 struct slice
 {
-    std::vector<int64_t> axes;
-    std::vector<int64_t> starts;
-    std::vector<int64_t> ends;
+    std::vector<int64_t> axes{};
+    std::vector<int64_t> starts{};
+    std::vector<int64_t> ends{};
+
+    /**
+     * Named arrays for the set attribute possibilities, ordered as {starts, ends, axes}.
+     */
+    static constexpr std::array<bool, 3> all_set     = {true, true, true};
+    static constexpr std::array<bool, 3> ends_axes   = {false, true, true};
+    static constexpr std::array<bool, 3> starts_axes = {true, false, true};
+    static constexpr std::array<bool, 3> starts_ends = {true, true, false};
+    static constexpr std::array<bool, 3> axes_only   = {false, false, true};
+    static constexpr std::array<bool, 3> ends_only   = {false, true, false};
+    static constexpr std::array<bool, 3> starts_only = {true, false, false};
+    static constexpr std::array<bool, 3> none_set    = {false, false, false};

    template <class Self, class F>
    static auto reflect(Self& self, F f)
@@ -48,28 +87,212 @@ struct slice
    }

    /**
-     * Ensure that attribute vectors axes, starts, and ends are all the same size and values are in
-     * limits.
+     * Ensure that the `axes` attribute is within limits.
+     * Normalization of `starts` and `ends` is also attempted, but it uses the
+     * dynamic_dimension.max values for dynamic shapes, so both have to be renormalized
+     * when slicing over non-fixed dynamic_dimensions.
     */
    value attributes() const
    {
-        value normalize     = value::object{};
-        normalize["axes"]   = value::array{normalize_attribute::include_min};
-        normalize["starts"] = value::array{normalize_attribute::clip_max,
-                                           normalize_attribute::clip_min,
-                                           normalize_attribute::include_max,
-                                           normalize_attribute::use_len,
-                                           normalize_attribute::include_min};
-        normalize["ends"]   = value::array{normalize_attribute::clip_max,
-                                         normalize_attribute::clip_min,
-                                         normalize_attribute::include_max,
-                                         normalize_attribute::use_len,
-                                         normalize_attribute::include_min};
-        return {{"normalize_axes", normalize}};
+        value normalize_axes     = value::object{};
+        normalize_axes["axes"]   = value::array{normalize_attribute::include_min};
+        normalize_axes["starts"] = value::array{normalize_attribute::clip_max,
+                                                normalize_attribute::clip_min,
+                                                normalize_attribute::include_max,
+                                                normalize_attribute::use_len,
+                                                normalize_attribute::include_min};
+        normalize_axes["ends"]   = value::array{normalize_attribute::clip_max,
+                                              normalize_attribute::clip_min,
+                                              normalize_attribute::include_max,
+                                              normalize_attribute::use_len,
+                                              normalize_attribute::include_min};
+        return {{"normalize_axes", normalize_axes}}; // read back in normalize_starts_ends_axes()
    }

    std::string name() const { return "slice"; }

+    /**
+     * Computes the slice output shape dimensions for given starts, ends, and axes.
+     * Templated to also handle tensor views.
+     * Possibly different type between [in_starts, in_ends] and [in_axes] if in_axes is this
+     * object's axes attribute. Assumes in_starts and in_ends are normalized; in_axes are valid.
+     */
+    template <class A, class B>
+    std::vector<std::size_t>
+    lens_calc(const std::vector<std::size_t>& lengths, A in_starts, A in_ends, B in_axes) const
+    {
+        auto new_lens = lengths; // axes that are not sliced keep their input length
+        for(std::size_t i = 0; i < in_axes.size(); ++i)
+        {
+            auto axis      = in_axes[i];
+            new_lens[axis] = in_ends[i] - in_starts[i]; // NOTE(review): wraps if end < start
+        }
+        return new_lens;
+    }
+
+    /// Get the attributes that are non-empty, ordered as {starts, ends, axes}
+    std::array<bool, 3> get_set_attributes() const
+    {
+        // Query the vectors in place (no copies); order must match the named arrays above.
+        const bool has_starts = not this->starts.empty();
+        const bool has_ends   = not this->ends.empty();
+        const bool has_axes   = not this->axes.empty();
+        return {has_starts, has_ends, has_axes};
+    }
+
+    /// Helper for normalize_compute_shape(): output shape when 2 to 4 inputs are given
+    shape compute_two_or_more(std::vector<shape> inputs) const
+    {
+        auto input_shape    = inputs[0];
+        auto set_attributes = get_set_attributes();
+        // check that inputs [1, end) are all 1D, have the same
+        // dimension, and are static
+        check_shapes{inputs.begin() + 1,
+                     inputs.end(),
+                     std::string("SLICE: inputs (starts, ends, and input_axes)"),
+                     false}
+            .only_dims(1)
+            .same_dims();
+        auto dds = input_shape.to_dynamic().dyn_dims();
+        if(inputs.size() == 2)
+        {
+            if(set_attributes == ends_axes)
+            {
+                // attr ends and axes set; inputs are (data, input_starts)
+                if(inputs[1].lens().at(0) != axes.size())
+                {
+                    MIGRAPHX_THROW("SLICE: 2 input and attributes mismatch");
+                }
+                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
+                    dds.at(axis) = {0, dds.at(axis).max};
+                });
+            }
+            else if(set_attributes == starts_axes)
+            {
+                // attr starts and axes set; inputs are (data, input_ends)
+                if(inputs[1].lens().at(0) != axes.size())
+                {
+                    MIGRAPHX_THROW("SLICE: 2 input and attributes mismatch");
+                }
+                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
+                    dds.at(axis) = {0, dds.at(axis).max};
+                });
+            }
+            else if(set_attributes == starts_ends)
+            {
+                // attr starts and ends set; inputs are (data, input_axes)
+                if(inputs[1].lens().at(0) != starts.size())
+                {
+                    MIGRAPHX_THROW("SLICE: 2 input and attributes mismatch");
+                }
+                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
+                    return shape::dynamic_dimension{0, dd.max};
+                });
+            }
+            else
+            {
+                MIGRAPHX_THROW("SLICE: Invalid 2 input and attributes configuration");
+            }
+        }
+        else if(inputs.size() == 3)
+        {
+            if(set_attributes == axes_only)
+            {
+                // attr axes set; inputs are (data, input_starts, input_ends)
+                if(inputs[1].lens().at(0) != axes.size())
+                {
+                    MIGRAPHX_THROW("SLICE: 3 input and attributes mismatch");
+                }
+                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
+                    dds.at(axis) = {0, dds.at(axis).max};
+                });
+            }
+            else if(set_attributes == ends_only)
+            {
+                // attr ends set; inputs are (data, input_starts, input_axes)
+                if(inputs[1].lens().at(0) != ends.size())
+                {
+                    MIGRAPHX_THROW("SLICE: 3 input and attributes mismatch");
+                }
+                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
+                    return shape::dynamic_dimension{0, dd.max};
+                });
+            }
+            else if(set_attributes == starts_only)
+
+            {
+                // attr starts set; inputs are (data, input_ends, input_axes)
+                if(inputs[1].lens().at(0) != starts.size())
+                {
+                    MIGRAPHX_THROW("SLICE: 3 input and attributes mismatch");
+                }
+                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
+                    return shape::dynamic_dimension{0, dd.max};
+                });
+            }
+            else
+            {
+                MIGRAPHX_THROW("SLICE: Invalid 3 input and attributes configuration");
+            }
+        }
+        else
+        {
+            // all 4 inputs (data, inputs_starts, input_ends, input_axes)
+            std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
+                return shape::dynamic_dimension{0, dd.max};
+            });
+        }
+        return shape{input_shape.type(), dds}; // sliced dims were widened to {0, max} above
+    }
+
+    // Computes the output shape; uses the normalize_axes flag to normalize axes, starts, and ends
+    shape normalize_compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this, true}.has(1, 2, 3, 4);
+        if(inputs.size() == 1)
+        {
+            auto input_shape    = inputs[0];
+            auto set_attributes = get_set_attributes();
+            if(set_attributes != all_set) // with only `data` given, every attribute is needed
+            {
+                MIGRAPHX_THROW("SLICE 1_arg: Invalid 1 input and attributes configuration");
+            }
+            // NOTE: make sure to update how normalization works here if this type of slicing is
+            // changed to be allowed
+            if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
+                   return not input_shape.dyn_dims()[axis].is_fixed();
+               }))
+            {
+                MIGRAPHX_THROW(
+                    "SLICE 1_arg: slicing is not allowed on non-fixed dynamic input axis ");
+            }
+            if(input_shape.dynamic())
+            {
+                return shape{
+                    input_shape.type(),
+                    lens_calc(input_shape.min_lens(), this->starts, this->ends, this->axes),
+                    lens_calc(input_shape.max_lens(), this->starts, this->ends, this->axes),
+                    {}};
+            }
+            else
+            {
+                return shape{input_shape.type(),
+                             lens_calc(input_shape.lens(), this->starts, this->ends, this->axes),
+                             input_shape.strides()}; // sliced lens, unchanged input strides
+            }
+        }
+        else
+        {
+            return compute_two_or_more(inputs); // 2-4 inputs: some of starts/ends/axes are inputs
+        }
+    }
+
+    /**
+     * Calculates the starting offset for the sliced tensor.
+     * Used in compute when only data input and all other information are in the attributes.
+     *
+     * \param s static input shape
+     */
    auto compute_offset(const shape& s) const
    {
        const std::vector<std::size_t>& lens    = s.lens();
@@ -90,80 +313,193 @@ struct slice
                offset += starts[axis] * strides[axis];
            }
        }
-        return offset;
+        return offset * s.type_size();
    }

-    shape normalize_compute_shape(std::vector<shape> inputs) const
+    /**
+     * Calculates the starting offset for the sliced tensor (for aliasing).
+     * Used for 2-4 inputs to `slice`.
+     *
+     * \param s static input shape
+     * \param input_starts starting indices of slice
+     * \param ax_vec axes to slice on
+     */
+    template <class T>
+    auto compute_offset(const shape& s, const T& input_starts, const T& ax_vec) const
    {
-        check_shapes{inputs, *this, true}.has(1);
-        auto input_shape = inputs[0];
-        auto t           = input_shape.type();
-
-        // TODO:  When support for dynamic shapes is added to normalize_attributes,
-        //  remove this restriction.
-        if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
-               return not input_shape.dyn_dims()[axis].is_fixed();
-           }))
+        std::size_t ret = 0; // not `auto`: that deduces int and can truncate large offsets
+        for(std::size_t i = 0; i < ax_vec.size(); ++i)
        {
-            MIGRAPHX_THROW("SLICE: slicing is not allowed on non-fixed dynamic input axis ");
+            auto axis = ax_vec[i];
+            ret += input_starts[i] * s.strides().at(axis);
        }
+        return ret * s.type_size(); // element offset scaled by the element size
+    }

-        // For a static shape, old_lens will be adjusted to a new size
-        // for those axes that are sliced.
-        // For dynamic shape, the adjusted old_lens become the new max values,
-        // while updating the old mins and optimals if possible.
-        std::vector<std::size_t> new_mins;
-        std::vector<std::size_t> old_lens;
-        std::vector<std::size_t> old_strides;
-        // Doesn't handle optimals
-        if(input_shape.dynamic())
+    /**
+     * If given, normalize the inputs. Otherwise get from operator attributes.
+     * Return the values in a map keyed by "norm_starts", "norm_ends", and "norm_axes".
+     *
+     * Parameters
+     * input_shape: static shape of the input
+     * input_starts: optional
+     * input_ends: optional
+     * input_axes: optional
+     */
+    std::unordered_map<std::string, std::vector<int64_t>>
+    normalize_starts_ends_axes(shape input_shape,
+                               const optional<std::vector<int64_t>>& input_starts,
+                               const optional<std::vector<int64_t>>& input_ends,
+                               const optional<std::vector<int64_t>>& input_axes) const
+    {
+        auto axes_attrs = this->attributes().at("normalize_axes");
+        std::vector<int64_t> norm_starts;
+        std::vector<int64_t> norm_ends;
+        std::vector<int64_t> norm_axes;
+        if(input_axes)
        {
-            old_lens = input_shape.max_lens();
-            new_mins = input_shape.min_lens();
+            norm_axes = normalize_axes(input_axes.value(),
+                                       input_shape,
+                                       axes_attrs.at("axes"),
+                                       "Slice variable input_axes");
        }
        else
        {
-            old_lens = input_shape.lens();
-            // For static shape (including during eval step after a dynamic input) the strides are
-            // indexed into the pre-slice array, so they are larger than the apparent size of the
-            // resulting shape.
-            old_strides = input_shape.strides();
+            norm_axes = this->axes;
        }
-
-        std::vector<std::size_t> new_lens = old_lens;
-        for(std::size_t i = 0; i < axes.size(); i++)
+        if(input_starts)
        {
-            auto axis            = axes[i];
-            size_t sliced_length = ends[i] - starts[i];
-            // A Numpy indexing convention: a slice size larger than the actual dimension
-            // is legal and the "ends" value is clipped to the axis size
-            new_lens[axis] = std::min(new_lens[axis], sliced_length);
-            if(input_shape.dynamic())
-            {
-                // TODO: when non-fixed shape slicing is allowed, this will be different than
-                // sliced_length, making use of TBD start/end values.
-                std::size_t sliced_min_length = ends[i] - starts[i];
-                // if the slice size is smaller than maxes but larger than mins
-                new_mins[axis] = std::min(sliced_min_length, new_mins[axis]);
-            }
+            norm_starts = normalize_indices(input_starts.value(),
+                                            norm_axes,
+                                            input_shape,
+                                            axes_attrs.at("starts"),
+                                            "Slice variable input_starts");
+        }
+        else
+        {
+            norm_starts = this->starts;
        }
-        if(input_shape.dynamic())
+        if(input_ends)
        {
-            return shape{t, new_mins, new_lens, {}};
+            norm_ends = normalize_indices(input_ends.value(),
+                                          norm_axes,
+                                          input_shape,
+                                          axes_attrs.at("ends"),
+                                          "Slice variable input ends");
        }
        else
        {
-            return shape{t, new_lens, old_strides};
+            norm_ends = this->ends;
        }
+        return {{"norm_starts", norm_starts}, {"norm_ends", norm_ends}, {"norm_axes", norm_axes}};
    }

    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        auto input = args[0];
-
-        auto offset = compute_offset(input.get_shape()) * dyn_out.computed_shape.type_size();
-        return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
+        auto input       = args[0];
+        auto input_shape = input.get_shape();
+        if(args.size() == 1)
+        {
+            std::size_t offset = compute_offset(input_shape);
+            return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
+        }
+        else
+        {
+            // Note that we re-normalize both the attributes and inputs because of the non-fixed
+            // dynamic input shape case. It's possible to only re-normalize if slicing over
+            // non-fixed dynamic_dimensions.
+            auto set_attributes = get_set_attributes();
+            std::unordered_map<std::string, std::vector<int64_t>> norm_inputs;
+            if(set_attributes == ends_axes)
+            {
+                // attr ends and axes set; inputs are (data, input_starts)
+                args[1].visit([&](auto input_starts) {
+                    norm_inputs =
+                        normalize_starts_ends_axes(input_shape,
+                                                   input_starts.template to_vector<int64_t>(),
+                                                   this->ends,
+                                                   this->axes);
+                });
+            }
+            else if(set_attributes == starts_axes)
+            {
+                // attr starts and axes set; inputs are (data, input_ends)
+                args[1].visit([&](auto input_ends) {
+                    norm_inputs =
+                        normalize_starts_ends_axes(input_shape,
+                                                   this->starts,
+                                                   input_ends.template to_vector<int64_t>(),
+                                                   this->axes);
+                });
+            }
+            else if(set_attributes == starts_ends)
+            {
+                // attr starts and ends set; inputs are (data, input_axes)
+                args[1].visit([&](auto input_axes) {
+                    norm_inputs =
+                        normalize_starts_ends_axes(input_shape,
+                                                   this->starts,
+                                                   this->ends,
+                                                   input_axes.template to_vector<int64_t>());
+                });
+            }
+            else if(set_attributes == axes_only)
+            {
+                // attr axes set; inputs are (data, input_starts, input_ends)
+                visit_all(args[1], args[2])([&](auto input_starts, auto input_ends) {
+                    norm_inputs =
+                        normalize_starts_ends_axes(input_shape,
+                                                   input_starts.template to_vector<int64_t>(),
+                                                   input_ends.template to_vector<int64_t>(),
+                                                   this->axes);
+                });
+            }
+            else if(set_attributes == ends_only)
+            {
+                // attr ends set; inputs are (data, input_starts, input_axes)
+                visit_all(args[1], args[2])([&](auto input_starts, auto input_axes) {
+                    norm_inputs =
+                        normalize_starts_ends_axes(input_shape,
+                                                   input_starts.template to_vector<int64_t>(),
+                                                   this->ends,
+                                                   input_axes.template to_vector<int64_t>());
+                });
+            }
+            else if(set_attributes == starts_only)
+            {
+                // attr starts set; inputs are (data, input_ends, input_axes)
+                visit_all(args[1], args[2])([&](auto input_ends, auto input_axes) {
+                    norm_inputs =
+                        normalize_starts_ends_axes(input_shape,
+                                                   this->starts,
+                                                   input_ends.template to_vector<int64_t>(),
+                                                   input_axes.template to_vector<int64_t>());
+                });
+            }
+            else
+            {
+                // no attr set, all inputs; NOTE(review): any other attr combination lands here too
+                visit_all(args[1], args[2], args[3])(
+                    [&](auto input_starts, auto input_ends, auto input_axes) {
+                        norm_inputs =
+                            normalize_starts_ends_axes(input_shape,
+                                                       input_starts.template to_vector<int64_t>(),
+                                                       input_ends.template to_vector<int64_t>(),
+                                                       input_axes.template to_vector<int64_t>());
+                    });
+            }
+            auto offset = compute_offset(
+                input_shape, norm_inputs.at("norm_starts"), norm_inputs.at("norm_axes"));
+            shape calc_shape = shape{input_shape.type(),
+                                     lens_calc(input_shape.lens(),
+                                               norm_inputs.at("norm_starts"),
+                                               norm_inputs.at("norm_ends"),
+                                               norm_inputs.at("norm_axes")),
+                                     input_shape.strides()};
+            return {calc_shape, [=] { return input.data() + offset; }}; // shape rebuilt at runtime
+        }
    }
+
    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };


--- a/src/include/migraphx/operators.hpp
+++ b/src/include/migraphx/operators.hpp
@@ -55,6 +55,7 @@
 #include <migraphx/op/equal.hpp>
 #include <migraphx/op/erf.hpp>
 #include <migraphx/op/exp.hpp>
+#include <migraphx/op/fill.hpp>
 #include <migraphx/op/flatten.hpp>
 #include <migraphx/op/floor.hpp>
 #include <migraphx/op/fmod.hpp>
@@ -83,6 +84,7 @@
 #include <migraphx/op/mod.hpp>
 #include <migraphx/op/mul.hpp>
 #include <migraphx/op/multibroadcast.hpp>
+#include <migraphx/op/nearbyint.hpp>
 #include <migraphx/op/neg.hpp>
 #include <migraphx/op/nonmaxsuppression.hpp>
 #include <migraphx/op/nonzero.hpp>
@@ -109,7 +111,6 @@
 #include <migraphx/op/rnn_variable_seq_lens.hpp>
 #include <migraphx/op/rnn_var_sl_last_output.hpp>
 #include <migraphx/op/roialign.hpp>
-#include <migraphx/op/round.hpp>
 #include <migraphx/op/rsqrt.hpp>
 #include <migraphx/op/scalar.hpp>
 #include <migraphx/op/scatter_add.hpp>

--- a/src/include/migraphx/optional.hpp
+++ b/src/include/migraphx/optional.hpp
@@ -29,6 +29,17 @@
 #if defined(CPPCHECK)
 #define MIGRAPHX_HAS_OPTIONAL 1
 #define MIGRAPHX_HAS_OPTIONAL_TS 1
+#elif defined(_WIN32)
+#if _MSC_VER >= 1920
+#define MIGRAPHX_HAS_OPTIONAL 1
+#define MIGRAPHX_HAS_OPTIONAL_TS 0
+#elif _MSC_VER >= 1900
+#define MIGRAPHX_HAS_OPTIONAL 0
+#define MIGRAPHX_HAS_OPTIONAL_TS 1
+#else
+#define MIGRAPHX_HAS_OPTIONAL 0
+#define MIGRAPHX_HAS_OPTIONAL_TS 0
+#endif
 #elif defined(__has_include)
 #if __has_include(<optional>) && __cplusplus >= 201703L
 #define MIGRAPHX_HAS_OPTIONAL 1

--- a/src/include/migraphx/pad_calc.hpp
+++ b/src/include/migraphx/pad_calc.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -62,6 +62,15 @@ shape compute_padded_shape(const shape& input,
                           const std::vector<std::size_t>& stride,
                           const std::vector<std::size_t>& dilation);

+// Used for dynamic auto padding of pooling operators where padding needs to be computed at
+// evaluation time.
+MIGRAPHX_EXPORT
+shape compute_padded_pool_shape(const shape& input,
+                                const shape& kernel,
+                                const std::vector<std::size_t>& padding,
+                                const std::vector<std::size_t>& stride,
+                                const std::vector<std::size_t>& dilation);
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx


--- a/src/include/migraphx/ranges.hpp
+++ b/src/include/migraphx/ranges.hpp
@@ -205,7 +205,7 @@ void transform(Range1&& r1, Range2&& r2, Iterator it, F f)
 }

 template <class Range>
-auto reverse(Range& r)
+auto reverse(Range&& r) // NOTE(review): also binds rvalues; result must not outlive r
 {
    return range(std::make_reverse_iterator(r.end()), std::make_reverse_iterator(r.begin()));
 }

--- a/src/include/migraphx/run_loop.hpp
+++ b/src/include/migraphx/run_loop.hpp
@@ -31,6 +31,7 @@
 #include <migraphx/module.hpp>
 #include <migraphx/config.hpp>
 #include <migraphx/ranges.hpp>
+#include <array>
 #include <string>

 namespace migraphx {

--- a/src/include/migraphx/shape.hpp
+++ b/src/include/migraphx/shape.hpp
@@ -263,7 +263,7 @@ struct MIGRAPHX_EXPORT shape
    /// no padding
    bool packed() const;

-    /// Returns true is the shape has been transposed. That is the strides are not in descending
+    /// Returns true if the shape has been transposed. That is the strides are not in descending
    /// order
    bool transposed() const;


--- a/src/include/migraphx/shape_for_each.hpp
+++ b/src/include/migraphx/shape_for_each.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -37,11 +37,11 @@ inline namespace MIGRAPHX_INLINE_NS {
 template <class F>
 void shape_for_each(const migraphx::shape& s, F f)
 {
-    // Ensure calls to f use const ref to vector
-    auto call = [&f](const std::vector<std::size_t>& i) { f(i); };
    std::vector<std::size_t> indices(s.lens().size());
+    const auto& index_const_ref = indices;
    shape ss{s.type(), s.lens()};
-    for(std::size_t i = 0; i < ss.elements(); i++)
+    size_t max = ss.elements();
+    for(std::size_t i = 0; i < max; i++)
    {
        std::transform(ss.strides().begin(),
                       ss.strides().end(),
@@ -51,9 +51,13 @@ void shape_for_each(const migraphx::shape& s, F f)
                           assert(len > 0 and stride > 0);
                           return (i / stride) % len;
                       });
-        call(indices);
+        if constexpr(std::is_invocable<F, decltype(index_const_ref), decltype(i)>{})
+            f(index_const_ref, i);
+        else
+            f(index_const_ref);
    }
 }
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx


--- a/src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,25 +21,28 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
-#define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_SIMPLIFY_DYN_OPS_HPP
+#define MIGRAPHX_GUARD_RTGLIB_SIMPLIFY_DYN_OPS_HPP

-#include <migraphx/program.hpp>
-#include <migraphx/gpu/context.hpp>
+#include <string>
+#include <migraphx/instruction_ref.hpp>
+#include <migraphx/config.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-namespace gpu {
+struct module;

-struct MIGRAPHX_GPU_EXPORT pack_int8_args
+/**
+ * Convert dynamic ops to their static version if possible.
+ * Should be run after the split_single_dyn_dims pass.
+ */
+struct MIGRAPHX_EXPORT simplify_dyn_ops
 {
-    std::string name() const { return "gpu::pack_int8_args"; }
+    std::string name() const { return "simplify_dyn_ops"; }
    void apply(module& m) const;
-    shape pack_int8_shape(const shape& s) const;
 };

-} // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx


--- a/src/include/migraphx/simplify_reshapes.hpp
+++ b/src/include/migraphx/simplify_reshapes.hpp
@@ -38,6 +38,7 @@ struct module;
 */
 struct MIGRAPHX_EXPORT simplify_reshapes
 {
+    size_t depth = 4; // NOTE(review): use not visible here; presumably bounds apply()'s passes
    std::string name() const { return "simplify_reshapes"; }
    void apply(module& m) const;
 };

--- a/src/include/migraphx/source_location.hpp
+++ b/src/include/migraphx/source_location.hpp
@@ -24,6 +24,7 @@
 #ifndef MIGRAPHX_GUARD_MIGRAPHX_SOURCE_LOCATION_HPP
 #define MIGRAPHX_GUARD_MIGRAPHX_SOURCE_LOCATION_HPP

+#include <cstdint>
 #include <migraphx/config.hpp>

 #if defined(CPPCHECK)

--- a/src/include/migraphx/streamutils.hpp
+++ b/src/include/migraphx/streamutils.hpp
@@ -30,6 +30,7 @@
 #include <migraphx/rank.hpp>
 #include <migraphx/requires.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/optional.hpp>
 #include <vector>

 namespace migraphx {
@@ -68,6 +69,19 @@ auto stream_write_value_impl(rank<1>, std::ostream& os, const T& x) -> decltype(
    os << x;
 }

+template <class T>
+auto stream_write_value_impl(rank<1>, std::ostream& os, const optional<T>& x)
+{
+    if(x.has_value())
+    {
+        os << *x; // engaged: stream the contained value
+    }
+    else
+    {
+        os << "nullopt"; // disengaged: stream a textual marker instead
+    }
+}
+
 template <class T>
 void stream_write_value_impl(rank<1>, std::ostream& os, const std::vector<T>& r)
 {