Commit dcb98a60 authored by Paul

Merge branch 'develop' into ubuntu-22.04-default

parents d05768a4 d2486dcd
@@ -79,7 +79,8 @@ def dynamic_dimension(h):
 def dynamic_dimensions(h):
     h.constructor(
         'create',
-        api.params(ptr='const_migraphx_dynamic_dimension_t*', size='size_t'),
+        api.params(ptr='const const_migraphx_dynamic_dimension_t*',
+                   size='size_t'),
         fname='migraphx::to_obj_vector<const_migraphx_dynamic_dimension_t>')
     h.method('size', returns='size_t')
     h.method('get',
@@ -215,7 +216,7 @@ def instruction(h):
 def instructions(h):
     h.constructor(
         'create',
-        api.params(ptr='const_migraphx_instruction_t*', size='size_t'),
+        api.params(ptr='const const_migraphx_instruction_t*', size='size_t'),
         fname='migraphx::to_obj_vector<const_migraphx_instruction_t>')
......
@@ -338,11 +338,22 @@ struct argument_parser
    MIGRAPHX_DRIVER_STATIC auto file_exist()
    {
-        return validate([](auto&, auto&, auto& params) {
+        return validate([](auto&, auto&, const auto& params) {
            if(params.empty())
                throw std::runtime_error("No argument passed.");
            if(not fs::exists(params.back()))
-                throw std::runtime_error("Path does not exists: " + params.back());
+                throw std::runtime_error("Path does not exist: " + params.back());
+        });
+    }
+
+    MIGRAPHX_DRIVER_STATIC auto matches(const std::unordered_set<std::string>& names)
+    {
+        return validate([=](auto&, auto&, const auto& params) {
+            auto invalid_param = std::find_if(
+                params.begin(), params.end(), [&](const auto& p) { return names.count(p) == 0; });
+            if(invalid_param != params.end())
+                throw std::runtime_error("Invalid argument: " + *invalid_param +
+                                         ". Valid arguments are {" + to_string_range(names) + "}");
        });
    }
@@ -570,8 +581,7 @@ struct argument_parser
                continue;
            if(flag[0] != '-')
                continue;
-            auto d =
-                levenshtein_distance(flag.begin(), flag.end(), input.begin(), input.end());
+            std::ptrdiff_t d = levenshtein_distance(flag, input);
            if(d < result.distance)
                result = result_t{&arg, flag, input, d};
        }
......
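The new matches() helper above rejects any parameter outside a fixed allowed set. Below is a minimal standalone sketch of that validation logic; the check_matches wrapper and the example names are illustrative only, not part of the driver API.

    #include <algorithm>
    #include <iostream>
    #include <stdexcept>
    #include <string>
    #include <unordered_set>
    #include <vector>

    // Find the first parameter not in the allowed set and report it,
    // mirroring the std::find_if check in the hunk above.
    void check_matches(const std::vector<std::string>& params,
                       const std::unordered_set<std::string>& names)
    {
        auto invalid = std::find_if(
            params.begin(), params.end(), [&](const auto& p) { return names.count(p) == 0; });
        if(invalid != params.end())
            throw std::runtime_error("Invalid argument: " + *invalid);
    }

    int main()
    {
        check_matches({"resnet50"}, {"resnet50", "inceptionv3", "alexnet"}); // ok
        try
        {
            check_matches({"vgg16"}, {"resnet50", "inceptionv3", "alexnet"});
        }
        catch(const std::runtime_error& e)
        {
            std::cout << e.what() << std::endl; // prints "Invalid argument: vgg16"
        }
    }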
@@ -82,6 +82,7 @@ struct loader
            {"--model"},
            ap.help("Load model"),
            ap.type("resnet50|inceptionv3|alexnet"),
+           ap.matches({"resnet50", "inceptionv3", "alexnet"}),
            ap.group("input"));
        ap(file_type, {"--onnx"}, ap.help("Load as onnx"), ap.set_value("onnx"));
        ap(file_type, {"--tf"}, ap.help("Load as tensorflow"), ap.set_value("tf"));
@@ -769,7 +770,7 @@ struct main_command
        {
            std::cout << "'" << color::fg_yellow << wrong_commands.front() << color::reset
                      << "' is not a valid command." << std::endl;
-            std::cout << get_command_help("Available commands:") << std::endl;
+            std::cout << get_command_help("Available commands:");
        }
        else
        {
......
@@ -35,6 +35,8 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS)
+
 static bool try_compute_shape(instruction_ref ins,
                               const std::vector<shape>& inputs,
                               const std::vector<module_ref>& mods)
@@ -78,14 +80,26 @@ static bool try_compute_shape(instruction_ref ins,
                    return (arg == ins) ? new_shape : arg->get_shape();
                });
-                if(not try_compute_shape(output, input_shapes, mods))
+                if(not try_compute_shape(output, input_shapes, output->module_inputs()))
                {
                    return false;
                }
            }
        }
    }
+    catch(const std::exception& e)
+    {
+        if(enabled(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS{}))
+        {
+            std::cout << "Exception: " << e.what() << std::endl;
+        }
+        return false;
+    }
    catch(...)
    {
+        if(enabled(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS{}))
+        {
+            std::cout << "Unknown exception" << std::endl;
+        }
        return false;
    }
@@ -127,6 +141,11 @@ static void remove_contiguous(const std::string& op_name, module& m, F f)
        {
            if(arg->name() != op_name)
                continue;
+            if(enabled(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS{}))
+            {
+                std::cout << "eliminate_contiguous: ";
+                m.debug_print(ins);
+            }
            auto prev = arg->inputs().front();
            replace(new_args, arg, prev);
            if(try_compute_shape(ins, new_args, mod_args))
......
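The tracing added in this file is gated on an environment variable. Here is a standalone sketch of the same pattern, with std::getenv standing in for MIGraphX's MIGRAPHX_DECLARE_ENV_VAR/enabled() machinery (the "non-empty, non-zero enables it" rule is an assumption for illustration):

    #include <cstdlib>
    #include <iostream>
    #include <stdexcept>

    // Stand-in for enabled(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS{})
    static bool trace_enabled()
    {
        const char* e = std::getenv("MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS");
        return e != nullptr and *e != '\0' and *e != '0';
    }

    int main()
    {
        try
        {
            throw std::runtime_error("shape mismatch");
        }
        catch(const std::exception& ex)
        {
            // Printing only when the env var is set keeps the pass silent by default
            if(trace_enabled())
                std::cout << "Exception: " << ex.what() << std::endl;
        }
    }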
@@ -41,7 +41,7 @@ static literal get_scalar(instruction_ref ins)
    if(ins->name() == "contiguous")
        return get_scalar(ins->inputs().front());
    const auto& s = ins->get_shape();
-    if(s.elements() != 1 && not(s.scalar()))
+    if(s.elements() != 1 and not(s.scalar()))
        return {};
    if(not ins->can_eval())
        return {};
......
@@ -52,7 +52,7 @@ struct fused_reduce
    {
        if(mods.size() != 1)
            MIGRAPHX_THROW("should have one submodule.");
-        auto* sm = mods.front();
+        const auto* sm = mods.front();
        if(sm->get_output_shapes().size() != 1)
            MIGRAPHX_THROW("Only one output supported");
        auto names = sm->get_parameter_names();
@@ -143,7 +143,7 @@ insert_module_in_submodule(module_ref sm,
 }

 static std::vector<instruction_ref>
-find_inputs(module_ref sm,
+find_inputs(const_module_ref sm,
            const module& parent,
            const std::unordered_map<instruction_ref, instruction_ref>& map_ins)
 {
......
@@ -26,6 +26,8 @@
 #include <algorithm>
 #include <numeric>
+#include <string>
+#include <vector>
 #include <migraphx/config.hpp>

 namespace migraphx {
@@ -90,6 +92,42 @@ levenshtein_distance(Iterator1 first1, Iterator1 last1, Iterator2 first2, Iterat
    return std::ptrdiff_t{1} + std::min({x1, x2, x3});
 }

+inline size_t levenshtein_distance(const std::string& s1, const std::string& s2)
+{
+    const size_t l1 = s1.length();
+    const size_t l2 = s2.length();
+    // Recurse with the arguments swapped so the DP row covers the shorter string
+    if(l1 < l2)
+        return levenshtein_distance(s2, s1);
+
+    std::vector<size_t> d(l2 + 1);
+    std::iota(d.begin(), d.end(), 0);
+
+    for(size_t i = 1; i <= l1; i++)
+    {
+        size_t prev_cost = d[0];
+        d[0]             = i;
+        for(size_t j = 1; j <= l2; j++)
+        {
+            if(s1[i - 1] == s2[j - 1])
+            {
+                // Carry the old d[j] forward as the diagonal for the next column
+                std::swap(prev_cost, d[j]);
+            }
+            else
+            {
+                size_t cost_insert_or_delete = std::min(d[j - 1], d[j]);
+                size_t cost_substitute       = prev_cost;
+                prev_cost                    = d[j];
+                d[j] = std::min(cost_substitute, cost_insert_or_delete) + 1;
+            }
+        }
+    }
+    return d[l2];
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
......
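A quick sanity check of the row-based DP above; the header path here is an assumed location for the new std::string overload, but the expected distances are the standard ones.

    #include <cassert>
    #include <migraphx/stringutils.hpp> // hypothetical include path for illustration

    int main()
    {
        // Classic example: kitten -> sitting takes 3 edits
        // (substitute k->s, substitute e->i, insert g)
        assert(migraphx::levenshtein_distance("kitten", "sitting") == 3);
        assert(migraphx::levenshtein_distance("", "abc") == 3);
        assert(migraphx::levenshtein_distance("same", "same") == 0);
    }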
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -34,21 +34,37 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+// Check that deduced type is incrementable, dereferencable, and comparable
+template <class, class = void>
+struct is_iterator
+{
+};
+
+template <class T>
+struct is_iterator<T,
+                   std::void_t<decltype(++std::declval<T&>()),
+                               decltype(*std::declval<T&>()),
+                               decltype(std::declval<T&>() == std::declval<T&>())>> : std::true_type
+{
+};
+
+template <class Iterator>
 struct check_shapes
 {
-    const shape* begin;
-    const shape* end;
+    static_assert(is_iterator<Iterator>{}, "CHECK_SHAPES: Deduced type must be an iterator");
+    Iterator begin;
+    Iterator end;
    std::string name;
    bool dynamic_allowed;

-    check_shapes(const shape* b, const shape* e, const std::string& n, const bool d = false)
+    check_shapes(Iterator b, Iterator e, const std::string& n, const bool d = false)
        : begin(b), end(e), name(n), dynamic_allowed(d)
    {
        check_dynamic();
    }

    template <class Op>
-    check_shapes(const shape* b, const shape* e, const Op& op, const bool d = false)
+    check_shapes(Iterator b, Iterator e, const Op& op, const bool d = false)
        : begin(b), end(e), name(op.name()), dynamic_allowed(d)
    {
        check_dynamic();
@@ -56,7 +72,7 @@ struct check_shapes

    template <class Op>
    check_shapes(const std::vector<shape>& s, const Op& op, const bool d = false)
-        : begin(s.data()), end(s.data() + s.size()), name(op.name()), dynamic_allowed(d)
+        : begin(s.begin()), end(s.end()), name(op.name()), dynamic_allowed(d)
    {
        check_dynamic();
    }
@@ -81,8 +97,6 @@ struct check_shapes
    {
        if(begin == end)
            return 0;
-        assert(begin != nullptr);
-        assert(end != nullptr);
        return end - begin;
    }
@@ -131,8 +145,6 @@ struct check_shapes
     */
    const check_shapes& only_dims(std::size_t n) const
    {
-        assert(begin != nullptr);
-        assert(end != nullptr);
        if(begin != end)
        {
            if(begin->max_lens().size() != n)
@@ -148,8 +160,6 @@ struct check_shapes
     */
    const check_shapes& max_ndims(std::size_t n) const
    {
-        assert(begin != nullptr);
-        assert(end != nullptr);
        if(begin != end)
        {
            if(begin->max_lens().size() > n)
@@ -166,8 +176,6 @@ struct check_shapes
     */
    const check_shapes& min_ndims(std::size_t n) const
    {
-        assert(begin != nullptr);
-        assert(end != nullptr);
        if(begin != end)
        {
            if(begin->max_lens().size() < n)
@@ -330,8 +338,6 @@ struct check_shapes
    {
        if(begin == end)
            return true;
-        assert(begin != nullptr);
-        assert(end != nullptr);
        auto&& key = f(*begin);
        return this->all_of([&](const shape& s) { return f(s) == key; });
    }
@@ -341,8 +347,6 @@ struct check_shapes
    {
        if(begin == end)
            return true;
-        assert(begin != nullptr);
-        assert(end != nullptr);
        return std::all_of(begin, end, p);
    }
@@ -351,17 +355,13 @@ struct check_shapes
    {
        if(begin == end)
            return false;
-        assert(begin != nullptr);
-        assert(end != nullptr);
        return std::any_of(begin, end, p);
    }

-    const shape* get(long i) const
+    Iterator get(long i) const
    {
        if(i >= size())
            MIGRAPHX_THROW(prefix() + "Accessing shape out of bounds");
-        assert(begin != nullptr);
-        assert(end != nullptr);
        if(i < 0)
            return end - i;
        return begin + i;
@@ -394,6 +394,11 @@ struct check_shapes
    }
 };

+// Deduction guide for std::vector constructor
+template <class Op>
+check_shapes(const std::vector<shape>&, const Op&, bool d = false)
+    -> check_shapes<std::vector<shape>::const_iterator>;
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
......
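The is_iterator trait above uses the std::void_t detection idiom: the partial specialization is chosen, and derives from std::true_type, only when pre-increment, dereference, and equality comparison are all well-formed for T. A standalone check (C++17), with the trait copied verbatim from the hunk:

    #include <type_traits>
    #include <vector>

    template <class, class = void>
    struct is_iterator
    {
    };

    template <class T>
    struct is_iterator<T,
                       std::void_t<decltype(++std::declval<T&>()),
                                   decltype(*std::declval<T&>()),
                                   decltype(std::declval<T&>() == std::declval<T&>())>>
        : std::true_type
    {
    };

    // Note: only positive assertions work here; the primary template has no
    // bool conversion, so is_iterator<int>{} would not even compile in a
    // static_assert. That is enough for the CHECK_SHAPES use above.
    static_assert(is_iterator<int*>{}, "raw pointers qualify");
    static_assert(is_iterator<std::vector<int>::const_iterator>{}, "container iterators qualify");

    int main() {}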
@@ -381,22 +381,24 @@ void find_matches_for(source_location location, Mod& mod, instruction_ref ins, M
    const int trace     = value_of(MIGRAPHX_TRACE_MATCHES{});
    const bool validate = enabled(MIGRAPHX_VALIDATE_MATCHES{});
    const auto trace_filter = string_value_of(MIGRAPHX_TRACE_MATCHES_FOR{});
-    const bool trace_for = not trace_filter.empty() and
-                           (contains(std::string{location.file_name()}, trace_filter) or
-                            contains(std::string{location.function_name()}, trace_filter));
    bool match = false;
    each_args(
        [&](auto&& m) {
+            const auto& matcher_name = get_type_name(m);
+            const bool trace_for = not trace_filter.empty() and
+                                   (contains(std::string{location.file_name()}, trace_filter) or
+                                    contains(std::string{location.function_name()}, trace_filter) or
+                                    contains(matcher_name, trace_filter));
            if(match)
                return;
-            if(trace > 1 or trace_for)
-                std::cout << "Match: " << get_type_name(m) << std::endl;
+            if(trace > 1 and trace_for)
+                std::cout << "Match: " << matcher_name << std::endl;
            auto r = match_instruction(get_module(mod), ins, m.matcher());
            if(r.result == get_module(mod).end())
                return;
            if(trace > 0 or trace_for)
            {
-                std::cout << "Matched by " << get_type_name(m) << std::endl;
+                std::cout << "Matched by " << matcher_name << std::endl;
                get_module(mod).debug_print(ins);
            }
            // If its already invalid dont validate it again
@@ -407,7 +409,7 @@ void find_matches_for(source_location location, Mod& mod, instruction_ref ins, M
                auto invalid = get_module(mod).validate();
                if(invalid != get_module(mod).end())
                {
-                    std::cout << "Invalid program from match: " << get_type_name(m) << std::endl;
+                    std::cout << "Invalid program from match: " << matcher_name << std::endl;
                    std::cout << "Invalid instructions: " << std::endl;
                    get_module(mod).debug_print(invalid->inputs());
                    get_module(mod).debug_print(invalid);
......
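The hunk above moves the trace_for test inside the per-matcher lambda so the MIGRAPHX_TRACE_MATCHES_FOR filter can also match on the matcher's type name, not just the source file or function. A standalone sketch of that substring test (the contains helper and names are illustrative):

    #include <iostream>
    #include <string>

    static bool contains(const std::string& s, const std::string& sub)
    {
        return s.find(sub) != std::string::npos;
    }

    int main()
    {
        std::string filter       = "find_mul";                // hypothetical filter value
        std::string matcher_name = "migraphx::find_mul_add";  // hypothetical matcher type name
        // Newly possible: tracing fires because the filter occurs in the matcher name
        bool trace_for = not filter.empty() and contains(matcher_name, filter);
        std::cout << std::boolalpha << trace_for << "\n"; // prints "true"
    }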
@@ -222,7 +222,17 @@ struct MIGRAPHX_EXPORT module
    void annotate(std::ostream& os, std::function<void(instruction_ref)> a) const;
    std::vector<module_ref> get_sub_modules(bool shallow = false) const;

+    /* Sorts the module in topological order, aka reverse-post-order (RPO) DFS order.
+       It takes the last instruction or @return as the root, walks back through the graph,
+       and moves the inputs of each instruction so that they appear before the instruction
+       itself.
+    */
    module& sort();
+    /* Any instruction "X" can have module arguments, and the modules inside them can use any
+     * other instruction "Y" from predecessor modules of the instruction "X". Such instructions
+     * "Y" inside module args are not listed as inputs to "X", but they must be evaluated
+     * before "X" can be. Therefore such "Y" instructions are considered implicit
+     * dependencies of "X".
+     */
    ins_dep_map calc_implicit_deps() const;

    MIGRAPHX_EXPORT friend std::ostream& operator<<(std::ostream& os, const module& m);
......
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -28,6 +28,7 @@
 #include <migraphx/shape.hpp>
 #include <cstring>
 #include <vector>
+#include <migraphx/op/normalize_attribute.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -42,6 +43,36 @@ struct select_dependent_type
 template <class T, class... Ts>
 using dependent_type = typename select_dependent_type<T, Ts...>::type;

+/**
+ * Used to normalize variable input axes at model runtime.
+ * Example: the axes input of the slice operator.
+ *
+ * \param axes the axes to normalize
+ * \param input_shape shape of the input tensor
+ * \param attr_val the normalize_axes attributes from the operator
+ * \param prefix error message prefix
+ */
+std::vector<int64_t> normalize_axes(const std::vector<int64_t>& axes,
+                                    const shape& input_shape,
+                                    const value& attr_val,
+                                    const std::string& prefix = "");
+
+/**
+ * Used to normalize variable input indices at model runtime.
+ * Example: the starts and ends inputs of the slice operator.
+ *
+ * \param indices the indices to normalize
+ * \param axes which axes the indices apply over
+ * \param input_shape shape of the input tensor
+ * \param attr_val the normalize_axes attributes from the operator
+ * \param prefix error message prefix
+ */
+std::vector<int64_t> normalize_indices(const std::vector<int64_t>& indices,
+                                       const std::vector<int64_t>& axes,
+                                       const shape& input_shape,
+                                       const value& attr_val,
+                                       const std::string& prefix = "");
+
 MIGRAPHX_EXPORT
 bool normalize_attributes(operation& op, const shape& input_shape);
......
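The usual convention behind axis normalization, which the declarations above expose for runtime inputs, is that a negative axis counts back from the tensor rank. A minimal standalone illustration of that rule (this is a sketch of the convention, not the MIGraphX implementation):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // A negative axis is offset by the rank: -1 on a rank-3 shape becomes 2.
    std::vector<int64_t> normalize(const std::vector<int64_t>& axes, int64_t ndim)
    {
        std::vector<int64_t> out;
        for(auto a : axes)
            out.push_back(a < 0 ? a + ndim : a);
        return out;
    }

    int main()
    {
        auto r = normalize({-1, 0}, 3);
        assert(r[0] == 2 and r[1] == 0);
    }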
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -33,8 +33,12 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+// Specifies where to add the "extra" cell of padding if the
+// calculated padding is an odd number.
 // Padding mode is default_ for fixed shape padding.
-// same_lower and same_upper used for dynamic padding.
+// same_lower and same_upper specify dynamic padding.
+// The odd cell goes at the beginning of the dimension
+// (same_lower) or end (same_upper).
 enum padding_mode_t
 {
     default_, // NOLINT
......
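A worked example of the split the comment describes, assuming the ONNX-style "same" rule total_pad = (ceil(in/stride) - 1) * stride + kernel - in; when the total is odd, same_upper puts the extra cell after the data and same_lower puts it before:

    #include <cstddef>
    #include <iostream>

    int main()
    {
        std::size_t in = 6, stride = 2, kernel = 3;
        std::size_t out   = (in + stride - 1) / stride;         // ceil(6/2) = 3
        std::size_t total = (out - 1) * stride + kernel - in;   // 1 extra cell of padding
        // same_upper: the odd cell goes at the end of the dimension
        std::size_t lower = total / 2, upper = total - total / 2;
        // same_lower would instead use lower = total - total / 2, upper = total / 2
        std::cout << out << " " << lower << " " << upper << "\n"; // prints "3 0 1"
    }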
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -82,7 +82,7 @@ struct convolution
        const auto input_ndim   = inputs[0].ndim();
        const auto padding_size = padding.size();
-        if(input_ndim != padding_size / 2 + 2 && input_ndim != padding_size + 2)
+        if(input_ndim != padding_size / 2 + 2 and input_ndim != padding_size + 2)
        {
            MIGRAPHX_THROW("CONVOLUTION: input and attribute size mismatch!");
        }
@@ -206,6 +206,7 @@ struct convolution
        std::vector<std::size_t> new_padding;
        if(padding_mode != op::padding_mode_t::default_)
        {
+            // Auto-calculate the padding sizes with calc_dyn_auto_pad
            auto input_lens   = args[0].get_shape().lens();
            auto weights_lens = args[1].get_shape().lens();
            new_padding =
@@ -217,6 +218,7 @@ struct convolution
        }
        else
        {
+            // Use the padding that was given
            new_padding = padding;
            if(output_shape.dynamic())
            {
......
@@ -71,7 +71,7 @@ struct if_op
        std::unordered_map<std::string, argument> params;
        std::set<std::string> pnames;
-        for(const auto& smod : mods)
+        for(const_module_ref smod : mods)
        {
            auto names = smod->get_parameter_names();
            pnames.insert(names.begin(), names.end());
......
@@ -59,7 +59,7 @@ struct loop
            MIGRAPHX_THROW("LOOP: operator should have one submodule.");
        }

-        const auto& mod     = mods.front();
+        const_module_ref mod = mods.front();
        auto mod_out_shapes = mod->get_output_shapes();
        auto dep_param_num  = inputs.size() - 2;
......
@@ -29,6 +29,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/argument.hpp>
+#include <migraphx/pad_calc.hpp>
 #include <migraphx/par_for.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/dyn_output.hpp>
@@ -40,10 +41,20 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+// The pooling operator mostly follows the specification of the ONNX pooling op.
+// It assumes an NCHW layout, extended to support any number of spatial dimensions
+// from 1 on up; dimensions are <batch index, channels, spatial dimensions...>
+//
 struct pooling
 {
+    // Class members mode, ceil_mode, and padding_mode have similar names but refer to
+    // separate concepts.
    pooling_mode mode = {pooling_mode::average};

+    // If the input has a rank other than 4, then padding, stride, and lengths must all be
+    // specified, since the defaults are 2-dimensional. Exception: padding is not required
+    // if padding_mode != default_
+
    // Padding along each spatial input dimension
    // Can be ndim or 2*ndim values where ndim is size of lengths
    // ndim values means pad the same before and after each dimension
@@ -63,13 +74,14 @@ struct pooling
    // ceiling mode is a flag affecting output size
    // or equivalently, placements of the pooling kernel.
-    // When true, round the size upwards, possibly
-    // including partial placements where the kernel extends beyond the edge
-    // of input and even padding. When false, round down so that all
+    // When true, round the size upwards. When false, round down so that all
    // kernel placements fit but some input values may be dropped.
    bool ceil_mode = false;
    int lp_order   = 2;

+    // Mode for auto padding. default_ indicates no auto padding.
+    padding_mode_t padding_mode = padding_mode_t::default_;
+
    // Global pooling with dynamic shape input
    bool dyn_global = false;
@@ -84,6 +96,7 @@ struct pooling
    {
        return pack(f(self.mode, "mode"),
                    f(self.padding, "padding"),
+                    f(self.padding_mode, "padding_mode"),
                    f(self.stride, "stride"),
                    f(self.lengths, "lengths"),
                    f(self.ceil_mode, "ceil_mode"),
@@ -97,7 +110,8 @@ struct pooling
    {
        if(dyn_global)
            return;
-        if((padding.size() != stride.size() and (padding.size()) != stride.size() * 2) or
+        if((padding_mode != default_ and padding.size() != stride.size() and
+            (padding.size()) != stride.size() * 2) or
           stride.size() != lengths.size())
        {
            MIGRAPHX_THROW("POOLING: inconsistent attribute sizes");
@@ -137,8 +151,19 @@ struct pooling
            std::size_t padding_factor = 2 * padding[i];
            if(padding.size() == 2 * kdims)
                padding_factor = padding[i] + padding[i + kdims];
-            assert(input_lens[i + 2] + padding_factor >= lengths[i]);
-            std::size_t dim_size = input_lens[i + 2] + padding_factor - lengths[i];
+            std::size_t dim_size;
+            if(input_lens[i + 2] + padding_factor < lengths[i])
+            {
+                if(padding_mode == default_)
+                    MIGRAPHX_THROW("POOLING: not enough padding for the given kernel size");
+                // lengths can be legitimately larger only if we're doing auto padding
+                // with a dynamic shape, in which case given padding is ignored. Set a dummy value.
+                dim_size = 2;
+            }
+            else
+            {
+                dim_size = input_lens[i + 2] + padding_factor - lengths[i];
+            }
            std::size_t len =
                (ceil_mode)
                    ? dim_size / stride[i] +
@@ -151,17 +176,13 @@ struct pooling

    shape normalize_compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this, true}.has(1);
+        check_shapes{inputs, *this, true}.has(1).min_ndims(3);
        check_attribute_size();
        const shape& input = inputs.at(0);
-        auto padding_size  = padding.size();
+        auto stride_size   = stride.size();
        size_t kdims       = input.ndim() - 2;
-        if(input.ndim() < 3)
-        {
-            MIGRAPHX_THROW("POOLING: input must have 3 or more dimensions and be nonempty");
-        }
-        if(input.ndim() * 2 != padding_size + 4 and input.ndim() != padding_size + 2)
+        if(input.ndim() != stride_size + 2)
        {
            MIGRAPHX_THROW("POOLING: input and attribute size mismatch!");
        }
@@ -179,6 +200,28 @@ struct pooling
            }
            return {input.type(), output_dyn_dims};
        }
+        else if(padding_mode != default_)
+        {
+            const size_t num_spatial_dims = inputs[0].ndim() - 2;
+            const shape& x_shape          = inputs[0];
+            // same as convolution::dynamic_compute_shape()
+            for(std::size_t i = 0; i < num_spatial_dims; ++i)
+            {
+                auto ceil_div = [](std::size_t x, std::size_t y) { return (x + y - 1) / y; };
+                auto s        = stride[i];
+
+                auto x = x_shape.dyn_dims()[i + 2];
+                std::set<std::size_t> optimals{};
+                std::transform(x.optimals.begin(),
+                               x.optimals.end(),
+                               std::inserter(optimals, optimals.begin()),
+                               [&](auto o) { return ceil_div(o, s); });
+                output_dyn_dims.push_back(
+                    shape::dynamic_dimension{ceil_div(x.min, s), ceil_div(x.max, s), optimals});
+            }
+            return {input.type(), output_dyn_dims};
+        }
        else
        {
            // does not compute optimals
@@ -267,6 +310,7 @@ struct pooling
                      Out& output,
                      const In& input,
                      const std::vector<std::size_t>& kernel_dims,
+                      const std::vector<std::size_t>& padding_vals,
                      Op op) const
    {
        auto in_s = input.get_shape();
@@ -284,8 +328,8 @@ struct pooling
            for(std::size_t dim = 2; dim < n_dim; ++dim)
            {
                auto d_2 = dim - 2;
-                int start =
-                    static_cast<int>(idx_o[dim] * stride[d_2]) - static_cast<int>(padding[d_2]);
+                int start = static_cast<int>(idx_o[dim] * stride[d_2]) -
+                            static_cast<int>(padding_vals[d_2]);
                int end;
                // NOLINT
                if(count_include_pad and ceil_mode and (mode != pooling_mode::max))
@@ -297,7 +341,7 @@ struct pooling
                    // Check if this kernel extends beyond the padding at end of dimension
                    end = std::min(start + kernel_dims[d_2],
-                                   in_lens[dim] + static_cast<int>(padding[d_2]));
+                                   in_lens[dim] + static_cast<int>(padding_vals[d_2]));
                }
                else
                {
@@ -316,6 +360,7 @@ struct pooling
            }
            shape win_shape{output_shape.type(), win_size};
            auto pool_size = win_shape.elements();
+            double output_val = op.template init<Type>();
@@ -354,30 +399,65 @@ struct pooling
    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        argument result{dyn_out.computed_shape};
+        argument result;
        auto input_lens = args[0].get_shape().lens();
        std::vector<std::size_t> kernel_dims;
+        shape output_shape;
+        // If we have to auto-calculate padding, it will be passed to calc_pooling() as an
+        // argument instead of the member variable padding.
+        std::vector<std::size_t> temp_padding(padding);
        if(dyn_global)
        {
+            // for dynamic GlobalPooling, there's no padding
            kernel_dims.insert(kernel_dims.end(), input_lens.begin() + 2, input_lens.end());
+            output_shape = dyn_out.computed_shape;
+            result       = dyn_out.computed_shape;
        }
-        else
+        else if((padding_mode != op::padding_mode_t::default_))
+        {
+            // if padding_mode is set, input was a dynamic size. Calculate padded size now.
+            // kernel_lens is the same as kernel_dims, but prepended with the 2 non-
+            // spatial dimensions. For size computations, it's used like the weights
+            // tensor for convolutions.
+            std::vector<std::size_t> kernel_lens;
+            kernel_lens.insert(kernel_lens.end(), input_lens.begin(), input_lens.begin() + 2);
+            kernel_lens.insert(kernel_lens.end(), lengths.begin(), lengths.end());
+            kernel_dims = this->lengths;
+            auto type   = args[0].get_shape().type();
+            // dilation not currently supported for pooling, so default to all 1's
+            temp_padding = calc_dyn_auto_pad(
+                input_lens, kernel_lens, stride, {1, 1}, bool(padding_mode == op::same_upper));
+            output_shape = compute_padded_pool_shape(
+                args[0].get_shape(), shape(type, kernel_dims), temp_padding, stride, {1, 1});
+            result = argument(output_shape);
+        }
+        else // fixed/static input
        {
            kernel_dims = this->lengths;
+            output_shape = dyn_out.computed_shape;
+            result       = dyn_out.computed_shape;
        }
+
+        // Perform the computation and populate result
        visit_all(result, args[0])([&](auto output, auto input) {
            using type = typename decltype(output)::value_type;
            switch(mode)
            {
            case migraphx::op::pooling_mode::average:
-                calc_pooling<type>(dyn_out.computed_shape, output, input, kernel_dims, avg_pool{});
+                calc_pooling<type>(
+                    output_shape, output, input, kernel_dims, temp_padding, avg_pool{});
                break;
            case migraphx::op::pooling_mode::max:
-                calc_pooling<type>(dyn_out.computed_shape, output, input, kernel_dims, max_pool{});
+                calc_pooling<type>(
+                    output_shape, output, input, kernel_dims, temp_padding, max_pool{});
                break;
            case migraphx::op::pooling_mode::lpnorm:
                calc_pooling<type>(
-                    dyn_out.computed_shape, output, input, kernel_dims, lpnorm_pool{lp_order});
+                    output_shape, output, input, kernel_dims, temp_padding, lpnorm_pool{lp_order});
                break;
            }
        });
......
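A worked example of the pooling output-length arithmetic used above, assuming the floor rule len = dim_size / stride + 1 and a ceil_mode variant that rounds the division up (the two differ only when the stride does not divide dim_size evenly):

    #include <cstddef>
    #include <iostream>

    int main()
    {
        std::size_t input = 6, kernel = 3, stride = 2, pad_total = 0;
        std::size_t dim_size = input + pad_total - kernel;            // 3
        std::size_t floor_len = dim_size / stride + 1;                // 3/2 + 1 = 2
        std::size_t ceil_len  = (dim_size + stride - 1) / stride + 1; // 2 + 1 = 3
        // ceil_mode admits one extra (partial) kernel placement
        std::cout << floor_len << " " << ceil_len << "\n";            // prints "2 3"
    }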
@@ -27,19 +27,34 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <migraphx/value.hpp>
-#include <migraphx/dyn_output.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
+#include <migraphx/normalize_attributes.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+/**
+ * Slice operator that accepts variable axes, starts and ends.
+ *
+ * Attributes:
+ * axes: constant axes to slice over (optional)
+ * starts: constant slice starting indices (optional)
+ * ends: constant slice ending indices (optional)
+ *
+ * Parameters:
+ * data: the input tensor to slice (dynamic or static shape)
+ * input_starts: starting indices of the slice (optional, static shape)
+ * input_ends: ending indices of the slice (optional, static shape)
+ * input_axes: axes to slice over (optional, static shape)
+ */
 struct slice
 {
-    std::vector<int64_t> axes;
-    std::vector<int64_t> starts;
-    std::vector<int64_t> ends;
+    std::vector<int64_t> axes{};
+    std::vector<int64_t> starts{};
+    std::vector<int64_t> ends{};

    template <class Self, class F>
    static auto reflect(Self& self, F f)
@@ -48,8 +63,8 @@ struct slice
    }

    /**
-     * Ensure that attribute vectors axes, starts, and ends are all the same size and values
-     * are in limits.
+     * Ensure that attribute vectors axes, starts, and ends are all the same size and values
+     * are within limits.
     */
    value attributes() const
    {
@@ -70,100 +85,235 @@ struct slice
    std::string name() const { return "slice"; }

-    auto compute_offset(const shape& s) const
-    {
-        const std::vector<std::size_t>& lens    = s.lens();
-        const std::vector<std::size_t>& strides = s.strides();
-        auto offset = 0;
-        if(not axes.empty())
-        {
-            for(std::size_t i = 0; i < axes.size(); i++)
-            {
-                auto axis = axes[i];
-                offset += starts[i] * strides[axis];
-            }
-        }
-        else
-        {
-            for(std::size_t axis = 0; axis < lens.size(); axis++)
-            {
-                offset += starts[axis] * strides[axis];
-            }
-        }
-        return offset;
-    }
+    /**
+     * Computes the slice output shape dimensions for given starts, ends, and axes.
+     * Templated to also handle tensor views.
+     * Possibly different type between [in_starts, in_ends] and [in_axes] if in_axes is this
+     * object's axes attribute. Assumes in_starts and in_ends are normalized; in_axes are valid.
+     */
+    template <class A, class B>
+    std::vector<std::size_t>
+    lens_calc(const std::vector<std::size_t>& lengths, A in_starts, A in_ends, B in_axes) const
+    {
+        auto new_lens = lengths;
+        for(std::size_t i = 0; i < in_axes.size(); ++i)
+        {
+            auto axis      = in_axes[i];
+            new_lens[axis] = in_ends[i] - in_starts[i];
+        }
+        return new_lens;
+    }
    shape normalize_compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this, true}.has(1);
+        check_shapes{inputs, *this, true}.has(1, 3, 4);
        auto input_shape = inputs[0];
-        auto t           = input_shape.type();
-        if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
-               return not input_shape.dyn_dims()[axis].is_fixed();
-           }))
-        {
-            MIGRAPHX_THROW("SLICE: slicing is not allowed on non-fixed dynamic input axis ");
-        }
-        // For a static shape, old_lens will be adjusted to a new size
-        // for those axes that are sliced.
-        // For dynamic shape, the adjusted old_lens become the new max values,
-        // while updating the old mins and optimals if possible.
-        std::vector<std::size_t> new_mins;
-        std::vector<std::size_t> old_lens;
-        std::vector<std::size_t> old_strides;
-        if(input_shape.dynamic())
-        {
-            old_lens = input_shape.max_lens();
-            new_mins = input_shape.min_lens();
-        }
-        else
-        {
-            old_lens = input_shape.lens();
-            // For static shape (including during eval step after a dynamic input) the strides
-            // are indexed into the pre-slice array, so they are larger than the apparent size
-            // of the resulting shape.
-            old_strides = input_shape.strides();
-        }
+        if(inputs.size() == 1)
+        {
+            auto t = input_shape.type();
+            // TODO: When support for dynamic shapes is added to normalize_attributes,
+            // remove this restriction.
+            if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
+                   return not input_shape.dyn_dims()[axis].is_fixed();
+               }))
+            {
+                MIGRAPHX_THROW("SLICE: slicing is not allowed on non-fixed dynamic input axis ");
+            }
+            // Doesn't handle optimals
+            if(input_shape.dynamic())
+            {
+                return shape{t,
+                             lens_calc(input_shape.min_lens(), starts, ends, axes),
+                             lens_calc(input_shape.max_lens(), starts, ends, axes),
+                             {}};
+            }
+            else
+            {
+                return shape{
+                    t, lens_calc(input_shape.lens(), starts, ends, axes), input_shape.strides()};
+            }
+        }
+        else
+        {
+            // check that starts, ends, and optionally input_axes are all 1D, have the same
+            // dimension, and are static
+            check_shapes{inputs.begin() + 1,
+                         inputs.end(),
+                         std::string("SLICE: inputs (starts, ends, and input_axes)"),
+                         false}
+                .only_dims(1)
+                .same_dims();
+            auto dds = input_shape.to_dynamic().dyn_dims();
+            if(inputs.size() == 3)
+            {
+                if(inputs[1].lens().at(0) != axes.size())
+                {
+                    MIGRAPHX_THROW("SLICE: inputs starts and ends do not have the same dimension "
+                                   "as the axes attribute");
+                }
+                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
+                    dds.at(axis) = {0, dds.at(axis).max};
+                });
+            }
+            else
+            {
+                // if axes is an input, then all the output dimensions could be 0 to the max value
+                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
+                    return shape::dynamic_dimension{0, dd.max};
+                });
+            }
+            return shape{input_shape.type(), dds};
+        }
    }
-        std::vector<std::size_t> new_lens = old_lens;
-        for(std::size_t i = 0; i < axes.size(); i++)
-        {
-            auto axis            = axes[i];
-            size_t sliced_length = ends[i] - starts[i];
-            // A Numpy indexing convention: a slice size larger than the actual dimension
-            // is legal and the "ends" value is clipped to the axis size
-            new_lens[axis] = std::min(new_lens[axis], sliced_length);
-            if(input_shape.dynamic())
-            {
-                // TODO: when non-fixed shape slicing is allowed, this will be different than
-                // sliced_length, making use of TBD start/end values.
-                std::size_t sliced_min_length = ends[i] - starts[i];
-                // if the slice size is smaller than maxes but larger than mins
-                new_mins[axis] = std::min(sliced_min_length, new_mins[axis]);
-            }
-        }
-        if(input_shape.dynamic())
-        {
-            return shape{t, new_mins, new_lens, {}};
-        }
-        else
-        {
-            return shape{t, new_lens, old_strides};
-        }
-    }
+    /**
+     * Calculates the starting offset for the sliced tensor.
+     * Used in compute when only the data input is given and all other information is in
+     * the attributes.
+     *
+     * \param s static input shape
+     */
+    auto compute_offset(const shape& s) const
+    {
+        const std::vector<std::size_t>& lens    = s.lens();
+        const std::vector<std::size_t>& strides = s.strides();
+        auto offset                             = 0;
+        if(not axes.empty())
+        {
+            for(std::size_t i = 0; i < axes.size(); i++)
+            {
+                auto axis = axes[i];
+                offset += starts[i] * strides[axis];
+            }
+        }
+        else
+        {
+            for(std::size_t axis = 0; axis < lens.size(); axis++)
+            {
+                offset += starts[axis] * strides[axis];
+            }
+        }
+        return offset * s.type_size();
+    }
+
+    /**
+     * Calculates the starting offset for the sliced tensor (for aliasing).
+     * Used when the starts and/or the axes are inputs.
+     *
+     * \param s static input shape
+     * \param input_starts starting indices of slice
+     * \param ax_vec axes to slice on
+     */
+    template <class IndView, class Axes>
+    auto compute_offset(const shape& s, const IndView& input_starts, const Axes& ax_vec) const
+    {
+        auto ret = 0;
+        for(std::size_t i = 0; i < ax_vec.size(); ++i)
+        {
+            auto axis = ax_vec[i];
+            ret += input_starts[i] * s.strides().at(axis);
+        }
+        return ret * s.type_size();
+    }
+
+    std::unordered_map<std::string, std::vector<int64_t>>
+    normalize_inputs(const shape& input_shape,
+                     const std::vector<int64_t>& input_starts,
+                     const std::vector<int64_t>& input_ends) const
+    {
+        auto attrs = this->attributes().at("normalize_axes");
+        return {{"input_starts",
+                 normalize_indices(input_starts,
+                                   this->axes,
+                                   input_shape,
+                                   attrs.at("starts"),
+                                   "Slice variable input_starts")},
+                {"input_ends",
+                 normalize_indices(input_ends,
+                                   this->axes,
+                                   input_shape,
+                                   attrs.at("ends"),
+                                   "Slice variable input_ends")}};
+    }
+
+    /**
+     * Three-input version of normalize_inputs.
+     * This one also checks that the input_axes are valid.
+     */
+    std::unordered_map<std::string, std::vector<int64_t>>
+    normalize_inputs(shape input_shape,
+                     const std::vector<int64_t>& input_starts,
+                     const std::vector<int64_t>& input_ends,
+                     const std::vector<int64_t>& input_axes) const
+    {
+        auto attrs = this->attributes().at("normalize_axes");
+        auto norm_axes =
+            normalize_axes(input_axes, input_shape, attrs.at("axes"), "Slice variable input_axes");
+        return {{"input_starts",
+                 normalize_indices(input_starts,
+                                   norm_axes,
+                                   input_shape,
+                                   attrs.at("starts"),
+                                   "Slice variable input_starts")},
+                {"input_ends",
+                 normalize_indices(input_ends,
+                                   norm_axes,
+                                   input_shape,
+                                   attrs.at("ends"),
+                                   "Slice variable input_ends")},
+                {"input_axes", norm_axes}};
+    }
    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        auto input = args[0];
-
-        auto offset = compute_offset(input.get_shape()) * dyn_out.computed_shape.type_size();
-        return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
+        auto input       = args[0];
+        auto input_shape = input.get_shape();
+
+        switch(args.size())
+        {
+        case 1: {
+            std::size_t offset = compute_offset(input_shape);
+            return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
+        }
+        case 3: {
+            shape calc_shape;
+            std::size_t offset = 0;
+            visit_all(args[1], args[2])([&](auto input_starts, auto input_ends) {
+                auto norm_inputs = normalize_inputs(input_shape,
+                                                    input_starts.template to_vector<int64_t>(),
+                                                    input_ends.template to_vector<int64_t>());
+                offset = compute_offset(input_shape, norm_inputs.at("input_starts"), this->axes);
+                calc_shape = {input_shape.type(),
+                              lens_calc(input_shape.lens(),
+                                        norm_inputs.at("input_starts"),
+                                        norm_inputs.at("input_ends"),
+                                        this->axes),
+                              input_shape.strides()};
+            });
+            return {calc_shape, [=] { return input.data() + offset; }};
+        }
+        case 4: {
+            shape calc_shape;
+            std::size_t offset = 0;
+            visit_all(args[1], args[2], args[3])(
+                [&](auto input_starts, auto input_ends, auto input_axes) {
+                    auto norm_inputs = normalize_inputs(input_shape,
+                                                        input_starts.template to_vector<int64_t>(),
+                                                        input_ends.template to_vector<int64_t>(),
+                                                        input_axes.template to_vector<int64_t>());
+                    offset     = compute_offset(
+                        input_shape, norm_inputs.at("input_starts"), norm_inputs.at("input_axes"));
+                    calc_shape = shape{input_shape.type(),
+                                       lens_calc(input_shape.lens(),
+                                                 norm_inputs.at("input_starts"),
+                                                 norm_inputs.at("input_ends"),
+                                                 norm_inputs.at("input_axes")),
+                                       input_shape.strides()};
+                });
+            return {calc_shape, [=] { return input.data() + offset; }};
+        }
+        default: {
+            // Should never get here; covering in case some code change occurs
+            MIGRAPHX_THROW("SLICE: invalid number of inputs");
+        }
+        }
+    }

    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };
......
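A worked example of the slice arithmetic above: slicing axis 1 of a {3, 4, 5} tensor with start 1 and end 3 keeps the original strides and just moves the data pointer. This standalone snippet mirrors what lens_calc and compute_offset do, with packed strides assumed:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<std::size_t> lens{3, 4, 5};
        std::vector<std::size_t> strides{20, 5, 1}; // standard packed strides
        std::size_t axis = 1, start = 1, end = 3;

        auto new_lens  = lens;
        new_lens[axis] = end - start;               // lens become {3, 2, 5}
        std::size_t offset = start * strides[axis]; // 5 elements into the buffer

        std::cout << new_lens[1] << " " << offset << "\n"; // prints "2 5"
    }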
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -62,6 +62,14 @@ shape compute_padded_shape(const shape& input,
                           const std::vector<std::size_t>& stride,
                           const std::vector<std::size_t>& dilation);

+// Used for dynamic auto padding of pooling operators where padding needs to be computed at
+// evaluation time.
+shape compute_padded_pool_shape(const shape& input,
+                                const shape& kernel,
+                                const std::vector<std::size_t>& padding,
+                                const std::vector<std::size_t>& stride,
+                                const std::vector<std::size_t>& dilation);
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
......
@@ -389,7 +389,7 @@ void instruction::print(std::ostream& os,
    if(not ins->module_inputs().empty())
    {
        std::string delim = ", [";
-        for(auto&& mod_arg : ins->module_inputs())
+        for(const const_module_ref& mod_arg : ins->module_inputs())
        {
            os << delim << mod_arg->name();
            delim = ", ";
......
@@ -23,9 +23,9 @@
 */
 #include <migraphx/memory_coloring.hpp>
 #include <migraphx/module.hpp>
-#include <migraphx/operators.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/iterator_for.hpp>
+#include <migraphx/make_op.hpp>
 #include <migraphx/functional.hpp>
 #include <migraphx/algorithm.hpp>
 #include <migraphx/ranges.hpp>
@@ -382,7 +382,8 @@ void memory_coloring::apply(module& m) const
        auto s              = ins->get_shape();
        std::size_t offset  = seg.first * alignment;
        assert(offset < n);
-        m.replace_instruction(ins, op::load{s, offset}, mem);
+        m.replace_instruction(
+            ins, make_op("load", {{"shape", to_value(s)}, {"offset", offset}}), mem);
    }

    // Replace zero allocation
@@ -391,7 +392,8 @@ void memory_coloring::apply(module& m) const
        if(ins->name() != allocation_op)
            continue;
        assert(ins->get_shape().bytes() == 0);
-        m.replace_instruction(ins, op::load{ins->get_shape(), 0}, mem);
+        m.replace_instruction(
+            ins, make_op("load", {{"shape", to_value(ins->get_shape())}, {"offset", 0}}), mem);
    }

    // Remove scratch parameter if its not used
......
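The hunk above switches from constructing op::load directly to name-keyed construction with make_op, which removes the pass's compile-time dependency on the monolithic operators header. A toy illustration of that design choice (not the MIGraphX API; a stand-in registry and attribute):

    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <map>
    #include <string>

    struct operation
    {
        std::string name;
        std::size_t offset = 0;
    };

    // Registry mapping op names to factories; callers never name the op type.
    using factory = std::function<operation(std::size_t)>;

    static std::map<std::string, factory>& registry()
    {
        static std::map<std::string, factory> r = {
            {"load", [](std::size_t off) { return operation{"load", off}; }}};
        return r;
    }

    static operation make_op(const std::string& name, std::size_t offset)
    {
        return registry().at(name)(offset);
    }

    int main()
    {
        auto op = make_op("load", 64);
        std::cout << op.name << " @ " << op.offset << "\n"; // prints "load @ 64"
    }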