Commit 2fc6b715 authored by Paul

Merge

parents 5967d68d 118e05c7
@@ -44,7 +44,7 @@ struct allocate
     std::string name() const { return "allocate"; }
     shape compute_shape(const std::vector<shape>& inputs) const
     {
-        migraphx::check_shapes{inputs, *this}.has(0);
+        migraphx::check_shapes{inputs, *this, true}.has(0);
         return s;
     }
     argument compute(const shape& output_shape, const std::vector<argument>&) const
...
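The extra `true` argument in the allocate hunk above appears to opt the operator into accepting dynamic input shapes during shape checking. A minimal standalone sketch of that guard pattern, using a hypothetical toy_check_shapes class rather than the real MIGraphX one:

    #include <cstddef>
    #include <stdexcept>
    #include <string>
    #include <vector>

    struct toy_shape
    {
        bool dynamic = false;
    };

    struct toy_check_shapes
    {
        const std::vector<toy_shape>& shapes;
        std::string op_name;
        bool dynamic_allowed = false; // the new third constructor argument

        const toy_check_shapes& has(std::size_t n) const
        {
            if(shapes.size() != n)
                throw std::runtime_error(op_name + ": wrong number of inputs");
            for(const auto& s : shapes)
                if(s.dynamic and not dynamic_allowed)
                    throw std::runtime_error(op_name + ": dynamic shapes not allowed");
            return *this;
        }
    };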
@@ -62,7 +62,7 @@ struct argmax
         if(s0.dynamic())
         {
             auto dyn_dims  = s0.dyn_dims();
-            dyn_dims[axis] = {1, 1, 0};
+            dyn_dims[axis] = {1, 1};
             return {shape::int64_type, dyn_dims};
         }
         else
...
@@ -134,7 +134,7 @@ struct concat
             }
             auto new_dims  = inputs[0].dyn_dims();
-            new_dims[axis] = migraphx::shape::dynamic_dimension{new_min, new_max, 0};
+            new_dims[axis] = migraphx::shape::dynamic_dimension{new_min, new_max};
             return {inputs[0].type(), new_dims};
         }
         else
...
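For context, the new_min and new_max used above come from summing the per-input bounds along the concat axis. A standalone sketch under that assumption, with a hypothetical dyn_dim type standing in for shape::dynamic_dimension:

    #include <cstddef>
    #include <vector>

    struct dyn_dim
    {
        std::size_t min;
        std::size_t max;
    };

    // Concatenating n inputs along an axis sums their bounds on that axis.
    dyn_dim concat_axis_dim(const std::vector<dyn_dim>& dims)
    {
        dyn_dim out{0, 0};
        for(const auto& d : dims)
        {
            out.min += d.min; // smallest possible concatenated length
            out.max += d.max; // largest possible concatenated length
        }
        return out;
    }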
@@ -48,7 +48,7 @@ struct contiguous
     {
         check_shapes{inputs, *this, true}.has(1);
         auto s0 = inputs.front();
-        if(s0.dynamic() or s0.standard())
+        if(s0.dynamic())
         {
             return s0;
         }
...
@@ -24,9 +24,12 @@
 #ifndef MIGRAPHX_GUARD_OPERATORS_CONVOLUTION_HPP
 #define MIGRAPHX_GUARD_OPERATORS_CONVOLUTION_HPP

+#include <migraphx/argument.hpp>
 #include <migraphx/op/common.hpp>
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/convolution.hpp>
+#include <migraphx/pad_calc.hpp>
 #include <migraphx/value.hpp>
 #include <cmath>
 #include <utility>
@@ -35,6 +38,10 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+/**
+ * Convolution operator. Does not support optimal dimensions for spatial dimensions. Returns empty
+ * optimals.
+ */
 struct convolution
 {
     std::vector<std::size_t> padding = {0, 0};
@@ -145,7 +152,7 @@ struct convolution
             else
             {
                 auto l = input_shape.lens().at(0);
-                output_dyn_dims.push_back({l, l, 0});
+                output_dyn_dims.push_back({l, l});
             }
         };
@@ -162,25 +169,30 @@ struct convolution
             if(x_shape.dynamic())
             {
                 auto x = x_shape.dyn_dims()[i + 2];
-                output_dyn_dims.push_back(shape::dynamic_dimension{
-                    ceil_div(x.min, s), ceil_div(x.max, s), ceil_div(x.opt, s)});
+                std::set<std::size_t> optimals{};
+                std::transform(x.optimals.begin(),
+                               x.optimals.end(),
+                               std::inserter(optimals, optimals.begin()),
+                               [&](auto o) { return ceil_div(o, s); });
+                output_dyn_dims.push_back(
+                    shape::dynamic_dimension{ceil_div(x.min, s), ceil_div(x.max, s), optimals});
             }
             else
             {
                 auto od = ceil_div(x_shape.lens()[i + 2], s);
-                output_dyn_dims.push_back(shape::dynamic_dimension{od, od, 0});
+                output_dyn_dims.push_back(shape::dynamic_dimension{od, od});
             }
         }
     }
     else
     {
+        // Does not compute for optimals
         auto min_spatial_dims = calc_conv_lens(x_shape.min_lens(), w_shape.max_lens());
         auto max_spatial_dims = calc_conv_lens(x_shape.max_lens(), w_shape.min_lens());
-        auto opt_spatial_dims = calc_conv_lens(x_shape.opt_lens(), w_shape.opt_lens());
         for(size_t i = 0; i < num_spatial_dims; ++i)
         {
-            output_dyn_dims.push_back(shape::dynamic_dimension{
-                min_spatial_dims[i], max_spatial_dims[i], opt_spatial_dims[i]});
+            output_dyn_dims.push_back(
+                shape::dynamic_dimension{min_spatial_dims[i], max_spatial_dims[i], {}});
         }
     }
     return shape{x_shape.type(), output_dyn_dims};
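The new optimals handling maps each candidate optimal value through ceil_div and collects the results into a std::set, de-duplicating values that land on the same output size. The same transform, as a standalone sketch:

    #include <algorithm>
    #include <cstddef>
    #include <iterator>
    #include <set>

    std::size_t ceil_div(std::size_t x, std::size_t d) { return (x + d - 1) / d; }

    // Divide every candidate optimal by the stride, rounding up; the set
    // collapses duplicates, e.g. {7, 8} with stride 4 both map to 2.
    std::set<std::size_t> map_optimals(const std::set<std::size_t>& optimals, std::size_t stride)
    {
        std::set<std::size_t> out;
        std::transform(optimals.begin(),
                       optimals.end(),
                       std::inserter(out, out.begin()),
                       [&](auto o) { return ceil_div(o, stride); });
        return out;
    }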
@@ -201,6 +213,37 @@ struct convolution
         check_attribute_size();
         return stride.size();
     }

+    argument compute(shape output_shape, std::vector<argument> args) const
+    {
+        std::vector<std::size_t> new_padding;
+        if(padding_mode != op::padding_mode_t::default_)
+        {
+            auto input_lens   = args[0].get_shape().lens();
+            auto weights_lens = args[1].get_shape().lens();
+            new_padding =
+                padding_mode == op::same_upper
+                    ? calc_dyn_auto_pad(input_lens, weights_lens, stride, dilation, true)
+                    : calc_dyn_auto_pad(input_lens, weights_lens, stride, dilation, false);
+            output_shape = compute_padded_shape(
+                args[0].get_shape(), args[1].get_shape(), new_padding, stride, dilation);
+        }
+        else
+        {
+            new_padding = padding;
+            if(output_shape.dynamic())
+            {
+                output_shape =
+                    normalize_compute_shape({args.at(0).get_shape(), args.at(1).get_shape()});
+            }
+        }
+        argument result{output_shape};
+        visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
+            migraphx::convolution(output, input, weights, new_padding, stride, group);
+        });
+        return result;
+    }
 };

 } // namespace op
...
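The new compute resolves same_upper/same_lower padding at run time via calc_dyn_auto_pad. A standalone sketch of the usual SAME-padding arithmetic (assumed to match; not taken from this commit): pad each spatial axis just enough that out = ceil(in / stride), splitting any odd remainder toward the end for same_upper and toward the front for same_lower.

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<std::size_t> same_padding(const std::vector<std::size_t>& in,     // spatial dims only
                                          const std::vector<std::size_t>& kernel, // spatial dims only
                                          const std::vector<std::size_t>& stride,
                                          const std::vector<std::size_t>& dilation,
                                          bool upper)
    {
        // Layout: {pad_begin..., pad_end...}, one entry per spatial axis.
        std::vector<std::size_t> pads(2 * in.size());
        for(std::size_t i = 0; i < in.size(); ++i)
        {
            std::size_t effective_k = (kernel[i] - 1) * dilation[i] + 1;
            std::size_t out         = (in[i] + stride[i] - 1) / stride[i]; // ceil divide
            std::int64_t total      = std::max<std::int64_t>(
                0,
                static_cast<std::int64_t>((out - 1) * stride[i] + effective_k) -
                    static_cast<std::int64_t>(in[i]));
            pads[i]             = upper ? total / 2 : total - total / 2;
            pads[i + in.size()] = upper ? total - total / 2 : total / 2;
        }
        return pads;
    }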
@@ -40,7 +40,11 @@ struct dequantizelinear
     std::string name() const { return "dequantizelinear"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.same_dims();
+        check_shapes{inputs, *this}.same_dims().has(2, 3);
+        if(inputs.size() == 3 and inputs[0].type() != inputs[2].type())
+        {
+            MIGRAPHX_THROW("DEQUANTIZELINEAR: Zero point and input should be the same type.");
+        }
         return {inputs[1].type(), inputs[0].lens(), inputs[0].strides()};
     }
...
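For reference, the dequantize arithmetic itself is unchanged by this hunk: y = (x - zero_point) * scale, with the output taking the scale's type (hence inputs[1].type() above). A minimal standalone sketch for int8 inputs:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<float> dequantize(const std::vector<std::int8_t>& x,
                                  const std::vector<float>& scale,
                                  const std::vector<std::int8_t>& zero_point)
    {
        std::vector<float> y(x.size());
        for(std::size_t i = 0; i < x.size(); ++i)
            y[i] = (static_cast<int>(x[i]) - static_cast<int>(zero_point[i])) * scale[i];
        return y;
    }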
@@ -29,6 +29,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
+#include <migraphx/dyn_output.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -59,27 +60,22 @@ struct flatten
         auto s = inputs[0];
         if(s.dynamic())
         {
+            // Doesn't handle optimals
             auto min_lens = s.min_lens();
             auto max_lens = s.max_lens();
-            auto opt_lens = s.opt_lens();
             // If any of the opt values is 0, output opt will be 0
             shape::dynamic_dimension x = {
                 std::accumulate(
                     min_lens.begin(), min_lens.begin() + axis, std::size_t{1}, std::multiplies<>{}),
                 std::accumulate(
                     max_lens.begin(), max_lens.begin() + axis, std::size_t{1}, std::multiplies<>{}),
-                std::accumulate(opt_lens.begin(),
-                                opt_lens.begin() + axis,
-                                std::size_t{1},
-                                std::multiplies<>{})};
+                {}};
             shape::dynamic_dimension y = {
                 std::accumulate(
                     min_lens.begin() + axis, min_lens.end(), std::size_t{1}, std::multiplies<>{}),
                 std::accumulate(
                     max_lens.begin() + axis, max_lens.end(), std::size_t{1}, std::multiplies<>{}),
-                std::accumulate(
-                    opt_lens.begin() + axis, opt_lens.end(), std::size_t{1}, std::multiplies<>{}),
-            };
+                {}};
             return {s.type(), {x, y}};
         }
         else
...
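The dynamic flatten bounds above are products of the per-axis bounds before and after axis. The same accumulate pattern as a standalone helper, called once with min_lens and once with max_lens (optimals are now left empty):

    #include <cstddef>
    #include <functional>
    #include <numeric>
    #include <utility>
    #include <vector>

    // Returns {product of lens[0, axis), product of lens[axis, end)}.
    std::pair<std::size_t, std::size_t> flatten_bounds(const std::vector<std::size_t>& lens,
                                                       std::size_t axis)
    {
        auto outer = std::accumulate(
            lens.begin(), lens.begin() + axis, std::size_t{1}, std::multiplies<>{});
        auto inner =
            std::accumulate(lens.begin() + axis, lens.end(), std::size_t{1}, std::multiplies<>{});
        return {outer, inner};
    }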
@@ -121,7 +121,7 @@ struct gathernd
             // A rank 0 output is a scalar
             if(output_ndim == 0)
-                return shape(data_shape.type(), {shape::dynamic_dimension({1, 1, 0})});
+                return shape(data_shape.type(), {shape::dynamic_dimension({1, 1})});

             // Part of the output shape comes from indices tensor, part from data tensor
             std::vector<shape::dynamic_dimension> output_dims(output_ndim);
...
@@ -119,8 +119,8 @@ struct nonmaxsuppression
             fixed_shape_error_check();
         }
         std::vector<shape::dynamic_dimension> out_lens = {};
-        out_lens.push_back({0, max_num_boxes, 0});
-        out_lens.push_back({3, 3, 0});
+        out_lens.push_back({0, max_num_boxes});
+        out_lens.push_back({3, 3});
         return {shape::int64_type, out_lens};
     }
     else
...
@@ -89,25 +89,17 @@ struct pooling
         std::vector<std::size_t> output_lens{};
         for(size_t i = 0; i < kdims; ++i)
         {
-            if(input_lens[i + 2] == 0)
-            {
-                // handle opt = 0
-                output_lens.push_back(0);
-            }
-            else
-            {
-                std::size_t padding_factor = 2 * padding[i];
-                if(padding.size() == 2 * kdims)
-                    padding_factor = padding[i] + padding[i + kdims];
-                assert(input_lens[i + 2] + padding_factor >= lengths[i]);
-                std::size_t dim_size = input_lens[i + 2] + padding_factor - lengths[i];
-                std::size_t len =
-                    (ceil_mode)
-                        ? dim_size / stride[i] + static_cast<std::size_t>((dim_size % stride[i] !=
-                                                                           0)) // ceil uint divide
-                        : dim_size / stride[i]; // floor divide
-                output_lens.push_back(len + 1);
-            }
+            std::size_t padding_factor = 2 * padding[i];
+            if(padding.size() == 2 * kdims)
+                padding_factor = padding[i] + padding[i + kdims];
+            assert(input_lens[i + 2] + padding_factor >= lengths[i]);
+            std::size_t dim_size = input_lens[i + 2] + padding_factor - lengths[i];
+            std::size_t len =
+                (ceil_mode)
+                    ? dim_size / stride[i] +
+                          static_cast<std::size_t>((dim_size % stride[i] != 0)) // ceil uint divide
+                    : dim_size / stride[i]; // floor divide
+            output_lens.push_back(len + 1);
         }
         return output_lens;
     }
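The per-axis length above is floor_or_ceil((in + pad_total - kernel) / stride) + 1. As a standalone helper with a worked example: in = 8, kernel = 3, stride = 2, pad_total = 0 gives dim_size = 5; floor mode yields 5/2 + 1 = 3 windows, ceil mode yields 5/2 + 1 + 1 = 4 (the last window is partial).

    #include <cstddef>

    std::size_t pool_out_len(
        std::size_t in, std::size_t kernel, std::size_t stride, std::size_t pad_total, bool ceil_mode)
    {
        std::size_t dim_size = in + pad_total - kernel;
        std::size_t len      = ceil_mode
                                   ? dim_size / stride +
                                         static_cast<std::size_t>(dim_size % stride != 0) // ceil divide
                                   : dim_size / stride;                                   // floor divide
        return len + 1;
    }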
@@ -134,19 +126,19 @@ struct pooling
         {
             for(size_t i = 0; i < kdims; ++i)
             {
-                output_dyn_dims.push_back(shape::dynamic_dimension{1, 1, 1});
+                output_dyn_dims.push_back(shape::dynamic_dimension{1, 1});
             }
             return {input.type(), output_dyn_dims};
         }
         else
         {
+            // does not compute for optimals
             auto min_spatial_dims = calc_spatial_dim_out(input.min_lens(), kdims);
             auto max_spatial_dims = calc_spatial_dim_out(input.max_lens(), kdims);
-            auto opt_spatial_dims = calc_spatial_dim_out(input.opt_lens(), kdims);
             for(size_t i = 0; i < kdims; ++i)
             {
-                output_dyn_dims.push_back(shape::dynamic_dimension{
-                    min_spatial_dims[i], max_spatial_dims[i], opt_spatial_dims[i]});
+                output_dyn_dims.push_back(
+                    shape::dynamic_dimension{min_spatial_dims[i], max_spatial_dims[i], {}});
             }
             return {input.type(), output_dyn_dims};
         }
...
@@ -25,8 +25,10 @@
 #define MIGRAPHX_GUARD_OPERATORS_QUANT_CONVOLUTION_HPP

 #include <migraphx/op/common.hpp>
+#include <migraphx/argument.hpp>
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/convolution.hpp>
 #include <migraphx/value.hpp>
 #include <cmath>
 #include <utility>
@@ -114,6 +116,17 @@ struct quant_convolution
         check_attribute_size();
         return stride.size();
     }

+    argument compute(shape output_shape, std::vector<argument> args) const
+    {
+        argument result{output_shape};
+        result.visit([&](auto output) {
+            visit_all(args[0], args[1])([&](auto input, auto weights) {
+                migraphx::convolution(output, input, weights, padding, stride, group);
+            });
+        });
+        return result;
+    }
 };

 } // namespace op
...
@@ -40,7 +40,11 @@ struct quantizelinear
     std::string name() const { return "quantizelinear"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.same_dims();
+        check_shapes{inputs, *this}.same_dims().has(2, 3);
+        if(inputs[0].type() != inputs[1].type())
+        {
+            MIGRAPHX_THROW("QUANTIZELINEAR: Scales and input must be the same type");
+        }
         if(inputs.size() == 3)
         {
             return {inputs[2].type(), inputs[0].lens(), inputs[0].strides()};
@@ -61,17 +65,15 @@ struct quantizelinear
         argument result{output_shape};
         visit_all(result, y_zero_point)([&](auto output, auto zero_pts) {
-            x.visit([&](auto input) {
-                y_scale.visit([&](auto scales) {
-                    using quant_type = typename decltype(output)::value_type;
-                    auto min_value   = std::numeric_limits<quant_type>::min();
-                    auto max_value   = std::numeric_limits<quant_type>::max();
-                    par_for(output_shape.elements(), [&](auto i) {
-                        int64_t quantized = static_cast<int64_t>(std::round(input[i] / scales[i])) +
-                                            static_cast<int64_t>(zero_pts[i]);
-                        output[i] = std::max(static_cast<int64_t>(min_value),
-                                             std::min(static_cast<int64_t>(max_value), quantized));
-                    });
-                });
+            visit_all(x, y_scale)([&](auto input, auto scales) {
+                using quant_type = typename decltype(output)::value_type;
+                auto min_value   = std::numeric_limits<quant_type>::min();
+                auto max_value   = std::numeric_limits<quant_type>::max();
+                par_for(output_shape.elements(), [&](auto i) {
+                    int64_t quantized = static_cast<int64_t>(std::round(input[i] / scales[i])) +
+                                        static_cast<int64_t>(zero_pts[i]);
+                    output[i] = std::max(static_cast<int64_t>(min_value),
+                                         std::min(static_cast<int64_t>(max_value), quantized));
+                });
             });
         });
...
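The flattened kernel still computes q = clamp(round(x / scale) + zero_point, numeric limits of quant_type); only the visitor nesting changed. A standalone scalar sketch for int8:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    std::int8_t quantize_one(float x, float scale, std::int8_t zero_point)
    {
        auto min_value = static_cast<std::int64_t>(std::numeric_limits<std::int8_t>::min());
        auto max_value = static_cast<std::int64_t>(std::numeric_limits<std::int8_t>::max());
        std::int64_t q = static_cast<std::int64_t>(std::round(x / scale)) +
                         static_cast<std::int64_t>(zero_point);
        return static_cast<std::int8_t>(std::max(min_value, std::min(max_value, q)));
    }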
@@ -91,7 +91,7 @@ struct reduce_op : op_name<Derived>
     {
         value normalize;
         normalize["axes"] = value::array{normalize_attribute::include_min};
-        return {{"normalize_axes", normalize}};
+        return {{"normalize_axes", normalize}, {"reduce", true}};
     }

     std::vector<int64_t> tune_axes(std::size_t n_dim) const
@@ -123,9 +123,7 @@ struct reduce_op : op_name<Derived>
         auto tuned_axes = tune_axes(output_dyn_dims.size());
         for(const auto& axis : tuned_axes)
         {
-            // At the time of writing, there's no functional difference between
-            // optimum of 0 (no opt) or 1.
-            output_dyn_dims[axis] = {1, 1, 0};
+            output_dyn_dims[axis] = {1, 1};
         }

         return shape{s.type(), output_dyn_dims};
...
@@ -29,6 +29,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/dyn_output.hpp>
+#include <migraphx/optional.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -115,6 +116,10 @@ struct reshape
                                        StrideIterator stride_start,
                                        StrideIterator stride_last)
     {
+<<<<<<< HEAD
+=======
+        assert(std::distance(dim_start, dim_last) == std::distance(stride_start, stride_last));
+>>>>>>> origin/reshape-nonstandard
         auto cstride = *std::prev(stride_last);
         return std::equal(std::make_reverse_iterator(dim_last),
                           std::make_reverse_iterator(dim_start + 1),
@@ -126,11 +131,93 @@ struct reshape
                           });
     }

+<<<<<<< HEAD
     shape static_compute_shape(std::vector<shape> inputs, std::size_t n_neg_dims) const
     {
         check_shapes{inputs, *this}.has(1);
         auto&& idims    = inputs.front().lens();
         auto&& istrides = inputs.front().strides();
+=======
+    static optional<shape> reshape_dims(const shape& input, const std::vector<std::size_t>& rdims)
+    {
+        if(input.standard())
+            return shape{input.type(), rdims};
+
+        const auto& idims    = input.lens();
+        const auto& istrides = input.strides();
+
+        std::vector<std::size_t> rstrides;
+        std::size_t i = 0;
+        std::size_t r = 0;
+        while(i < idims.size() and r < rdims.size())
+        {
+            auto idim = idims[i];
+            auto rdim = rdims[r];
+            if(rdim == idim)
+            {
+                rstrides.push_back(istrides[i]);
+            }
+            // squeeze
+            else if(rdim > idim)
+            {
+                auto start = idims.begin() + i;
+                auto it    = compute_end_dim(start, idims.end(), rdim);
+                if(it == start)
+                    return nullopt;
+                auto n = it - start;
+                if((i + n) > istrides.size())
+                    return nullopt;
+                if(not can_strides_merge(
+                       start, it + 1, istrides.begin() + i, istrides.begin() + i + n + 1))
+                    return nullopt;
+                i += n;
+                rstrides.push_back(istrides[i]);
+            }
+            // unsqueeze
+            else // if(rdim < idim)
+            {
+                auto start = rdims.begin() + i;
+                auto it    = compute_end_dim(start, rdims.end(), idim);
+                if(it == start)
+                    return nullopt;
+                auto n = it - start;
+                if((r + n) > rdims.size())
+                    return nullopt;
+                auto stride = istrides[i] * idim;
+                std::for_each(start, it + 1, [&](auto dim) {
+                    stride /= dim;
+                    rstrides.push_back(stride);
+                });
+                r += n;
+            }
+            i++;
+            r++;
+        }

+        // Handle trailing 1s
+        if(rstrides.size() < rdims.size() and not rstrides.empty())
+        {
+            auto stride = rstrides.back();
+            for(auto d : range(rdims.begin() + rstrides.size(), rdims.end()))
+            {
+                if(d != 1)
+                    return nullopt;
+                rstrides.push_back(stride);
+            }
+        }

+        if(rdims.size() != rstrides.size())
+            return nullopt;

+        return shape{input.type(), rdims, rstrides};
+    }

+    shape static_compute_shape(std::vector<shape> inputs, std::size_t n_neg_dims) const
+    {
+        check_shapes{inputs, *this}.has(1);
+        auto&& idims = inputs.front().lens();
+        // auto&& istrides = inputs.front().strides();
+>>>>>>> origin/reshape-nonstandard
         std::vector<std::size_t> rdims(dims.begin(), dims.end());
         for(std::size_t i = 0; i < dims.size(); i++)
@@ -156,6 +243,7 @@ struct reshape
             }
         }

+<<<<<<< HEAD
         shape s;
         if(inputs.front().standard())
         {
@@ -232,10 +320,19 @@ struct reshape
+<<<<<<< HEAD
         assert(s.bytes() == inputs.front().bytes());
         if(s.elements() != inputs.front().elements())
+=======
+        auto s = reshape_dims(inputs.front(), rdims);
+        if(not s.has_value())
+            MIGRAPHX_THROW("Reshape on axis that is not packed.");
+        if(s->elements() != inputs.front().elements())
+>>>>>>> origin/reshape-nonstandard
             MIGRAPHX_THROW("Reshape: Wrong number of elements for reshape: reshape has " +
-                           std::to_string(s.elements()) + " elements whereas the input has " +
+                           std::to_string(s->elements()) + " elements whereas the input has " +
                            std::to_string(inputs.front().elements()));
-        return s;
+
+        assert(s->bytes() == inputs.front().bytes());
+        return *s;
     }

     shape compute_shape(std::vector<shape> inputs) const
...
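The merge-side reshape_dims refuses to reshape across axes that are not packed in memory. As far as this hunk shows, the test behind can_strides_merge is the usual contiguity condition: adjacent axes can collapse into one only when each stride equals the next stride times the next dimension. A standalone sketch under that assumption:

    #include <cstddef>
    #include <vector>

    // True when dims/strides describe one packed span, e.g. dims {2, 3} with
    // strides {3, 1} (mergeable into {6}); a transposed view with strides
    // {1, 2} fails, which is when reshape_dims returns nullopt.
    bool can_merge(const std::vector<std::size_t>& dims, const std::vector<std::size_t>& strides)
    {
        for(std::size_t i = 0; i + 1 < dims.size(); ++i)
            if(strides[i] != strides[i + 1] * dims[i + 1])
                return false;
        return true;
    }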
@@ -43,50 +43,100 @@ struct select_module
     std::string name() const { return "select_module"; }

-    shape compute_shape(const std::vector<shape>&, const std::vector<module_ref>&) const
+    shape compute_shape(const std::vector<shape>& inputs, const std::vector<module_ref>&) const
     {
+        check_shapes{inputs, *this, true}.has_at_least(1);
         return shape{output_dyn_shapes};
     }

+    std::vector<std::string> get_input_parameter_names(module_ref mod) const
+    {
+        auto param_names = mod->get_parameter_names();
+        std::vector<std::string> ret;
+        std::copy_if(param_names.cbegin(),
+                     param_names.cend(),
+                     std::back_inserter(ret),
+                     [](auto pn) { return not contains(pn, "#output_"); });
+        return ret;
+    }
+
+    std::vector<std::string> get_output_parameter_names(module_ref mod) const
+    {
+        auto param_names = mod->get_parameter_names();
+        std::vector<std::string> ret;
+        std::copy_if(param_names.cbegin(),
+                     param_names.cend(),
+                     std::back_inserter(ret),
+                     [](auto pn) { return contains(pn, "#output_"); });
+        return ret;
+    }
+
     argument compute(const shape&,
                      const std::vector<argument>& args,
                      const std::vector<module_ref>& submodule_list,
                      const std::function<std::vector<argument>(
                          module_ref&, const std::unordered_map<std::string, argument>&)>& run) const
     {
-        // find submodule with input parameter shapes exactly the same as the input arguments
-        // assuming arguments are in the same order as the input parameters
+        // Find submodule with input parameter shapes exactly the same as the input instruction
+        // arguments. Assuming instruction arguments are in the same order as the instruction
+        // parameters.
         auto module_iter =
             std::find_if(submodule_list.cbegin(), submodule_list.cend(), [&](module_ref mr) {
-                auto param_names = mr->get_parameter_names();
-                assert(param_names.size() <= args.size());
-                return std::equal(param_names.cbegin(),
-                                  param_names.cend(),
-                                  args.cbegin(),
-                                  [&](auto p_name, auto a) {
-                                      return a.get_shape() == mr->get_parameter_shape(p_name);
-                                  });
+                auto in_param_names = get_input_parameter_names(mr);
+                auto param_shapes   = mr->get_parameter_shapes();
+                assert(in_param_names.size() <= args.size());
+                return std::equal(
+                    in_param_names.cbegin(),
+                    in_param_names.cend(),
+                    args.cbegin(),
+                    [&](auto p_name, auto a) { return a.get_shape() == param_shapes[p_name]; });
             });

         if(module_iter == submodule_list.end())
         {
             MIGRAPHX_THROW("SELECT_MODULE: no compatible submodules found for given input shapes");
         }
         auto* module_to_run = *module_iter;

-        std::unordered_map<std::string, argument> params;
-        // add input parameters
-        auto param_names = module_to_run->get_parameter_names();
-        assert(param_names.size() <= args.size());
-        std::transform(param_names.begin(),
-                       param_names.end(),
-                       args.begin(),
-                       std::inserter(params, params.end()),
-                       [](auto&& name, auto&& a) { return std::make_pair(name, a); });
+        std::unordered_map<std::string, argument> p_map;
+        // add input parameters to parameter_map
+        auto in_param_names = get_input_parameter_names(module_to_run);
+        assert(in_param_names.size() <= args.size());
+        std::transform(in_param_names.begin(),
+                       in_param_names.end(),
+                       args.begin(),
+                       std::inserter(p_map, p_map.end()),
+                       [&](auto&& name, auto&& a) { return std::make_pair(name, a); });

-        auto results = run(module_to_run, params);
+        // One tuple output parameter in main module to multiple output parameters in submodule
+        auto out_param_names    = get_output_parameter_names(module_to_run);
+        auto output_sub_objects = args.back().get_sub_objects();
+        assert(out_param_names.size() == output_sub_objects.size());
+        std::transform(out_param_names.begin(),
+                       out_param_names.end(),
+                       output_sub_objects.begin(),
+                       std::inserter(p_map, p_map.end()),
+                       [&](auto&& name, auto&& a) {
+                           auto ps = module_to_run->get_parameter_shape(name);
+                           if(a.get_shape() != ps)
+                           {
+                               assert(ps.bytes() == a.get_shape().bytes());
+                               return std::make_pair(name, a.reshape(ps));
+                           }
+                           else
+                           {
+                               return std::make_pair(name, a);
+                           }
+                       });
+
+        auto results = run(module_to_run, p_map);
         return argument{results};
     }
+
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
 };

 } // namespace op
...
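The "#output_" filtering above splits a submodule's parameters into inputs and outputs by name. The input side as a standalone sketch, with std::string::find standing in for the contains helper:

    #include <algorithm>
    #include <iterator>
    #include <string>
    #include <vector>

    // Keep only parameter names that do not carry the "#output_" marker.
    std::vector<std::string> input_params(const std::vector<std::string>& names)
    {
        std::vector<std::string> ret;
        std::copy_if(names.cbegin(), names.cend(), std::back_inserter(ret), [](const auto& pn) {
            return pn.find("#output_") == std::string::npos;
        });
        return ret;
    }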
@@ -111,16 +111,15 @@ struct slice
         // For a static shape, old_lens will be adjusted to a new size
         // for those axes that are sliced.
         // For dynamic shape, the adjusted old_lens become the new max values,
-        // while updating the old mins and opts if possible.
+        // while updating the old mins and optimals if possible.
         std::vector<std::size_t> new_mins;
-        std::vector<std::size_t> new_opts;
         std::vector<std::size_t> old_lens;
         std::vector<std::size_t> old_strides;
+        // Doesn't handle optimals
         if(input_shape.dynamic())
         {
             old_lens = input_shape.max_lens();
             new_mins = input_shape.min_lens();
-            new_opts = input_shape.opt_lens();
         }
         else
         {
@@ -146,17 +145,11 @@ struct slice
                 std::size_t sliced_min_length = ends[i] - starts[i];
                 // if the slice size is smaller than maxes but larger than mins
                 new_mins[axis] = std::min(sliced_min_length, new_mins[axis]);
-                auto sliced_opt_length = ends[i] - starts[i];
-                if(new_opts[axis] != 0)
-                    new_opts[axis] = sliced_opt_length;
-                if(new_opts[axis] < new_mins[axis] or new_opts[axis] > new_lens[axis])
-                    new_opts[axis] = 0;
             }
         }
         if(input_shape.dynamic())
         {
-            return shape{t, new_mins, new_lens, new_opts};
+            return shape{t, new_mins, new_lens, {}};
         }
         else
         {
...
@@ -81,7 +81,7 @@ struct unsqueeze
         {
             if(std::find(axes.begin(), axes.end(), i) != axes.end())
             {
-                dyn_dims.push_back({1, 1, 0});
+                dyn_dims.push_back({1, 1});
             }
             else
             {
...
@@ -39,6 +39,7 @@ struct module_pass_manager
     virtual module& get_module()                            = 0;
     virtual module* create_module(const std::string& name)  = 0;
     virtual module* get_common_parent()                     = 0;
+    virtual module* get_root_module()                       = 0;
     virtual void run_pass(const pass& p)                    = 0;

     protected:
...
@@ -26,6 +26,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/filesystem.hpp>
+#include <functional>
 #include <string>
 #include <memory>
@@ -36,6 +37,7 @@ struct process_impl;
 struct process
 {
+    using writer = std::function<void(const char*, std::size_t)>;
     process(const std::string& cmd);

     // move constructor
@@ -49,6 +51,7 @@ struct process
     process& cwd(const fs::path& p);

     void exec();
+    void write(std::function<void(process::writer)> pipe_in);

     private:
     std::unique_ptr<process_impl> impl;
...
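A speculative usage sketch built only from the declarations added above; the command and the exact call sequence are assumptions, not something this commit documents:

    #include <migraphx/process.hpp>
    #include <string>

    void pipe_to_child(const std::string& input)
    {
        // write() presumably launches the command and hands the callback a
        // writer for the child's stdin; the writer takes a buffer and a size.
        migraphx::process{"wc -c"}.write([&](migraphx::process::writer w) {
            w(input.data(), input.size());
        });
    }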
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -21,20 +21,27 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include <migraphx/memory_coloring.hpp>
-#include "memory_coloring_impl.hpp"
+#ifndef MIGRAPHX_GUARD_RTGLIB_PROMOTE_LITERALS_HPP
+#define MIGRAPHX_GUARD_RTGLIB_PROMOTE_LITERALS_HPP
+
+#include <string>
+#include <migraphx/pass_manager.hpp>
+#include <migraphx/config.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-void memory_coloring::apply(module& m) const
+/**
+ * Replace literals in submodules with literals in the root module.
+ * Intended to allow for reuse of the literals between submodules.
+ */
+struct promote_literals
 {
-    if(not enabled(MIGRAPHX_DISABLE_MEMORY_COLORING{}))
-    {
-        memory_coloring_impl opt(&m, allocation_op, verify);
-        opt.run();
-    }
-}
+    std::string name() const { return "promote_literals"; }
+    void apply(module_pass_manager&) const;
+};

 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
+
+#endif