Commit baac1dab authored by Alan Turner's avatar Alan Turner
Browse files

Merge remote-tracking branch 'origin/develop' into ck-host-lib

parents 830dff7a 77042e30
...@@ -38,6 +38,10 @@ namespace migraphx { ...@@ -38,6 +38,10 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace op { namespace op {
/**
* Convolution operator. Does not support optimal dimensions for spatial dimensions. Returns empty
* optimals.
*/
struct convolution struct convolution
{ {
std::vector<std::size_t> padding = {0, 0}; std::vector<std::size_t> padding = {0, 0};
...@@ -148,7 +152,7 @@ struct convolution ...@@ -148,7 +152,7 @@ struct convolution
else else
{ {
auto l = input_shape.lens().at(0); auto l = input_shape.lens().at(0);
output_dyn_dims.push_back({l, l, 0}); output_dyn_dims.push_back({l, l});
} }
}; };
...@@ -165,25 +169,30 @@ struct convolution ...@@ -165,25 +169,30 @@ struct convolution
if(x_shape.dynamic()) if(x_shape.dynamic())
{ {
auto x = x_shape.dyn_dims()[i + 2]; auto x = x_shape.dyn_dims()[i + 2];
output_dyn_dims.push_back(shape::dynamic_dimension{ std::set<std::size_t> optimals{};
ceil_div(x.min, s), ceil_div(x.max, s), ceil_div(x.opt, s)}); std::transform(x.optimals.begin(),
x.optimals.end(),
std::inserter(optimals, optimals.begin()),
[&](auto o) { return ceil_div(o, s); });
output_dyn_dims.push_back(
shape::dynamic_dimension{ceil_div(x.min, s), ceil_div(x.max, s), optimals});
} }
else else
{ {
auto od = ceil_div(x_shape.lens()[i + 2], s); auto od = ceil_div(x_shape.lens()[i + 2], s);
output_dyn_dims.push_back(shape::dynamic_dimension{od, od, 0}); output_dyn_dims.push_back(shape::dynamic_dimension{od, od});
} }
} }
} }
else else
{ {
// Does not compute for optimals
auto min_spatial_dims = calc_conv_lens(x_shape.min_lens(), w_shape.max_lens()); auto min_spatial_dims = calc_conv_lens(x_shape.min_lens(), w_shape.max_lens());
auto max_spatial_dims = calc_conv_lens(x_shape.max_lens(), w_shape.min_lens()); auto max_spatial_dims = calc_conv_lens(x_shape.max_lens(), w_shape.min_lens());
auto opt_spatial_dims = calc_conv_lens(x_shape.opt_lens(), w_shape.opt_lens());
for(size_t i = 0; i < num_spatial_dims; ++i) for(size_t i = 0; i < num_spatial_dims; ++i)
{ {
output_dyn_dims.push_back(shape::dynamic_dimension{ output_dyn_dims.push_back(
min_spatial_dims[i], max_spatial_dims[i], opt_spatial_dims[i]}); shape::dynamic_dimension{min_spatial_dims[i], max_spatial_dims[i], {}});
} }
} }
return shape{x_shape.type(), output_dyn_dims}; return shape{x_shape.type(), output_dyn_dims};
......
...@@ -37,10 +37,23 @@ namespace op { ...@@ -37,10 +37,23 @@ namespace op {
struct dequantizelinear struct dequantizelinear
{ {
value attributes() const
{
// Note: point_op attribute is not used in this op. Instead, in
// gpu compilation pipeline, rewrite_quantization will be invoked
// from generate_pointwise() to rewrite this op.
return {{"pointwise", true}};
}
std::string name() const { return "dequantizelinear"; } std::string name() const { return "dequantizelinear"; }
shape compute_shape(std::vector<shape> inputs) const shape compute_shape(std::vector<shape> inputs) const
{ {
check_shapes{inputs, *this}.same_dims(); check_shapes{inputs, *this}.same_dims().has(2, 3);
if(inputs.size() == 3 and inputs[0].type() != inputs[2].type())
{
MIGRAPHX_THROW("DEQUANTIZELINEAR: Zero point and input should be the same type.");
}
return {inputs[1].type(), inputs[0].lens(), inputs[0].strides()}; return {inputs[1].type(), inputs[0].lens(), inputs[0].strides()};
} }
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <migraphx/value.hpp> #include <migraphx/value.hpp>
#include <migraphx/op/normalize_attribute.hpp> #include <migraphx/op/normalize_attribute.hpp>
#include <migraphx/dyn_output.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -59,27 +60,22 @@ struct flatten ...@@ -59,27 +60,22 @@ struct flatten
auto s = inputs[0]; auto s = inputs[0];
if(s.dynamic()) if(s.dynamic())
{ {
// Doesn't handle optimals
auto min_lens = s.min_lens(); auto min_lens = s.min_lens();
auto max_lens = s.max_lens(); auto max_lens = s.max_lens();
auto opt_lens = s.opt_lens();
// If any of the opt values is 0, output opt will be 0 // If any of the opt values is 0, output opt will be 0
shape::dynamic_dimension x = { shape::dynamic_dimension x = {
std::accumulate( std::accumulate(
min_lens.begin(), min_lens.begin() + axis, std::size_t{1}, std::multiplies<>{}), min_lens.begin(), min_lens.begin() + axis, std::size_t{1}, std::multiplies<>{}),
std::accumulate( std::accumulate(
max_lens.begin(), max_lens.begin() + axis, std::size_t{1}, std::multiplies<>{}), max_lens.begin(), max_lens.begin() + axis, std::size_t{1}, std::multiplies<>{}),
std::accumulate(opt_lens.begin(), {}};
opt_lens.begin() + axis,
std::size_t{1},
std::multiplies<>{})};
shape::dynamic_dimension y = { shape::dynamic_dimension y = {
std::accumulate( std::accumulate(
min_lens.begin() + axis, min_lens.end(), std::size_t{1}, std::multiplies<>{}), min_lens.begin() + axis, min_lens.end(), std::size_t{1}, std::multiplies<>{}),
std::accumulate( std::accumulate(
max_lens.begin() + axis, max_lens.end(), std::size_t{1}, std::multiplies<>{}), max_lens.begin() + axis, max_lens.end(), std::size_t{1}, std::multiplies<>{}),
std::accumulate( {}};
opt_lens.begin() + axis, opt_lens.end(), std::size_t{1}, std::multiplies<>{}),
};
return {s.type(), {x, y}}; return {s.type(), {x, y}};
} }
else else
......
...@@ -121,7 +121,7 @@ struct gathernd ...@@ -121,7 +121,7 @@ struct gathernd
// A rank 0 output is a scalar // A rank 0 output is a scalar
if(output_ndim == 0) if(output_ndim == 0)
return shape(data_shape.type(), {shape::dynamic_dimension({1, 1, 0})}); return shape(data_shape.type(), {shape::dynamic_dimension({1, 1})});
// Part of the output shape comes from indices tensor, part from data tensor // Part of the output shape comes from indices tensor, part from data tensor
std::vector<shape::dynamic_dimension> output_dims(output_ndim); std::vector<shape::dynamic_dimension> output_dims(output_ndim);
......
...@@ -119,8 +119,8 @@ struct nonmaxsuppression ...@@ -119,8 +119,8 @@ struct nonmaxsuppression
fixed_shape_error_check(); fixed_shape_error_check();
} }
std::vector<shape::dynamic_dimension> out_lens = {}; std::vector<shape::dynamic_dimension> out_lens = {};
out_lens.push_back({0, max_num_boxes, 0}); out_lens.push_back({0, max_num_boxes});
out_lens.push_back({3, 3, 0}); out_lens.push_back({3, 3});
return {shape::int64_type, out_lens}; return {shape::int64_type, out_lens};
} }
else else
...@@ -143,16 +143,22 @@ struct nonmaxsuppression ...@@ -143,16 +143,22 @@ struct nonmaxsuppression
void sort() void sort()
{ {
std::sort(x.begin(), x.end()); if(x[0] > x[1])
std::sort(y.begin(), y.end()); {
std::swap(x[0], x[1]);
}
if(y[0] > y[1])
{
std::swap(y[0], y[1]);
}
} }
std::array<double, 2>& operator[](std::size_t i) { return i == 0 ? x : y; } std::array<double, 2>& operator[](std::size_t i) { return i == 0 ? x : y; }
double area() const double area() const
{ {
assert(std::is_sorted(x.begin(), x.end())); assert(x[0] <= x[1]);
assert(std::is_sorted(y.begin(), y.end())); assert(y[0] <= y[1]);
return (x[1] - x[0]) * (y[1] - y[0]); return (x[1] - x[0]) * (y[1] - y[0]);
} }
}; };
...@@ -190,14 +196,10 @@ struct nonmaxsuppression ...@@ -190,14 +196,10 @@ struct nonmaxsuppression
{ {
intersection[i][0] = std::max(b1[i][0], b2[i][0]); intersection[i][0] = std::max(b1[i][0], b2[i][0]);
intersection[i][1] = std::min(b1[i][1], b2[i][1]); intersection[i][1] = std::min(b1[i][1], b2[i][1]);
} if(intersection[i][0] > intersection[i][1])
{
std::vector<std::array<double, 2>> bbox = {intersection.x, intersection.y}; return false;
if(std::any_of(bbox.begin(), bbox.end(), [](auto bx) { }
return not std::is_sorted(bx.begin(), bx.end());
}))
{
return false;
} }
const double area1 = b1.area(); const double area1 = b1.area();
...@@ -265,31 +267,31 @@ struct nonmaxsuppression ...@@ -265,31 +267,31 @@ struct nonmaxsuppression
auto batch_boxes_start = boxes.begin() + batch_idx * num_boxes * 4; auto batch_boxes_start = boxes.begin() + batch_idx * num_boxes * 4;
auto boxes_heap = filter_boxes_by_score(scores_start, num_boxes, score_threshold); auto boxes_heap = filter_boxes_by_score(scores_start, num_boxes, score_threshold);
selected_boxes_inside_class.clear(); selected_boxes_inside_class.clear();
// Get the next box with top score, filter by iou_threshold
while(not boxes_heap.empty() && while(not boxes_heap.empty() &&
selected_boxes_inside_class.size() < max_output_boxes_per_class) selected_boxes_inside_class.size() < max_output_boxes_per_class)
{ {
// Check with existing selected boxes for this class, remove box if it // select next top scorer box and remove any boxes from boxes_heap that exceeds IOU
// exceeds the IOU (Intersection Over Union) threshold // threshold with the selected box
const auto next_top_score = boxes_heap.top(); const auto next_top_score = boxes_heap.top();
bool not_selected = boxes_heap.pop();
std::any_of(selected_boxes_inside_class.begin(), selected_boxes_inside_class.push_back(next_top_score);
selected_boxes_inside_class.end(), selected_indices.push_back(batch_idx);
[&](auto selected_index) { selected_indices.push_back(class_idx);
return this->suppress_by_iou( selected_indices.push_back(next_top_score.second);
batch_box(batch_boxes_start, next_top_score.second), std::priority_queue<std::pair<double, int64_t>> remainder_boxes;
batch_box(batch_boxes_start, selected_index.second), while(not boxes_heap.empty())
iou_threshold);
});
if(not not_selected)
{ {
selected_boxes_inside_class.push_back(next_top_score); auto iou_candidate_box = boxes_heap.top();
selected_indices.push_back(batch_idx); if(not this->suppress_by_iou(
selected_indices.push_back(class_idx); batch_box(batch_boxes_start, iou_candidate_box.second),
selected_indices.push_back(next_top_score.second); batch_box(batch_boxes_start, next_top_score.second),
iou_threshold))
{
remainder_boxes.push(iou_candidate_box);
}
boxes_heap.pop();
} }
boxes_heap.pop(); boxes_heap = remainder_boxes;
} }
}); });
std::copy(selected_indices.begin(), selected_indices.end(), output.begin()); std::copy(selected_indices.begin(), selected_indices.end(), output.begin());
......
...@@ -31,18 +31,30 @@ namespace migraphx { ...@@ -31,18 +31,30 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace op { namespace op {
// different attributes /**
// 1) use_input(default)/use_output * `normalize_attribute` settings:
// 2) use_rank(default)/use_len * Note that default options are not included as enums.
// 3) clip_min(default)/not_clip_min * 1. `use_input` (default) vs. `use_output`:
// 3.1) include_min(default)/exclude_min * Affects the rank of the attribute.
// 4) clip_max(default)/not_clip_max * `use_input -> lens.size()`, `use_output -> lens.size() + vec.size()`.
// 4.1) exclude_max(default)/include_max * 2. use_rank (default) vs use_len:
// 5) normalize padding * `use_rank` sets the max value/index of the attribute as the rank of lens.
* `use_lens` sets the max value/index as the corresponding value in lens at the axes index.
* 3. `clip_min` vs. `not_clip_min` (default):
* Clip values less than the minimum to the minimum or not.
* 4. `include_min` vs. `exclude_min` (default):
* Include or exclude the minimum value/index for range checking and clipping.
* 5. `clip_max` vs. `not_clip_max` (default):
* Clip values greater than the maximum or not.
* 6. `include_max` vs. `exclude_max` (default):
* Include or exclude the maximum value/index for range checking and clipping.
* 7. `normalize_padding`:
* To normalize the padding to `2*(pad ndim)` dimensions.
*/
enum class normalize_attribute enum class normalize_attribute
{ {
use_len,
use_output, use_output,
use_len,
clip_max, clip_max,
clip_min, clip_min,
include_max, include_max,
......
...@@ -45,14 +45,15 @@ struct pointwise ...@@ -45,14 +45,15 @@ struct pointwise
{ {
MIGRAPHX_THROW("should have one submodule."); MIGRAPHX_THROW("should have one submodule.");
} }
auto* pm = mods.front(); auto* pm = mods.front();
if(pm->get_output_shapes().size() != 1)
MIGRAPHX_THROW("pointwise should have only one output.");
if(inputs.empty())
MIGRAPHX_THROW("pointwise should have at least one input");
auto pnames = pm->get_parameter_names(); auto pnames = pm->get_parameter_names();
std::sort(pnames.begin(), pnames.end()); std::sort(pnames.begin(), pnames.end());
check_shapes{inputs, *this}.has(pnames.size()).same_dims(); check_shapes{inputs, *this}.has(pnames.size()).same_dims();
if(pm->get_output_shapes().size() != 1)
MIGRAPHX_THROW("submodule should have only one output.");
auto type = pm->get_output_shapes().front().type(); auto type = pm->get_output_shapes().front().type();
// Scalar output if all inputs are scalar // Scalar output if all inputs are scalar
......
...@@ -89,25 +89,17 @@ struct pooling ...@@ -89,25 +89,17 @@ struct pooling
std::vector<std::size_t> output_lens{}; std::vector<std::size_t> output_lens{};
for(size_t i = 0; i < kdims; ++i) for(size_t i = 0; i < kdims; ++i)
{ {
if(input_lens[i + 2] == 0) std::size_t padding_factor = 2 * padding[i];
{ if(padding.size() == 2 * kdims)
// handle opt = 0 padding_factor = padding[i] + padding[i + kdims];
output_lens.push_back(0); assert(input_lens[i + 2] + padding_factor >= lengths[i]);
} std::size_t dim_size = input_lens[i + 2] + padding_factor - lengths[i];
else std::size_t len =
{ (ceil_mode)
std::size_t padding_factor = 2 * padding[i]; ? dim_size / stride[i] +
if(padding.size() == 2 * kdims) static_cast<std::size_t>((dim_size % stride[i] != 0)) // ceil uint divide
padding_factor = padding[i] + padding[i + kdims]; : dim_size / stride[i]; // floor divide
assert(input_lens[i + 2] + padding_factor >= lengths[i]); output_lens.push_back(len + 1);
std::size_t dim_size = input_lens[i + 2] + padding_factor - lengths[i];
std::size_t len =
(ceil_mode)
? dim_size / stride[i] + static_cast<std::size_t>((dim_size % stride[i] !=
0)) // ceil uint divide
: dim_size / stride[i]; // floor divide
output_lens.push_back(len + 1);
}
} }
return output_lens; return output_lens;
} }
...@@ -134,19 +126,19 @@ struct pooling ...@@ -134,19 +126,19 @@ struct pooling
{ {
for(size_t i = 0; i < kdims; ++i) for(size_t i = 0; i < kdims; ++i)
{ {
output_dyn_dims.push_back(shape::dynamic_dimension{1, 1, 1}); output_dyn_dims.push_back(shape::dynamic_dimension{1, 1});
} }
return {input.type(), output_dyn_dims}; return {input.type(), output_dyn_dims};
} }
else else
{ {
// does not compute for optimals
auto min_spatial_dims = calc_spatial_dim_out(input.min_lens(), kdims); auto min_spatial_dims = calc_spatial_dim_out(input.min_lens(), kdims);
auto max_spatial_dims = calc_spatial_dim_out(input.max_lens(), kdims); auto max_spatial_dims = calc_spatial_dim_out(input.max_lens(), kdims);
auto opt_spatial_dims = calc_spatial_dim_out(input.opt_lens(), kdims);
for(size_t i = 0; i < kdims; ++i) for(size_t i = 0; i < kdims; ++i)
{ {
output_dyn_dims.push_back(shape::dynamic_dimension{ output_dyn_dims.push_back(
min_spatial_dims[i], max_spatial_dims[i], opt_spatial_dims[i]}); shape::dynamic_dimension{min_spatial_dims[i], max_spatial_dims[i], {}});
} }
return {input.type(), output_dyn_dims}; return {input.type(), output_dyn_dims};
} }
......
...@@ -38,9 +38,22 @@ namespace op { ...@@ -38,9 +38,22 @@ namespace op {
struct quantizelinear struct quantizelinear
{ {
std::string name() const { return "quantizelinear"; } std::string name() const { return "quantizelinear"; }
value attributes() const
{
// Note: point_op attribute is not used in this op. Instead, in
// gpu compilation pipeline, rewrite_quantization will be invoked
// from generate_pointwise() to rewrite this op.
return {{"pointwise", true}};
}
shape compute_shape(std::vector<shape> inputs) const shape compute_shape(std::vector<shape> inputs) const
{ {
check_shapes{inputs, *this}.same_dims(); check_shapes{inputs, *this}.same_dims().has(2, 3);
if(inputs[0].type() != inputs[1].type())
{
MIGRAPHX_THROW("QUANTIZELINEAR: Scales and input must be the same type");
}
if(inputs.size() == 3) if(inputs.size() == 3)
{ {
return {inputs[2].type(), inputs[0].lens(), inputs[0].strides()}; return {inputs[2].type(), inputs[0].lens(), inputs[0].strides()};
...@@ -61,17 +74,15 @@ struct quantizelinear ...@@ -61,17 +74,15 @@ struct quantizelinear
argument result{output_shape}; argument result{output_shape};
visit_all(result, y_zero_point)([&](auto output, auto zero_pts) { visit_all(result, y_zero_point)([&](auto output, auto zero_pts) {
x.visit([&](auto input) { visit_all(x, y_scale)([&](auto input, auto scales) {
y_scale.visit([&](auto scales) { using quant_type = typename decltype(output)::value_type;
using quant_type = typename decltype(output)::value_type; auto min_value = std::numeric_limits<quant_type>::min();
auto min_value = std::numeric_limits<quant_type>::min(); auto max_value = std::numeric_limits<quant_type>::max();
auto max_value = std::numeric_limits<quant_type>::max(); par_for(output_shape.elements(), [&](auto i) {
par_for(output_shape.elements(), [&](auto i) { int64_t quantized = static_cast<int64_t>(std::round(input[i] / scales[i])) +
int64_t quantized = static_cast<int64_t>(std::round(input[i] / scales[i])) + static_cast<int64_t>(zero_pts[i]);
static_cast<int64_t>(zero_pts[i]); output[i] = std::max(static_cast<int64_t>(min_value),
output[i] = std::max(static_cast<int64_t>(min_value), std::min(static_cast<int64_t>(max_value), quantized));
std::min(static_cast<int64_t>(max_value), quantized));
});
}); });
}); });
}); });
......
...@@ -91,7 +91,7 @@ struct reduce_op : op_name<Derived> ...@@ -91,7 +91,7 @@ struct reduce_op : op_name<Derived>
{ {
value normalize; value normalize;
normalize["axes"] = value::array{normalize_attribute::include_min}; normalize["axes"] = value::array{normalize_attribute::include_min};
return {{"normalize_axes", normalize}}; return {{"normalize_axes", normalize}, {"reduce", true}};
} }
std::vector<int64_t> tune_axes(std::size_t n_dim) const std::vector<int64_t> tune_axes(std::size_t n_dim) const
...@@ -123,9 +123,7 @@ struct reduce_op : op_name<Derived> ...@@ -123,9 +123,7 @@ struct reduce_op : op_name<Derived>
auto tuned_axes = tune_axes(output_dyn_dims.size()); auto tuned_axes = tune_axes(output_dyn_dims.size());
for(const auto& axis : tuned_axes) for(const auto& axis : tuned_axes)
{ {
// At the time of writing, there's no functional difference between output_dyn_dims[axis] = {1, 1};
// optimum of 0 (no opt) or 1.
output_dyn_dims[axis] = {1, 1, 0};
} }
return shape{s.type(), output_dyn_dims}; return shape{s.type(), output_dyn_dims};
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <vector> #include <vector>
#include <cmath> #include <cmath>
#include <utility> #include <utility>
#include <migraphx/check_shapes.hpp>
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
#include <migraphx/op/normalize_attribute.hpp> #include <migraphx/op/normalize_attribute.hpp>
...@@ -60,6 +61,7 @@ struct reverse ...@@ -60,6 +61,7 @@ struct reverse
shape normalize_compute_shape(std::vector<shape> inputs) const shape normalize_compute_shape(std::vector<shape> inputs) const
{ {
check_shapes{inputs, *this}.has(1);
return inputs[0].with_lens(inputs[0].lens()); return inputs[0].with_lens(inputs[0].lens());
} }
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_OPERATORS_SELECT_MODULE_HPP
#define MIGRAPHX_GUARD_OPERATORS_SELECT_MODULE_HPP
#include <migraphx/check_shapes.hpp>
#include <migraphx/module.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
/**
 * Runs one of several submodules, selected at evaluation time by matching the
 * runtime input shapes against each submodule's input parameter shapes.
 * Used to dispatch a dynamic-shape program to a statically-shaped submodule.
 */
struct select_module
{
    // Union of the possible output shapes (dynamic); the concrete output shape
    // is only known once a submodule has been selected at evaluation time.
    shape output_dyn_shapes;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.output_dyn_shapes, "output_dyn_shapes"));
    }

    std::string name() const { return "select_module"; }

    shape compute_shape(const std::vector<shape>& inputs, const std::vector<module_ref>&) const
    {
        // Shape is data-independent; it comes from the stored attribute.
        check_shapes{inputs, *this, true}.has_at_least(1);
        return shape{output_dyn_shapes};
    }

    // Sorted parameter names of `mod`, filtered by whether the name marks an
    // output parameter (contains "#output_") or an input parameter (does not).
    // Sorting gives a deterministic parameter ordering.
    std::vector<std::string> get_parameter_names_filtered(module_ref mod, bool outputs) const
    {
        auto param_names = mod->get_parameter_names();
        std::vector<std::string> ret;
        std::copy_if(param_names.cbegin(),
                     param_names.cend(),
                     std::back_inserter(ret),
                     [&](const auto& pn) { return contains(pn, "#output_") == outputs; });
        std::sort(ret.begin(), ret.end());
        return ret;
    }

    std::vector<std::string> get_input_parameter_names(module_ref mod) const
    {
        return get_parameter_names_filtered(mod, false);
    }

    std::vector<std::string> get_output_parameter_names(module_ref mod) const
    {
        return get_parameter_names_filtered(mod, true);
    }

    argument compute(const shape&,
                     const std::vector<argument>& args,
                     const std::vector<module_ref>& submodule_list,
                     const std::function<std::vector<argument>(
                         module_ref&, const std::unordered_map<std::string, argument>&)>& run) const
    {
        // Find submodule with input parameter shapes exactly the same as the input instruction
        // arguments. Assuming instruction arguments are in the same order as the instruction
        // parameters.
        auto module_iter =
            std::find_if(submodule_list.cbegin(), submodule_list.cend(), [&](module_ref mr) {
                auto in_param_names = get_input_parameter_names(mr);
                auto param_shapes   = mr->get_parameter_shapes();
                assert(in_param_names.size() <= args.size());
                return std::equal(in_param_names.cbegin(),
                                  in_param_names.cend(),
                                  args.cbegin(),
                                  [&](const auto& p_name, const auto& a) {
                                      return a.get_shape() == param_shapes[p_name];
                                  });
            });
        if(module_iter == submodule_list.cend())
        {
            MIGRAPHX_THROW("SELECT_MODULE: no compatible submodules found for given input shapes");
        }
        auto* module_to_run = *module_iter;
        std::unordered_map<std::string, argument> p_map;

        // add input parameters to parameter_map
        auto in_param_names = get_input_parameter_names(module_to_run);
        assert(in_param_names.size() <= args.size());
        std::transform(in_param_names.begin(),
                       in_param_names.end(),
                       args.begin(),
                       std::inserter(p_map, p_map.end()),
                       [&](auto&& name, auto&& a) { return std::make_pair(name, a); });

        // One tuple output parameter in main module to multiple output parameters in submodule.
        // The last argument is the (pre-allocated) output buffer; split its sub-objects
        // across the submodule's output parameters, reshaping when the submodule expects
        // a smaller (statically-shaped) buffer than was allocated.
        auto out_param_names    = get_output_parameter_names(module_to_run);
        auto param_shapes       = module_to_run->get_parameter_shapes();
        auto output_sub_objects = args.back().get_sub_objects();
        assert(out_param_names.size() == output_sub_objects.size());
        std::transform(out_param_names.begin(),
                       out_param_names.end(),
                       output_sub_objects.begin(),
                       std::inserter(p_map, p_map.end()),
                       [&](auto&& name, auto&& a) {
                           auto ps = param_shapes.at(name);
                           if(a.get_shape() != ps)
                           {
                               // allocated buffer must be at least as large as the
                               // submodule's expected parameter shape
                               assert(ps.bytes() <= a.get_shape().bytes());
                               return std::make_pair(name, a.reshape(ps));
                           }
                           else
                           {
                               return std::make_pair(name, a);
                           }
                       });

        auto results = run(module_to_run, p_map);
        return argument{results};
    }

    // The output aliases the last input (the pre-allocated output buffer).
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
/* /*
* The MIT License (MIT) * The MIT License (MIT)
* *
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal * of this software and associated documentation files (the "Software"), to deal
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <migraphx/check_shapes.hpp> #include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp> #include <migraphx/argument.hpp>
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/value.hpp> #include <migraphx/value.hpp>
#include <migraphx/op/normalize_attribute.hpp> #include <migraphx/op/normalize_attribute.hpp>
...@@ -46,6 +47,10 @@ struct slice ...@@ -46,6 +47,10 @@ struct slice
return pack(f(self.axes, "axes"), f(self.starts, "starts"), f(self.ends, "ends")); return pack(f(self.axes, "axes"), f(self.starts, "starts"), f(self.ends, "ends"));
} }
/**
* Ensure that attribute vectors axes, starts, and ends are all the same size and values are in
* limits.
*/
value attributes() const value attributes() const
{ {
value normalize = value::object{}; value normalize = value::object{};
...@@ -65,14 +70,6 @@ struct slice ...@@ -65,14 +70,6 @@ struct slice
std::string name() const { return "slice"; } std::string name() const { return "slice"; }
auto fix_index(const std::vector<std::size_t>& lens, std::size_t axis, int64_t index) const
{
int64_t r = std::min(index, static_cast<int64_t>(lens[axis]));
if(r < 0)
r += lens[axis];
return std::size_t(r);
}
auto compute_offset(const shape& s) const auto compute_offset(const shape& s) const
{ {
const std::vector<std::size_t>& lens = s.lens(); const std::vector<std::size_t>& lens = s.lens();
...@@ -83,14 +80,14 @@ struct slice ...@@ -83,14 +80,14 @@ struct slice
for(std::size_t i = 0; i < axes.size(); i++) for(std::size_t i = 0; i < axes.size(); i++)
{ {
auto axis = axes[i]; auto axis = axes[i];
offset += fix_index(lens, axis, starts[i]) * strides[axis]; offset += starts[i] * strides[axis];
} }
} }
else else
{ {
for(std::size_t axis = 0; axis < lens.size(); axis++) for(std::size_t axis = 0; axis < lens.size(); axis++)
{ {
offset += fix_index(lens, axis, starts[axis]) * strides[axis]; offset += starts[axis] * strides[axis];
} }
} }
return offset; return offset;
...@@ -98,37 +95,74 @@ struct slice ...@@ -98,37 +95,74 @@ struct slice
shape normalize_compute_shape(std::vector<shape> inputs) const shape normalize_compute_shape(std::vector<shape> inputs) const
{ {
auto input_shape = inputs[0]; check_shapes{inputs, *this, true}.has(1);
auto t = input_shape.type(); auto input_shape = inputs[0];
const auto& old_lens = input_shape.lens(); auto t = input_shape.type();
const auto& old_strides = input_shape.strides();
if(std::any_of( // TODO: When support for dynamic shapes is added to normalize_attributes,
axes.begin(), axes.end(), [&](auto i) { return (i >= old_lens.size() and i < 0); })) // remove this restriction.
if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
return not input_shape.dyn_dims()[axis].is_fixed();
}))
{ {
MIGRAPHX_THROW("SLICE: input axis " + to_string_range(axes) + " out of range"); MIGRAPHX_THROW("SLICE: slicing is not allowed on non-fixed dynamic input axis ");
} }
if(starts.size() != axes.size() or axes.size() != ends.size()) // For a static shape, old_lens will be adjusted to a new size
// for those axes that are sliced.
// For dynamic shape, the adjusted old_lens become the new max values,
// while updating the old mins and optimals if possible.
std::vector<std::size_t> new_mins;
std::vector<std::size_t> old_lens;
std::vector<std::size_t> old_strides;
// Doesn't handle optimals
if(input_shape.dynamic())
{ {
MIGRAPHX_THROW("SLICE: inconsistent sizes"); old_lens = input_shape.max_lens();
new_mins = input_shape.min_lens();
}
else
{
old_lens = input_shape.lens();
// For static shape (including during eval step after a dynamic input) the strides are
// indexed into the pre-slice array, so they are larger than the apparent size of the
// resulting shape.
old_strides = input_shape.strides();
} }
std::vector<std::size_t> new_lens = old_lens; std::vector<std::size_t> new_lens = old_lens;
for(std::size_t i = 0; i < axes.size(); i++) for(std::size_t i = 0; i < axes.size(); i++)
{ {
auto axis = axes[i]; auto axis = axes[i];
new_lens[axis] = size_t sliced_length = ends[i] - starts[i];
fix_index(old_lens, axis, ends[i]) - fix_index(old_lens, axis, starts[i]); // A Numpy indexing convention: a slice size larger than the actual dimension
// is legal and the "ends" value is clipped to the axis size
new_lens[axis] = std::min(new_lens[axis], sliced_length);
if(input_shape.dynamic())
{
// TODO: when non-fixed shape slicing is allowed, this will be different than
// sliced_length, making use of TBD start/end values.
std::size_t sliced_min_length = ends[i] - starts[i];
// if the slice size is smaller than maxes but larger than mins
new_mins[axis] = std::min(sliced_min_length, new_mins[axis]);
}
}
if(input_shape.dynamic())
{
return shape{t, new_mins, new_lens, {}};
}
else
{
return shape{t, new_lens, old_strides};
} }
return shape{t, new_lens, old_strides};
} }
argument compute(shape output_shape, std::vector<argument> args) const argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
{ {
auto input = args[0]; auto input = args[0];
auto offset = compute_offset(input.get_shape()) * output_shape.type_size();
return {std::move(output_shape), [=] { return input.data() + offset; }}; auto offset = compute_offset(input.get_shape()) * dyn_out.computed_shape.type_size();
return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
} }
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; } std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
}; };
......
...@@ -81,7 +81,7 @@ struct unsqueeze ...@@ -81,7 +81,7 @@ struct unsqueeze
{ {
if(std::find(axes.begin(), axes.end(), i) != axes.end()) if(std::find(axes.begin(), axes.end(), i) != axes.end())
{ {
dyn_dims.push_back({1, 1, 0}); dyn_dims.push_back({1, 1});
} }
else else
{ {
...@@ -95,13 +95,10 @@ struct unsqueeze ...@@ -95,13 +95,10 @@ struct unsqueeze
auto type = input_shape.type(); auto type = input_shape.type();
auto old_lens = input_shape.lens(); auto old_lens = input_shape.lens();
auto old_strides = input_shape.strides(); auto old_strides = input_shape.strides();
if(input_shape.scalar()) auto is_scalar = input_shape.scalar();
{
if(old_lens.size() == 1 and old_lens.front() == 1) if(is_scalar and old_lens.size() == 1 and old_lens.front() == 1)
return shape{type, old_lens}; return shape{type, old_lens};
else
MIGRAPHX_THROW("UNSQUEEZE: Input must be a scalar");
}
if(steps.size() > axes.size()) if(steps.size() > axes.size())
MIGRAPHX_THROW("UNSQUEEZE: Steps provided with no axis"); MIGRAPHX_THROW("UNSQUEEZE: Steps provided with no axis");
...@@ -121,13 +118,15 @@ struct unsqueeze ...@@ -121,13 +118,15 @@ struct unsqueeze
step = steps[axis_idx]; step = steps[axis_idx];
if(step == 0) if(step == 0)
MIGRAPHX_THROW("UNSQUEEZE: step must be non-zero"); MIGRAPHX_THROW("UNSQUEEZE: step must be non-zero");
if(is_scalar and step != 1)
MIGRAPHX_THROW("UNSQUEEZE: step must be 1 when input is scalar");
new_lens[i] = step; new_lens[i] = step;
if(p < old_strides.size()) if(p < old_strides.size())
{ {
if((old_lens[p] % step) != 0) if((old_lens[p] % step) != 0)
MIGRAPHX_THROW("UNSQUEEZE: Axis dimenstion is not divisible by step"); MIGRAPHX_THROW("UNSQUEEZE: Axis dimenstion is not divisible by step");
old_lens[p] /= step; old_lens[p] /= step;
new_strides[i] = old_strides[p] * old_lens[p]; new_strides[i] = is_scalar ? 1 : old_strides[p] * old_lens[p];
} }
else else
{ {
......
/* /*
* The MIT License (MIT) * The MIT License (MIT)
* *
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal * of this software and associated documentation files (the "Software"), to deal
...@@ -42,9 +42,17 @@ struct where ...@@ -42,9 +42,17 @@ struct where
shape compute_shape(std::vector<shape> inputs) const shape compute_shape(std::vector<shape> inputs) const
{ {
check_shapes{inputs, *this}.has(3).same_dims(); check_shapes{inputs, *this, true}.has(3).same_dims();
auto s1 = inputs.at(1); auto s1 = inputs.at(1);
auto s2 = inputs.at(2); auto s2 = inputs.at(2);
if(s1.dynamic() or s2.dynamic())
{
if(s1 == s2)
return s1;
MIGRAPHX_THROW("WHERE: dynamic input shapes must be the same");
}
// Compare two static shapes, returning a standard shape
if(s1 == s2 and s1.packed()) if(s1 == s2 and s1.packed())
{ {
return s1; return s1;
...@@ -63,12 +71,12 @@ struct where ...@@ -63,12 +71,12 @@ struct where
} }
} }
argument compute(const shape& output_shape, std::vector<argument> args) const argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
{ {
argument result{output_shape}; argument result{dyn_out.computed_shape};
visit_all(result, args[1], args[2])([&](auto output, const auto x, const auto y) { visit_all(result, args[1], args[2])([&](auto output, const auto x, const auto y) {
args[0].visit([&](const auto condition) { args[0].visit([&](const auto condition) {
par_for(output_shape.elements(), par_for(dyn_out.computed_shape.elements(),
[&](auto i) { output[i] = condition[i] ? x[i] : y[i]; }); [&](auto i) { output[i] = condition[i] ? x[i] : y[i]; });
}); });
}); });
......
...@@ -140,6 +140,8 @@ template <class T> ...@@ -140,6 +140,8 @@ template <class T>
auto compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs) auto compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.normalize_compute_shape(inputs)) -> decltype(x.normalize_compute_shape(inputs))
{ {
if(inputs.empty())
MIGRAPHX_THROW("At least one input is required for " + x.name());
dependent_type<operation, T> y = x; dependent_type<operation, T> y = x;
normalize_attributes(y, inputs[0].max_lens()); normalize_attributes(y, inputs[0].max_lens());
return any_cast<T>(y).normalize_compute_shape(inputs); return any_cast<T>(y).normalize_compute_shape(inputs);
......
...@@ -39,6 +39,7 @@ struct module_pass_manager ...@@ -39,6 +39,7 @@ struct module_pass_manager
virtual module& get_module() = 0; virtual module& get_module() = 0;
virtual module* create_module(const std::string& name) = 0; virtual module* create_module(const std::string& name) = 0;
virtual module* get_common_parent() = 0; virtual module* get_common_parent() = 0;
virtual module* get_root_module() = 0;
virtual void run_pass(const pass& p) = 0; virtual void run_pass(const pass& p) = 0;
protected: protected:
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <migraphx/filesystem.hpp> #include <migraphx/filesystem.hpp>
#include <functional>
#include <string> #include <string>
#include <memory> #include <memory>
...@@ -36,6 +37,7 @@ struct process_impl; ...@@ -36,6 +37,7 @@ struct process_impl;
struct process struct process
{ {
using writer = std::function<void(const char*, std::size_t)>;
process(const std::string& cmd); process(const std::string& cmd);
// move constructor // move constructor
...@@ -49,6 +51,7 @@ struct process ...@@ -49,6 +51,7 @@ struct process
process& cwd(const fs::path& p); process& cwd(const fs::path& p);
void exec(); void exec();
void write(std::function<void(process::writer)> pipe_in);
private: private:
std::unique_ptr<process_impl> impl; std::unique_ptr<process_impl> impl;
......
/* /*
* The MIT License (MIT) * The MIT License (MIT)
* *
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal * of this software and associated documentation files (the "Software"), to deal
...@@ -21,20 +21,27 @@ ...@@ -21,20 +21,27 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#include <migraphx/serialize.hpp> #ifndef MIGRAPHX_GUARD_RTGLIB_PROMOTE_LITERALS_HPP
#include <migraphx/context.hpp> #define MIGRAPHX_GUARD_RTGLIB_PROMOTE_LITERALS_HPP
#include <migraphx/ref/context.hpp>
#include <migraphx/functional.hpp>
#include <test.hpp>
TEST_CASE(context) #include <string>
#include <migraphx/pass_manager.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
/**
* Replace literals in submodules with literals in the root module.
* Intended to allow for reuse of the literals between submodules.
*/
struct promote_literals
{ {
migraphx::context ctx = migraphx::ref::context{}; std::string name() const { return "promote_literals"; }
migraphx::value v = ctx.to_value(); void apply(module_pass_manager&) const;
EXPECT(v.empty()); };
migraphx::context cpu_ctx = migraphx::ref::context{}; } // namespace MIGRAPHX_INLINE_NS
cpu_ctx.from_value(v); } // namespace migraphx
}
int main(int argc, const char* argv[]) { test::run(argc, argv); } #endif
...@@ -78,7 +78,7 @@ template <class T> ...@@ -78,7 +78,7 @@ template <class T>
struct wrapper struct wrapper
{ {
using type = typename remove_rvalue_reference<T>::type; using type = typename remove_rvalue_reference<T>::type;
type data; type data; // NOLINT
type get() const { return data; } type get() const { return data; }
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment