Merge branch 'develop' into add_parity_check_ci

1530ec24 · Ted Themistokleous · GitHub · 5c98fcb0 · c2e01b10 · 1530ec24
Unverified Commit 1530ec24 authored Sep 18, 2023 by Ted Themistokleous Committed by GitHub Sep 18, 2023
20 changed files
--- a/src/include/migraphx/op/reduce_op.hpp
+++ b/src/include/migraphx/op/reduce_op.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -163,7 +163,7 @@ struct reduce_op : op_name<Derived>
        auto& self        = static_cast<const Derived&>(*this);
        auto data_idx     = out_idx;
        accumulator val   = self.init();
-        shape_for_each(batch_shape, [&](auto b_idx) {
+        shape_for_each(batch_shape, [&](const auto& b_idx) {
            this->tune_dims(tuned_axes, b_idx, data_idx);
            accumulator x = input(data_idx.begin(), data_idx.end());
            val           = self.op()(accumulator{self.input()(x)}, val);

--- a/src/include/migraphx/op/reverse.hpp
+++ b/src/include/migraphx/op/reverse.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -70,13 +70,13 @@ struct reverse
        argument result{s};
        auto lens = s.lens();
        visit_all(result, args.front())([&](auto output, auto input) {
-            shape_for_each(s, [&](const auto& out_idx) {
+            shape_for_each(s, [&](const auto& out_idx_v, size_t out_idx) {
-                auto in_idx = out_idx;
+                auto in_idx = out_idx_v;
                for(const auto& axis : axes)
                {
-                    in_idx[axis] = lens[axis] - 1 - out_idx[axis];
+                    in_idx[axis] = lens[axis] - 1 - out_idx_v[axis];
                }
-                output[s.index(out_idx)] = input[s.index(in_idx)];
+                output[out_idx] = input[s.index(in_idx)];
            });
        });

--- a/src/include/migraphx/op/roialign.hpp
+++ b/src/include/migraphx/op/roialign.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -113,10 +113,9 @@ struct roialign
    {
        std::vector<pos_weight> results(bin_grid_size[0] * bin_grid_size[1] * output_height *
                                        output_width);
-        shape_for_each(comp_s, [&](auto idx) {
+        shape_for_each(comp_s, [&](const auto& idx_v, size_t index) {
-            std::array<std::size_t, 2> p = {idx[0], idx[1]};
+            std::array<std::size_t, 2> p = {idx_v[0], idx_v[1]};
-            std::array<std::size_t, 2> i = {idx[2], idx[3]};
+            std::array<std::size_t, 2> i = {idx_v[2], idx_v[3]};
-            auto index                   = comp_s.index(idx);
            std::array<float, 2> xy{};
            std::array<int64_t, 2> low{};
@@ -255,7 +254,7 @@ struct roialign
                std::vector<std::size_t> comp_lens1 = {channels, out_dims[0], out_dims[1]};
                shape comp_s1{migraphx::shape::float_type, comp_lens1};
                std::vector<int64_t> vec_index(channels, 0);
-                shape_for_each(comp_s1, [&](auto idx) {
+                shape_for_each(comp_s1, [&](const auto& idx) {
                    auto c  = idx[0];
                    auto ph = idx[1];
                    auto pw = idx[2];

--- a/src/include/migraphx/op/scatter.hpp
+++ b/src/include/migraphx/op/scatter.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal

--- a/src/include/migraphx/op/slice.hpp
+++ b/src/include/migraphx/op/slice.hpp
@@ -27,19 +27,34 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
-#include <migraphx/dyn_output.hpp>
 #include <migraphx/value.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
+#include <migraphx/normalize_attributes.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
+/**
+ * Slice operator that accepts variable axes, starts and ends.
+ *
+ * Attributes:
+ * axes: constant axes to slice over (optional)
+ * starts: constant slice starting indices (optional)
+ * ends: constant slice ending indices (optional)
+ *
+ * Parameters:
+ * data: the input tensor to slice (dynamic or static shape)
+ * input_starts: starting indices of slice (optional, static shape)
+ * input_ends: ending indices of slice (optional, static shape)
+ * input_axes: axes to slice over (optional, static shape)
+ */
 struct slice
 {
-    std::vector<int64_t> axes;
+    std::vector<int64_t> axes{};
-    std::vector<int64_t> starts;
+    std::vector<int64_t> starts{};
-    std::vector<int64_t> ends;
+    std::vector<int64_t> ends{};
    template <class Self, class F>
    static auto reflect(Self& self, F f)
@@ -48,8 +63,8 @@ struct slice
    }
    /**
-     * Ensure that attribute vectors axes, starts, and ends are all the same size and values are in
+     * Ensure that attribute vectors axes, starts, and ends are all the same size and values are
-     * limits.
+     * within limits.
     */
    value attributes() const
    {
@@ -70,6 +85,90 @@ struct slice
    std::string name() const { return "slice"; }
+    /**
+     * Computes the slice output shape dimensions for given starts, ends, and axes.
+     * Templated to also handle tensor views.
+     * Possibly different type between [in_starts, in_ends] and [in_axes] if in_axes is this
+     * object's axes attribute. Assumes in_starts and in_ends are normalized; in_axes are valid.
+     */
+    template <class A, class B>
+    std::vector<std::size_t>
+    lens_calc(const std::vector<std::size_t>& lengths, A in_starts, A in_ends, B in_axes) const
+    {
+        auto new_lens = lengths;
+        for(std::size_t i = 0; i < in_axes.size(); ++i)
+        {
+            auto axis      = in_axes[i];
+            new_lens[axis] = in_ends[i] - in_starts[i];
+        }
+        return new_lens;
+    }
+    shape normalize_compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this, true}.has(1, 3, 4);
+        auto input_shape = inputs[0];
+        if(inputs.size() == 1)
+        {
+            auto t = input_shape.type();
+            if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
+                   return not input_shape.dyn_dims()[axis].is_fixed();
+               }))
+            {
+                MIGRAPHX_THROW("SLICE: slicing is not allowed on non-fixed dynamic input axis ");
+            }
+            if(input_shape.dynamic())
+            {
+                return shape{t,
+                             lens_calc(input_shape.min_lens(), starts, ends, axes),
+                             lens_calc(input_shape.max_lens(), starts, ends, axes),
+                             {}};
+            }
+            else
+            {
+                return shape{
+                    t, lens_calc(input_shape.lens(), starts, ends, axes), input_shape.strides()};
+            }
+        }
+        else
+        {
+            // check that starts, ends, and optionally input_axes are all 1D, have the same
+            // dimension, and are static
+            check_shapes{inputs.begin() + 1,
+                         inputs.end(),
+                         std::string("SLICE: inputs (starts, ends, and input_axes)"),
+                         false}
+                .only_dims(1)
+                .same_dims();
+            auto dds = input_shape.to_dynamic().dyn_dims();
+            if(inputs.size() == 3)
+            {
+                if(inputs[1].lens().at(0) != axes.size())
+                {
+                    MIGRAPHX_THROW("SLICE: inputs starts and ends do not have the same dimension "
+                                   "as the axes attribute");
+                }
+                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
+                    dds.at(axis) = {0, dds.at(axis).max};
+                });
+            }
+            else
+            {
+                // if axes is an input, then all the output dimensions could be 0 to the max value
+                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
+                    return shape::dynamic_dimension{0, dd.max};
+                });
+            }
+            return shape{input_shape.type(), dds};
+        }
+    }
+    /**
+     * Calculates the starting offset for the sliced tensor.
+     * Used in compute when only data input and all other information are in the attributes.
+     *
+     * \param s static input shape
+     */
    auto compute_offset(const shape& s) const
    {
        const std::vector<std::size_t>& lens    = s.lens();
@@ -90,80 +189,131 @@ struct slice
                offset += starts[axis] * strides[axis];
            }
        }
-        return offset;
+        return offset * s.type_size();
    }
-    shape normalize_compute_shape(std::vector<shape> inputs) const
+    /**
+     * Calculates the starting offset for the sliced tensor (for aliasing).
+     * Used when the starts and/or the axes are inputs.
+     *
+     * \param s static input shape
+     * \param input_starts starting indices of slice
+     * \param ax_vec axes to slice on
+     * \return sum of input_starts[i] * stride of ax_vec[i], multiplied by s.type_size()
+     */
+    template <class IndView, class Axes>
+    auto compute_offset(const shape& s, const IndView& input_starts, const Axes& ax_vec) const
    {
-        check_shapes{inputs, *this, true}.has(1);
+        std::size_t ret = 0; // size_t, not int: the stride products must not be truncated
-        auto input_shape = inputs[0];
+        for(std::size_t i = 0; i < ax_vec.size(); ++i)
-        auto t           = input_shape.type();
-        // TODO:  When support for dynamic shapes is added to normalize_attributes,
-        //  remove this restriction.
-        if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
-               return not input_shape.dyn_dims()[axis].is_fixed();
-           }))
        {
-            MIGRAPHX_THROW("SLICE: slicing is not allowed on non-fixed dynamic input axis ");
+            auto axis = ax_vec[i];
+            ret += input_starts[i] * s.strides().at(axis);
        }
+        return ret * s.type_size();
+    }
+    std::unordered_map<std::string, std::vector<int64_t>>
+    normalize_inputs(const shape& input_shape,
+                     const std::vector<int64_t>& input_starts,
+                     const std::vector<int64_t>& input_ends) const
+    {
+        auto attrs = this->attributes().at("normalize_axes");
+        return {{"input_starts",
+                 normalize_indices(input_starts,
+                                   this->axes,
+                                   input_shape,
+                                   attrs.at("starts"),
+                                   "Slice variable input_starts")},
+                {"input_ends",
+                 normalize_indices(input_ends,
+                                   this->axes,
+                                   input_shape,
+                                   attrs.at("ends"),
+                                   "Slice variable input_ends")}};
+    }
+    /**
+     * Three input version of the normalize_inputs.
+     * This one also checks that the input_axes are valid.
+     * Returns a map with the normalized "input_starts", "input_ends" and "input_axes".
+     */
+    std::unordered_map<std::string, std::vector<int64_t>>
+    normalize_inputs(const shape& input_shape,
+                     const std::vector<int64_t>& input_starts,
+                     const std::vector<int64_t>& input_ends,
+                     const std::vector<int64_t>& input_axes) const
+    {
+        auto attrs = this->attributes().at("normalize_axes");
+        auto norm_axes =
+            normalize_axes(input_axes, input_shape, attrs.at("axes"), "Slice variable input_axes");
+        return {{"input_starts",
+                 normalize_indices(input_starts,
+                                   norm_axes,
+                                   input_shape,
+                                   attrs.at("starts"),
+                                   "Slice variable input_starts")},
+                {"input_ends",
+                 normalize_indices(input_ends,
+                                   norm_axes,
+                                   input_shape,
+                                   attrs.at("ends"),
+                                   "Slice variable input_ends")},
+                {"input_axes", norm_axes}};
+    }
-        // For a static shape, old_lens will be adjusted to a new size
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
-        // for those axes that are sliced.
+    {
-        // For dynamic shape, the adjusted old_lens become the new max values,
+        auto input       = args[0];
-        // while updating the old mins and optimals if possible.
+        auto input_shape = input.get_shape();
-        std::vector<std::size_t> new_mins;
+        switch(args.size())
-        std::vector<std::size_t> old_lens;
-        std::vector<std::size_t> old_strides;
-        // Doesn't handle optimals
-        if(input_shape.dynamic())
        {
-            old_lens = input_shape.max_lens();
+        case 1: {
-            new_mins = input_shape.min_lens();
+            std::size_t offset = compute_offset(input_shape);
+            return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
        }
-        else
+        case 3: {
-        {
+            shape calc_shape;
-            old_lens = input_shape.lens();
+            std::size_t offset = 0;
-            // For static shape (including during eval step after a dynamic input) the strides are
+            visit_all(args[1], args[2])([&](auto input_starts, auto input_ends) {
-            // indexed into the pre-slice array, so they are larger than the apparent size of the
+                auto norm_inputs = normalize_inputs(input_shape,
-            // resulting shape.
+                                                    input_starts.template to_vector<int64_t>(),
-            old_strides = input_shape.strides();
+                                                    input_ends.template to_vector<int64_t>());
+                offset = compute_offset(input_shape, norm_inputs.at("input_starts"), this->axes);
+                calc_shape = {input_shape.type(),
+                              lens_calc(input_shape.lens(),
+                                        norm_inputs.at("input_starts"),
+                                        norm_inputs.at("input_ends"),
+                                        this->axes),
+                              input_shape.strides()};
+            });
+            return {calc_shape, [=] { return input.data() + offset; }};
        }
+        case 4: {
-        std::vector<std::size_t> new_lens = old_lens;
+            shape calc_shape;
-        for(std::size_t i = 0; i < axes.size(); i++)
+            std::size_t offset = 0;
-        {
+            visit_all(args[1], args[2], args[3])(
-            auto axis            = axes[i];
+                [&](auto input_starts, auto input_ends, auto input_axes) {
-            size_t sliced_length = ends[i] - starts[i];
+                    auto norm_inputs = normalize_inputs(input_shape,
-            // A Numpy indexing convention: a slice size larger than the actual dimension
+                                                        input_starts.template to_vector<int64_t>(),
-            // is legal and the "ends" value is clipped to the axis size
+                                                        input_ends.template to_vector<int64_t>(),
-            new_lens[axis] = std::min(new_lens[axis], sliced_length);
+                                                        input_axes.template to_vector<int64_t>());
-            if(input_shape.dynamic())
+                    offset           = compute_offset(
-            {
+                        input_shape, norm_inputs.at("input_starts"), norm_inputs.at("input_axes"));
-                // TODO: when non-fixed shape slicing is allowed, this will be different than
+                    calc_shape = shape{input_shape.type(),
-                // sliced_length, making use of TBD start/end values.
+                                       lens_calc(input_shape.lens(),
-                std::size_t sliced_min_length = ends[i] - starts[i];
+                                                 norm_inputs.at("input_starts"),
-                // if the slice size is smaller than maxes but larger than mins
+                                                 norm_inputs.at("input_ends"),
-                new_mins[axis] = std::min(sliced_min_length, new_mins[axis]);
+                                                 norm_inputs.at("input_axes")),
-            }
+                                       input_shape.strides()};
+                });
+            return {calc_shape, [=] { return input.data() + offset; }};
        }
-        if(input_shape.dynamic())
+        default: {
-        {
+            // Should never get here; covering in case some code change occurs
-            return shape{t, new_mins, new_lens, {}};
+            MIGRAPHX_THROW("SLICE: invalid number of inputs");
        }
-        else
-        {
-            return shape{t, new_lens, old_strides};
        }
    }
-    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
-    {
-        auto input = args[0];
-        auto offset = compute_offset(input.get_shape()) * dyn_out.computed_shape.type_size();
-        return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
-    }
    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };

--- a/src/include/migraphx/operation.hpp
+++ b/src/include/migraphx/operation.hpp
@@ -575,7 +575,7 @@ struct operation
    {
        using std::swap;
        auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
-        if(derived and private_detail_te_handle_mem_var.unique())
+        if(derived and private_detail_te_handle_mem_var.use_count() == 1)
        {
            *derived = std::forward<PrivateDetailTypeErasedT>(value);
        }
@@ -1265,7 +1265,7 @@ struct operation
    private_detail_te_handle_base_type& private_detail_te_get_handle()
    {
        assert(private_detail_te_handle_mem_var != nullptr);
-        if(not private_detail_te_handle_mem_var.unique())
+        if(private_detail_te_handle_mem_var.use_count() > 1)
            private_detail_te_handle_mem_var = private_detail_te_handle_mem_var->clone();
        return *private_detail_te_handle_mem_var;
    }

--- a/src/include/migraphx/operators.hpp
+++ b/src/include/migraphx/operators.hpp
@@ -55,6 +55,7 @@
 #include <migraphx/op/equal.hpp>
 #include <migraphx/op/erf.hpp>
 #include <migraphx/op/exp.hpp>
+#include <migraphx/op/fill.hpp>
 #include <migraphx/op/flatten.hpp>
 #include <migraphx/op/floor.hpp>
 #include <migraphx/op/fmod.hpp>

--- a/src/include/migraphx/pad_calc.hpp
+++ b/src/include/migraphx/pad_calc.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -62,6 +62,14 @@ shape compute_padded_shape(const shape& input,
                           const std::vector<std::size_t>& stride,
                           const std::vector<std::size_t>& dilation);
+// Used for dynamic auto padding of pooling operators where padding needs to be computed at
+// evaluation time.
+shape compute_padded_pool_shape(const shape& input,
+                                const shape& kernel,
+                                const std::vector<std::size_t>& padding,
+                                const std::vector<std::size_t>& stride,
+                                const std::vector<std::size_t>& dilation);
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/include/migraphx/pass.hpp
+++ b/src/include/migraphx/pass.hpp
@@ -116,7 +116,7 @@ struct pass
    {
        using std::swap;
        auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
-        if(derived and private_detail_te_handle_mem_var.unique())
+        if(derived and private_detail_te_handle_mem_var.use_count() == 1)
        {
            *derived = std::forward<PrivateDetailTypeErasedT>(value);
        }
@@ -292,7 +292,7 @@ struct pass
    private_detail_te_handle_base_type& private_detail_te_get_handle()
    {
        assert(private_detail_te_handle_mem_var != nullptr);
-        if(not private_detail_te_handle_mem_var.unique())
+        if(private_detail_te_handle_mem_var.use_count() > 1)
            private_detail_te_handle_mem_var = private_detail_te_handle_mem_var->clone();
        return *private_detail_te_handle_mem_var;
    }

--- a/src/include/migraphx/permutation.hpp
+++ b/src/include/migraphx/permutation.hpp
@@ -66,6 +66,10 @@ MIGRAPHX_EXPORT std::vector<int64_t> invert_permutation(const std::vector<int64_
 MIGRAPHX_EXPORT std::vector<int64_t> find_permutation(const shape& s);
 MIGRAPHX_EXPORT std::vector<int64_t> find_permutation(const std::vector<shape>& shapes);
+/// Normalize the shapes so the order of dimensions will be in the order it is
+/// in memory as much as possible.
+MIGRAPHX_EXPORT std::vector<shape> normalize_permutation(const std::vector<shape>& shapes);
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/include/migraphx/ranges.hpp
+++ b/src/include/migraphx/ranges.hpp
@@ -205,7 +205,7 @@ void transform(Range1&& r1, Range2&& r2, Iterator it, F f)
 }
 template <class Range>
-auto reverse(Range& r)
+auto reverse(Range&& r)
 {
    return range(std::make_reverse_iterator(r.end()), std::make_reverse_iterator(r.begin()));
 }

--- a/src/include/migraphx/schedule_model.hpp
+++ b/src/include/migraphx/schedule_model.hpp
@@ -99,7 +99,7 @@ struct schedule_model
    {
        using std::swap;
        auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
-        if(derived and private_detail_te_handle_mem_var.unique())
+        if(derived and private_detail_te_handle_mem_var.use_count() == 1)
        {
            *derived = std::forward<PrivateDetailTypeErasedT>(value);
        }
@@ -274,7 +274,7 @@ struct schedule_model
    private_detail_te_handle_base_type& private_detail_te_get_handle()
    {
        assert(private_detail_te_handle_mem_var != nullptr);
-        if(not private_detail_te_handle_mem_var.unique())
+        if(private_detail_te_handle_mem_var.use_count() > 1)
            private_detail_te_handle_mem_var = private_detail_te_handle_mem_var->clone();
        return *private_detail_te_handle_mem_var;
    }

--- a/src/include/migraphx/shape.hpp
+++ b/src/include/migraphx/shape.hpp
@@ -263,7 +263,7 @@ struct MIGRAPHX_EXPORT shape
    /// no padding
    bool packed() const;
-    /// Returns true is the shape has been transposed. That is the strides are not in descending
+    /// Returns true if the shape has been transposed. That is the strides are not in descending
    /// order
    bool transposed() const;

--- a/src/include/migraphx/shape_for_each.hpp
+++ b/src/include/migraphx/shape_for_each.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -37,11 +37,11 @@ inline namespace MIGRAPHX_INLINE_NS {
 template <class F>
 void shape_for_each(const migraphx::shape& s, F f)
 {
-    // Ensure calls to f use const ref to vector
-    auto call = [&f](const std::vector<std::size_t>& i) { f(i); };
    std::vector<std::size_t> indices(s.lens().size());
+    const auto& index_const_ref = indices;
    shape ss{s.type(), s.lens()};
-    for(std::size_t i = 0; i < ss.elements(); i++)
+    size_t max = ss.elements();
+    for(std::size_t i = 0; i < max; i++)
    {
        std::transform(ss.strides().begin(),
                       ss.strides().end(),
@@ -51,9 +51,13 @@ void shape_for_each(const migraphx::shape& s, F f)
                           assert(len > 0 and stride > 0);
                           return (i / stride) % len;
                       });
-        call(indices);
+        if constexpr(std::is_invocable<F, decltype(index_const_ref), decltype(i)>{})
+            f(index_const_ref, i);
+        else
+            f(index_const_ref);
    }
 }
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/include/migraphx/simplify_reshapes.hpp
+++ b/src/include/migraphx/simplify_reshapes.hpp
@@ -38,6 +38,7 @@ struct module;
 */
 struct MIGRAPHX_EXPORT simplify_reshapes
 {
+    size_t depth = 4;
    std::string name() const { return "simplify_reshapes"; }
    void apply(module& m) const;
 };

--- a/src/include/migraphx/stream_model.hpp
+++ b/src/include/migraphx/stream_model.hpp
@@ -100,7 +100,7 @@ struct stream_model
    {
        using std::swap;
        auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
-        if(derived and private_detail_te_handle_mem_var.unique())
+        if(derived and private_detail_te_handle_mem_var.use_count() == 1)
        {
            *derived = std::forward<PrivateDetailTypeErasedT>(value);
        }
@@ -288,7 +288,7 @@ struct stream_model
    private_detail_te_handle_base_type& private_detail_te_get_handle()
    {
        assert(private_detail_te_handle_mem_var != nullptr);
-        if(not private_detail_te_handle_mem_var.unique())
+        if(private_detail_te_handle_mem_var.use_count() > 1)
            private_detail_te_handle_mem_var = private_detail_te_handle_mem_var->clone();
        return *private_detail_te_handle_mem_var;
    }

--- a/src/include/migraphx/stringutils.hpp
+++ b/src/include/migraphx/stringutils.hpp
@@ -86,7 +86,7 @@ inline std::string join_strings(Strings strings, const std::string& delim)
 inline std::vector<std::string> split_string(const std::string& s, char delim)
 {
    std::vector<std::string> elems;
-    std::stringstream ss(s + ' ');
+    std::stringstream ss(s + delim);
    std::string item;
    while(std::getline(ss, item, delim))
    {
@@ -149,6 +149,10 @@ interpolate_string(const std::string& input, F f, std::string start = "${", std:
        result.append(it, next_start);
        if(next_start == input.end())
            break;
+        if(next_end == input.end())
+        {
+            throw std::runtime_error("Unbalanced brackets");
+        }
        auto r = f(next_start + start.size(), next_end);
        result.append(r.begin(), r.end());
        it = next_end + end.size();

--- a/src/include/migraphx/target.hpp
+++ b/src/include/migraphx/target.hpp
@@ -167,7 +167,7 @@ struct target
    {
        using std::swap;
        auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
-        if(derived and private_detail_te_handle_mem_var.unique())
+        if(derived and private_detail_te_handle_mem_var.use_count() == 1)
        {
            *derived = std::forward<PrivateDetailTypeErasedT>(value);
        }
@@ -428,7 +428,7 @@ struct target
    private_detail_te_handle_base_type& private_detail_te_get_handle()
    {
        assert(private_detail_te_handle_mem_var != nullptr);
-        if(not private_detail_te_handle_mem_var.unique())
+        if(private_detail_te_handle_mem_var.use_count() > 1)
            private_detail_te_handle_mem_var = private_detail_te_handle_mem_var->clone();
        return *private_detail_te_handle_mem_var;
    }

--- a/src/instruction.cpp
+++ b/src/instruction.cpp
@@ -64,10 +64,7 @@ void instruction::replace(const shape& r)
        result = r;
        for(auto&& ins : output)
        {
-            if(ins->name() == "@return")
+            assert(ins->name() == "@return" or ins->name().front() != '@');
-                continue;
-            assert(ins->name().front() != '@');
            ins->recompute_shape();
        }
    }
@@ -122,10 +119,6 @@ bool instruction::valid() const
    {
        computed = result;
    }
-    else if(op.name() == "@return")
-    {
-        computed = {};
-    }
    else
    {
        try
@@ -145,6 +138,7 @@ bool instruction::valid() const
 }
 shape instruction::get_shape() const { return result; }
 const literal& instruction::get_literal() const
 {
    assert(op.name() == "@literal");
@@ -395,7 +389,7 @@ void instruction::print(std::ostream& os,
    if(not ins->module_inputs().empty())
    {
        std::string delim = ", [";
-        for(auto&& mod_arg : ins->module_inputs())
+        for(const const_module_ref& mod_arg : ins->module_inputs())
        {
            os << delim << mod_arg->name();
            delim = ", ";

--- a/src/load_save.cpp
+++ b/src/load_save.cpp
@@ -21,6 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
+#include <migraphx/instruction.hpp>
 #include <migraphx/load_save.hpp>
 #include <migraphx/file_buffer.hpp>
 #include <migraphx/json.hpp>
@@ -60,9 +61,29 @@ void save(const program& p, const std::string& filename, const file_options& opt
 {
    write_buffer(filename, save_buffer(p, options));
 }
+// MIOpen doesn't support serializing fusion plans with Find-2.0 APIs
+// Prints a warning to stdout when any module of the program contains an instruction named
+// "gpu::miopen_fusion".
+void print_miopen_warning(const program& p)
+{
+    auto mods = p.get_modules();
+    if(std::any_of(mods.begin(), mods.end(), [](const auto* m) {
+           return std::any_of(m->begin(), m->end(), [](const instruction& i) {
+               return i.name() == "gpu::miopen_fusion";
+           });
+       }))
+    {
+        std::cout << "[WARNING]: Program has miopen_fusion instructions for which tuned solutions "
+                     "are not stored inside serialized MIGraphX program. Consider serializing with "
+                     "MIGRAPHX_DISABLE_MIOPEN_FUSION=1 flag set."
+                  << std::endl;
+    }
+}
 std::vector<char> save_buffer(const program& p, const file_options& options)
 {
    value v = p.to_value();
+    print_miopen_warning(p);
    std::vector<char> buffer;
    if(options.format == "msgpack")
    {