Merge branch 'develop' into dyn_resize_gather

13d14c66 · Brian Pickrell · f4e7d9d9 · d1abf06f · 13d14c66 · 13d14c66
Commit 13d14c66 authored Oct 24, 2023 by Brian Pickrell
20 changed files
--- a/src/include/migraphx/generate.hpp
+++ b/src/include/migraphx/generate.hpp
@@ -48,7 +48,7 @@ constexpr T normalize(unsigned long z)
 template <class T, MIGRAPHX_REQUIRES(is_signed<T>{} and not is_floating_point<T>{})>
 constexpr T normalize(unsigned long z)
 {
-    const auto max      = 1UL << (sizeof(T) * 5);
+    const auto max      = 1ULL << (sizeof(T) * 5);
    const auto half_max = max / 2;
    return half_max - (z % max);
 }
@@ -58,7 +58,7 @@ template <class T,
                            not std::is_same<T, bool>{})>
 constexpr T normalize(unsigned long z)
 {
-    const auto max = 1UL << (sizeof(T) * 5);
+    const auto max = 1ULL << (sizeof(T) * 5);
    return z % max;
 }

--- a/src/include/migraphx/instruction.hpp
+++ b/src/include/migraphx/instruction.hpp
@@ -81,6 +81,7 @@ struct MIGRAPHX_EXPORT instruction
    const std::vector<module_ref>& module_inputs() const;
+    /// Where this instruction is used as an input to another instruction
    const std::vector<instruction_ref>& outputs() const;
    friend bool operator==(const instruction& x, const instruction& y);

--- a/src/include/migraphx/instruction_ref.hpp
+++ b/src/include/migraphx/instruction_ref.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -27,12 +27,42 @@
 #include <list>
 #include <functional>
 #include <migraphx/config.hpp>
+#include <migraphx/requires.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 struct instruction;
+#if defined(_WIN32) && !defined(NDEBUG)
+/// Wrapper around the instruction list iterator. It is only compiled when
+/// building for Windows without NDEBUG (see the enclosing #if); every other
+/// configuration uses the plain iterator alias from the #else branch.
+// NOTE(review): presumably this exists to avoid the debug-iterator checks that
+// the standard comparison operators perform in MSVC debug builds -- confirm.
+struct instruction_ref : std::list<instruction>::iterator
+{
+    using instruction_iter       = std::list<instruction>::iterator;
+    using instruction_const_iter = std::list<instruction>::const_iterator;
+    instruction_ref() = default;
+    /// Implicit conversion from the underlying list iterator, so code that
+    /// produces plain iterators can still be used where an instruction_ref is expected.
+    instruction_ref(const instruction_iter& other) : instruction_iter(other) {}
+    /// Equality for any pair of operands where at least one is exactly
+    /// instruction_ref (constrained through MIGRAPHX_REQUIRES). The result is
+    /// decided by comparing the `_Ptr` member of each operand's `_Unwrapped()`
+    /// value, which are implementation-specific members of the library iterator.
+    template <class T,
+              class U,
+              MIGRAPHX_REQUIRES(std::is_same<T, instruction_ref>{} or
+                                std::is_same<U, instruction_ref>{})>
+    friend bool operator==(const T& x, const U& y)
+    {
+        return x._Unwrapped()._Ptr == y._Unwrapped()._Ptr;
+    }
+    /// Inequality, defined as the negation of the operator== above and
+    /// constrained in the same way.
+    template <class T,
+              class U,
+              MIGRAPHX_REQUIRES(std::is_same<T, instruction_ref>{} or
+                                std::is_same<U, instruction_ref>{})>
+    friend bool operator!=(const T& x, const U& y)
+    {
+        return not(x == y);
+    }
+};
+#else
 using instruction_ref = std::list<instruction>::iterator;
+#endif
 MIGRAPHX_EXPORT migraphx::instruction* as_address(const instruction_ref& ins) noexcept;
@@ -65,4 +95,8 @@ struct equal_to<migraphx::instruction_ref> // NOLINT
 } // namespace std
+#ifdef _MSC_VER
+#include <migraphx/instruction.hpp>
+#endif
 #endif
--- a/src/include/migraphx/matcher.hpp
+++ b/src/include/migraphx/matcher.hpp
@@ -33,6 +33,7 @@
 #include <migraphx/type_name.hpp>
 #include <migraphx/source_location.hpp>
 #include <migraphx/config.hpp>
+#include <array>
 #include <unordered_map>
 #include <unordered_set>

--- a/src/include/migraphx/normalize_attributes.hpp
+++ b/src/include/migraphx/normalize_attributes.hpp
@@ -52,6 +52,7 @@ using dependent_type = typename select_dependent_type<T, Ts...>::type;
 * \param attr_val the normalize_axes attributes from the operator
 * \param prefix error message prefix
 */
+MIGRAPHX_EXPORT
 std::vector<int64_t> normalize_axes(const std::vector<int64_t>& axes,
                                    const shape& input_shape,
                                    const value& attr_val,
@@ -67,6 +68,7 @@ std::vector<int64_t> normalize_axes(const std::vector<int64_t>& axes,
 * \param attr_val the normalize_axes attributes from the operator
 * \param prefix error message prefix
 */
+MIGRAPHX_EXPORT
 std::vector<int64_t> normalize_indices(const std::vector<int64_t>& indices,
                                       const std::vector<int64_t>& axes,
                                       const shape& input_shape,

--- a/src/include/migraphx/op/allocate.hpp
+++ b/src/include/migraphx/op/allocate.hpp
@@ -33,6 +33,19 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
+/**
+ * Static allocate:
+ * No inputs: `allocate()`
+ * `this.s` attribute set to the static output shape of the buffer.
+ *
+ * Dynamic allocate:
+ * One input: `allocate(output_dims)`
+ * `output_dims` holds the output buffer dimensions and has a static shape.
+ * Either `this.s` or `this.buf_type` must be set to calculate the dynamic output shape at compute
+ * time. If `this.buf_type` is set, the compute_shape() of allocate at compile time will have one
+ * {0, max std::size_t} dynamic_dimension per element of the 1-D `output_dims` input. If `this.s`
+ * is set then compute_shape() will output `this.s`; `this.s` should be a dynamic shape.
+ */
 struct allocate
 {
    shape s{};
@@ -49,17 +62,22 @@ struct allocate
    shape compute_shape(const std::vector<shape>& inputs) const
    {
-        migraphx::check_shapes{inputs, *this, true}.has(0, 1);
-        // check if shape attribute is not default
        if(s != shape())
        {
+            if(inputs.size() == 1)
+            {
+                migraphx::check_shapes{inputs, *this, false}.only_dims(1);
+            }
+            else
+            {
+                migraphx::check_shapes{inputs, *this, false}.has(0);
+            }
            return s;
        }
        else
        {
+            migraphx::check_shapes{inputs, *this, false}.has(1).only_dims(1);
            const auto& out_dims = inputs.at(0);
-            assert(not out_dims.dynamic());
-            assert(out_dims.ndim() == 1);
            std::size_t max_val = std::numeric_limits<std::size_t>::max();
            std::vector<shape::dynamic_dimension> dyn_dims(out_dims.lens().at(0),
                                                           shape::dynamic_dimension{0, max_val});
@@ -70,13 +88,13 @@ struct allocate
    {
        if(args.empty())
        {
-            return {output_shape};
+            return argument{output_shape};
        }
        else
        {
            std::vector<std::size_t> output_dims(output_shape.ndim());
            args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); });
-            return {shape{buf_type, output_dims}};
+            return argument{shape{buf_type, output_dims}};
        }
    }
 };

--- a/src/include/migraphx/op/argmax.hpp
+++ b/src/include/migraphx/op/argmax.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -31,6 +31,7 @@
 #include <migraphx/value.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
 #include <migraphx/dyn_output.hpp>
+#include <migraphx/float_equal.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -39,11 +40,12 @@ namespace op {
 struct argmax
 {
    int64_t axis           = 0;
+    bool select_last_index = false;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
-        return pack(f(self.axis, "axis"));
+        return pack(f(self.axis, "axis"), f(self.select_last_index, "select_last_index"));
    }
    value attributes() const
@@ -87,6 +89,10 @@ struct argmax
                max_val   = cur_val;
                max_index = i;
            }
+            else if(select_last_index and float_equal(max_val, cur_val))
+            {
+                max_index = i;
+            }
        }
        return max_index;
    }

--- a/src/include/migraphx/op/argmin.hpp
+++ b/src/include/migraphx/op/argmin.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -30,6 +30,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
+#include <migraphx/float_equal.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -38,11 +39,12 @@ namespace op {
 struct argmin
 {
    int64_t axis = 0;
+    bool select_last_index = false;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
-        return pack(f(self.axis, "axis"));
+        return pack(f(self.axis, "axis"), f(self.select_last_index, "select_last_index"));
    }
    value attributes() const
@@ -78,6 +80,10 @@ struct argmin
                min_val   = cur_val;
                min_index = i;
            }
+            else if(select_last_index and float_equal(min_val, cur_val))
+            {
+                min_index = i;
+            }
        }
        return min_index;

--- a/src/include/migraphx/op/convert.hpp
+++ b/src/include/migraphx/op/convert.hpp
@@ -68,7 +68,7 @@ struct convert : unary<convert>
            auto y = x;
            shape::visit(type, [&](auto as) {
                // clamping value between target_type's max and min doesn't work for NaNs,
-                if(std::isnan(x))
+                if(std::isnan(static_cast<double>(x)))
                {
                    y = as.nan();
                }

--- a/src/include/migraphx/op/fill.hpp
+++ b/src/include/migraphx/op/fill.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_OPERATORS_FILL_HPP
+#define MIGRAPHX_GUARD_OPERATORS_FILL_HPP
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/dyn_output.hpp>
+#include <migraphx/par_for.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+/**
+ * fill(default_value, output_buffer)
+ * Writes the single element held by default_value into every element of
+ * output_buffer and returns output_buffer (the result aliases input 1).
+ * Both inputs must have the same type, and default_value must have a static
+ * shape with exactly one element.
+ * Note that if default_value is a literal and output_buffer has a static
+ * shape, this operator can be replaced with a literal.
+ */
+struct fill
+{
+    std::string name() const { return "fill"; }
+    // Validates the two inputs and reports the shape of the output buffer.
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this, true}.has(2).same_type();
+        const auto& default_shape = inputs.at(0);
+        // elements() is only queried once the shape is known to be static.
+        const bool single_static_value =
+            not default_shape.dynamic() and default_shape.elements() == 1;
+        if(not single_static_value)
+        {
+            MIGRAPHX_THROW("FILL: default_value is dynamic or more than one element");
+        }
+        return inputs.back();
+    }
+    // Copies the first element of args[0] into each element of args[1].
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
+    {
+        auto& default_value = args[0];
+        auto& output_buffer = args[1];
+        visit_all(default_value, output_buffer)([&](auto src, auto dst) {
+            const auto count = dyn_out.computed_shape.elements();
+            par_for(count, [&](auto idx) { dst[idx] = src.front(); });
+        });
+        return output_buffer;
+    }
+    // The output is the buffer passed as input 1.
+    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 1; }
+};
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif
--- a/src/include/migraphx/op/isnan.hpp
+++ b/src/include/migraphx/op/isnan.hpp
@@ -35,7 +35,7 @@ struct isnan : unary<isnan>
 {
    auto apply() const
    {
-        return [](auto x) { return std::isnan(x); };
+        return [](auto x) { return std::isnan(static_cast<double>(x)); };
    }
    std::string name() const { return "isnan"; }

--- a/src/include/migraphx/op/nonmaxsuppression.hpp
+++ b/src/include/migraphx/op/nonmaxsuppression.hpp
@@ -24,6 +24,7 @@
 #ifndef MIGRAPHX_GUARD_OPERATORS_NONMAXSUPPRESSION_HPP
 #define MIGRAPHX_GUARD_OPERATORS_NONMAXSUPPRESSION_HPP
+#include <array>
 #include <cmath>
 #include <queue>
 #include <cstdint>

--- a/src/include/migraphx/op/pooling.hpp
+++ b/src/include/migraphx/op/pooling.hpp
@@ -411,7 +411,7 @@ struct pooling
            // for dynamic GlobalPooling, there's no padding
            kernel_dims.insert(kernel_dims.end(), input_lens.begin() + 2, input_lens.end());
            output_shape = dyn_out.computed_shape;
-            result       = dyn_out.computed_shape;
+            result       = argument{dyn_out.computed_shape};
        }
        else if((padding_mode != op::padding_mode_t::default_))
        {
@@ -439,7 +439,7 @@ struct pooling
        {
            kernel_dims  = this->lengths;
            output_shape = dyn_out.computed_shape;
-            result       = dyn_out.computed_shape;
+            result       = argument{dyn_out.computed_shape};
        }
        // Perform the computation and populate result

--- a/src/include/migraphx/op/random_seed.hpp
+++ b/src/include/migraphx/op/random_seed.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_OPERATORS_RANDOM_SEED_HPP
+#define MIGRAPHX_GUARD_OPERATORS_RANDOM_SEED_HPP
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/argument.hpp>
+#include <random>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+/**
+ * Generates a seed for random number generators at run time by drawing one
+ * value from std::random_device, so the seed is not fixed when the program
+ * is compiled.
+ * This operation takes no inputs and outputs a tensor with a single value.
+ * The type of that value is given by the `dtype` attribute, which defaults to
+ * uint64_type.
+ */
+struct random_seed
+{
+    shape::type_t dtype = shape::type_t::uint64_type;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.dtype, "dtype"));
+    }
+    std::string name() const { return "random_seed"; }
+    // Rejects any inputs and reports a shape built from `dtype` alone.
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        check_shapes{inputs, *this}.has(0);
+        return shape{dtype};
+    }
+    // Allocates the output and stores one std::random_device draw in its first element.
+    argument compute(const shape& output_shape, const std::vector<argument>&) const
+    {
+        argument seed(output_shape);
+        seed.visit([](auto out) {
+            std::random_device entropy;
+            out.front() = entropy();
+        });
+        return seed;
+    }
+};
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif
--- a/src/include/migraphx/op/random_uniform.hpp
+++ b/src/include/migraphx/op/random_uniform.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/**
+ * Random Uniform distribution operator.  Given an output buffer, populate it
+ * with random values.  Calls to random_uniform that are given the same
+ * randomization seed (for example, as a literal input) will always generate
+ * the same pseudo-random sequence.
+ *
+ *      Inputs:   (1) randomization seed (any type is allowed)
+ *                (2) output buffer argument to be populated.
+ *
+ *      Attributes:  none
+ *
+ *      Output:   Returns the buffer from input #2.
+ *
+ */
+#ifndef MIGRAPHX_GUARD_OPERATORS_RANDOM_UNIFORM_HPP
+#define MIGRAPHX_GUARD_OPERATORS_RANDOM_UNIFORM_HPP
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/argument.hpp>
+#include <random>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+/**
+ * random_uniform fills the buffer passed as input 1 with uniformly distributed
+ * pseudo-random numbers and returns that buffer.
+ *
+ * Integer output types are drawn from [0, std::numeric_limits<type>::max()].
+ * All other output types are drawn from std::uniform_real_distribution<double>
+ * with its default range [0, 1) and then converted to the output type.
+ */
+struct random_uniform
+{
+    // The random_uniform operation needs the random number generator seed
+    // to be passed as a runtime input.
+    std::string name() const { return "random_uniform"; }
+    // Requires exactly two inputs (seed, output buffer); the output has the buffer's shape.
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this, true}.has(2);
+        return inputs.at(1);
+    }
+    argument compute(const shape&, std::vector<argument> args) const
+    {
+        // Output goes into the passed buffer, not the shape output.
+        auto result = args[1];
+        // Input 0 supplies the seed, so the same seed reproduces the same sequence.
+        // Note: std::mt19937 is a 32-bit engine, so only the low 32 bits of the
+        // seed affect the generated sequence.
+        uint64_t local_seed = args[0].at<uint64_t>(0);
+        std::mt19937 gen(local_seed);
+        result.visit([&](auto output) {
+            using type = typename decltype(output)::value_type;
+            if constexpr(std::is_integral<type>{})
+            {
+#ifdef _MSC_VER
+                // According to the C++ specification, the effect is undefined if the result type
+                // for the generator is not one of short, int, long, long long, unsigned short,
+                // unsigned int, unsigned long, or unsigned long long. See
+                // https://en.cppreference.com/w/cpp/numeric/random/uniform_int_distribution.
+                if constexpr(sizeof(type) == 1)
+                {
+                    // Draw through int, using the same [0, max] range as the
+                    // default-constructed distribution in the branch below, so that
+                    // signed 8-bit outputs are never negative and the documented
+                    // range holds on every compiler.
+                    std::uniform_int_distribution<int> dis{0, std::numeric_limits<type>::max()};
+                    std::generate(output.begin(), output.end(), [&] { return dis(gen); });
+                }
+                else
+#endif
+                {
+                    // A default-constructed distribution covers the closed range
+                    // [0, std::numeric_limits<type>::max()].
+                    // Todo:  enable different ranges
+                    std::uniform_int_distribution<type> dis;
+                    std::generate(output.begin(), output.end(), [&] { return dis(gen); });
+                }
+            }
+            else
+            {
+                // The default real distribution produces double values in [0, 1);
+                // they are converted to the output type on assignment.
+                std::uniform_real_distribution<> dis;
+                std::generate(output.begin(), output.end(), [&] { return dis(gen); });
+            }
+        });
+        return result;
+    }
+    // The output is the buffer passed as input 1.
+    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 1; }
+};
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif
--- a/src/include/migraphx/op/reshape.hpp
+++ b/src/include/migraphx/op/reshape.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -29,12 +29,29 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/dyn_output.hpp>
-#include <migraphx/optional.hpp>
+#include <algorithm>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
+/**
+ * 1 input version:
+ * reshape(input_data)
+ * this.dims = output_dims
+ * Makes a copy of input_data to the output shape.
+ *
+ * 2 input version:
+ * reshape(input_data, output_buffer)
+ * this.dims = unset
+ * Copies input_data to output_buffer; output_buffer already has the output shape.
+ * This version will not fail gracefully if the input shape and output_buffer shape are
+ * incompatible: the only check is in compute(), which throws if the number of elements does not
+ * match at runtime. This version should only be used for dynamic reshapes (output dimensions only
+ * known at runtime). If output_buffer has a static shape during compile/parse, you can use the
+ * 1 input version.
+ */
 struct reshape
 {
    std::vector<int64_t> dims;
@@ -45,8 +62,6 @@ struct reshape
        return pack(f(self.dims, "dims"));
    }
-    value attributes() const { return {{"require_std_shape", true}}; }
    std::string name() const { return "reshape"; }
    shape dyn_compute_shape(shape s0) const
@@ -110,27 +125,9 @@ struct reshape
        return it;
    }
-    template <class DimIterator, class StrideIterator>
+    // This will attempt to alias the dimensions of the input shape to the lens of
-    static auto can_strides_merge(DimIterator dim_start,
+    // `rdims`. Unlike reshape_lazy though we can modify memory layout with copies and this
-                                  DimIterator dim_last,
+    // can remove previous nullopts that were sent back for the alias case
-                                  StrideIterator stride_start,
-                                  StrideIterator stride_last)
-    {
-        assert(std::distance(dim_start, dim_last) == std::distance(stride_start, stride_last));
-        auto cstride = *std::prev(stride_last);
-        return std::equal(std::make_reverse_iterator(dim_last),
-                          std::make_reverse_iterator(dim_start + 1),
-                          std::make_reverse_iterator(stride_last - 1),
-                          std::make_reverse_iterator(stride_start),
-                          [&](auto dim, auto stride) {
-                              cstride *= dim;
-                              return stride == cstride;
-                          });
-    }
-    // This will reshape the dimesions of the input shape to use the lens of
-    // `rdims`. If this can't be done without changing memory layout then it
-    // will return nullopt
    static optional<shape> reshape_dims(const shape& input, const std::vector<std::size_t>& rdims)
    {
        if(input.standard())
@@ -155,13 +152,8 @@ struct reshape
            {
                auto start = idims.begin() + i;
                auto it    = compute_end_dim(start, idims.end(), rdim);
-                if(it == start)
-                    return nullopt;
                auto n = it - start;
                assert((i + n) <= istrides.size());
-                if(not can_strides_merge(
-                       start, it + 1, istrides.begin() + i, istrides.begin() + i + n + 1))
-                    return nullopt;
                i += n;
                rstrides.push_back(istrides[i]);
            }
@@ -170,8 +162,7 @@ struct reshape
            {
                auto start = rdims.begin() + i;
                auto it    = compute_end_dim(start, rdims.end(), idim);
-                if(it == start)
-                    return nullopt;
                auto n = it - start;
                assert((r + n) <= rdims.size());
                auto stride = istrides[i] * idim;
@@ -191,15 +182,11 @@ struct reshape
            auto stride = rstrides.back();
            for(auto d : range(rdims.begin() + rstrides.size(), rdims.end()))
            {
-                if(d != 1)
+                (void)d;
-                    return nullopt;
                rstrides.push_back(stride);
            }
        }
-        if(rdims.size() != rstrides.size())
-            return nullopt;
        return shape{input.type(), rdims, rstrides};
    }
@@ -233,25 +220,26 @@ struct reshape
        }
        auto s = reshape_dims(inputs.front(), rdims);
-        if(not s.has_value())
-            MIGRAPHX_THROW("Reshape on axis that is not packed.");
        if(s->elements() != inputs.front().elements())
-            MIGRAPHX_THROW("Reshape: Wrong number of elements for reshape: reshape has " +
+            MIGRAPHX_THROW("reshape: Wrong number of elements for reshape: reshape has " +
                           std::to_string(s->elements()) + " elements whereas the input has " +
                           std::to_string(inputs.front().elements()));
-        assert(s->bytes() == inputs.front().bytes());
        return *s;
    }
    shape compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this, true}.has(1);
+        check_shapes{inputs, *this, true}.has(1, 2);
        auto n_neg_dims = std::count(dims.begin(), dims.end(), -1);
        if(n_neg_dims > 1)
-            MIGRAPHX_THROW("Reshape: Dimensions for reshape can only have one -1 dim");
+            MIGRAPHX_THROW("reshape: Dimensions for reshape can only have one -1 dim");
-        auto s0 = inputs[0];
+        auto s0 = inputs.front();
+        if(inputs.size() == 1)
+        {
            if(s0.dynamic())
            {
                return dyn_compute_shape(s0);
@@ -261,13 +249,39 @@ struct reshape
                return static_compute_shape(inputs, n_neg_dims);
            }
        }
+        else
+        {
+            return inputs.back();
+        }
+    }
    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const // 1 arg: copy into a new result; otherwise copy args[0] into args[1]
    {
-        return args[0].reshape(dyn_out.computed_shape);
+        assert(dyn_out.computed_shape.standard()); // debug-build check that the computed output shape is standard
-    }
+        if(args.size() == 1)
+        {
+            argument result{dyn_out.computed_shape}; // result is constructed from the computed output shape
-    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
+            visit_all(result, args[0])([&](auto output, auto input) {
+                std::copy(input.begin(), input.end(), output.begin()); // element-by-element copy in iteration order
+            });
+            return result;
+        }
+        else
+        {
+            // 2 arg: args[0] is the input and args[1] is the output buffer (as named in the error text below)
+            if(args[0].get_shape().elements() != args[1].get_shape().elements())
+            {
+                MIGRAPHX_THROW("Reshape: Number of elements must match at runtime. Input: " +
+                               std::to_string(args[0].get_shape().elements()) +
+                               " Output buffer: " + std::to_string(args[1].get_shape().elements()));
+            }
+            visit_all(args[1], args[0])([&](auto output, auto input) {
+                std::copy(input.begin(), input.end(), output.begin()); // same copy as the 1-arg path, but into args[1]
+            });
+            return args[1]; // the provided second argument is returned as the result
+        }
+    }
 };
 } // namespace op

--- a/src/include/migraphx/op/reshape_lazy.hpp
+++ b/src/include/migraphx/op/reshape_lazy.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_OPERATORS_RESHAPE_LAZY_HPP
+#define MIGRAPHX_GUARD_OPERATORS_RESHAPE_LAZY_HPP
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/argument.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/value.hpp>
+#include <migraphx/dyn_output.hpp>
+#include <migraphx/optional.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+// Reshape operator whose compute() returns `args[0].reshape(...)` and whose
+// output_alias() reports input index 0.
+//
+// `dims` holds the requested output dimensions. For a static input a 0 entry is
+// replaced by the input dimension at the same index and a single -1 entry is
+// inferred from the input's element count. For a dynamic input the one
+// non-fixed dynamic_dimension must line up with a 0 or -1 entry of `dims`.
+struct reshape_lazy
+{
+    std::vector<int64_t> dims;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.dims, "dims"));
+    }
+    value attributes() const { return {{"require_std_shape", true}}; }
+    std::string name() const { return "reshape_lazy"; }
+    // Computes the output shape for a dynamic input shape `s0`.
+    // Throws unless exactly one dynamic_dimension is non-fixed, `dims` has the
+    // same rank as the input, the non-fixed dimension lines up with a 0 or -1
+    // entry of `dims`, and the products of the fixed dimensions agree.
+    shape dyn_compute_shape(shape s0) const
+    {
+        auto dyn_dims      = s0.dyn_dims();
+        auto num_not_fixed = std::count_if(
+            dyn_dims.cbegin(), dyn_dims.cend(), [](auto dd) { return not dd.is_fixed(); });
+        if(num_not_fixed != 1)
+        {
+            MIGRAPHX_THROW("reshape_lazy: Only supports one non-fixed dynamic_dimension");
+        }
+        // dims[i] and dyn_dims[i] are paired index-by-index below, so a rank
+        // mismatch would read past the end of one of the two containers
+        if(dims.size() != dyn_dims.size())
+        {
+            MIGRAPHX_THROW("reshape_lazy: Number of output dims (" + std::to_string(dims.size()) +
+                           ") must match the number of input dynamic_dimensions (" +
+                           std::to_string(dyn_dims.size()) + ")");
+        }
+        // track number of fixed elements in input and output
+        std::size_t num_dims_ele = 1;
+        std::size_t num_dd_ele   = 1;
+        for(std::size_t i = 0; i < dyn_dims.size(); ++i)
+        {
+            if(dyn_dims[i].is_fixed())
+            {
+                num_dims_ele *= dims[i];
+                num_dd_ele *= dyn_dims[i].min;
+            }
+            else
+            {
+                if(dims[i] != 0 and dims[i] != -1)
+                {
+                    MIGRAPHX_THROW(
+                        "reshape_lazy: Non-fixed dynamic_dimension doesn't match with 0 or -1 "
+                        "output dimension");
+                }
+            }
+        }
+        if(num_dims_ele != num_dd_ele)
+        {
+            MIGRAPHX_THROW("reshape_lazy: Number of fixed elements must match. Input: " +
+                           std::to_string(num_dd_ele) + " Output: " + std::to_string(num_dims_ele));
+        }
+        // construct output dynamic shape from dims attribute: the non-fixed
+        // dimension is passed through, every other one becomes {dim, dim}
+        std::vector<shape::dynamic_dimension> output_dyn_dims(dims.size());
+        std::transform(dims.cbegin(),
+                       dims.cend(),
+                       dyn_dims.cbegin(),
+                       output_dyn_dims.begin(),
+                       [](std::size_t dim, auto dyn_dim) {
+                           if(not dyn_dim.is_fixed())
+                               return dyn_dim;
+                           return shape::dynamic_dimension{dim, dim};
+                       });
+        return {s0.type(), output_dyn_dims};
+    }
+    // Multiplies the elements of [start, last) in order until the running
+    // product reaches or exceeds `dim`. Returns an iterator to the element at
+    // which the product became exactly `dim`; returns `start` when the product
+    // overshoots `dim` or the range is exhausted before reaching it.
+    template <class Iterator>
+    static auto compute_end_dim(Iterator start, Iterator last, std::size_t dim)
+    {
+        std::size_t x = 1;
+        auto it       = std::find_if(start, last, [&](auto i) {
+            x *= i;
+            return x >= dim;
+        });
+        if(x != dim)
+            return start;
+        return it;
+    }
+    // Walks the dims/strides from the last element towards the first and
+    // checks that each stride equals the following stride multiplied by the
+    // following dimension, i.e. that the group of axes can be addressed as one
+    // axis using the last stride. Both ranges must be non-empty and of equal
+    // length.
+    template <class DimIterator, class StrideIterator>
+    static auto can_strides_merge(DimIterator dim_start,
+                                  DimIterator dim_last,
+                                  StrideIterator stride_start,
+                                  StrideIterator stride_last)
+    {
+        assert(std::distance(dim_start, dim_last) == std::distance(stride_start, stride_last));
+        auto cstride = *std::prev(stride_last);
+        return std::equal(std::make_reverse_iterator(dim_last),
+                          std::make_reverse_iterator(dim_start + 1),
+                          std::make_reverse_iterator(stride_last - 1),
+                          std::make_reverse_iterator(stride_start),
+                          [&](auto dim, auto stride) {
+                              cstride *= dim;
+                              return stride == cstride;
+                          });
+    }
+    // This will attempt to alias the dimensions of the input shape to the lens of
+    // `rdims`. If this can't be done without changing memory layout then it
+    // will return nullopt
+    static optional<shape> reshape_lazy_dims(const shape& input,
+                                             const std::vector<std::size_t>& rdims)
+    {
+        if(input.standard())
+            return shape{input.type(), rdims};
+        const auto& idims    = input.lens();
+        const auto& istrides = input.strides();
+        std::vector<std::size_t> rstrides;
+        // i walks the input dims, r walks the requested dims
+        std::size_t i = 0;
+        std::size_t r = 0;
+        while(i < idims.size() and r < rdims.size())
+        {
+            auto idim = idims[i];
+            auto rdim = rdims[r];
+            if(rdim == idim)
+            {
+                rstrides.push_back(istrides[i]);
+            }
+            // squeeze: several consecutive input dims collapse into one output dim
+            else if(rdim > idim)
+            {
+                auto start = idims.begin() + i;
+                auto it    = compute_end_dim(start, idims.end(), rdim);
+                if(it == start)
+                    return nullopt;
+                auto n = it - start;
+                assert((i + n) <= istrides.size());
+                if(not can_strides_merge(
+                       start, it + 1, istrides.begin() + i, istrides.begin() + i + n + 1))
+                    return nullopt;
+                i += n;
+                rstrides.push_back(istrides[i]);
+            }
+            // unsqueeze: one input dim is split across several output dims
+            else // if(rdim < idim)
+            {
+                // The search runs over the requested dims, so it has to start
+                // at index r (not i); the two indices differ after a squeeze.
+                auto start = rdims.begin() + r;
+                auto it    = compute_end_dim(start, rdims.end(), idim);
+                if(it == start)
+                    return nullopt;
+                auto n = it - start;
+                assert((r + n) <= rdims.size());
+                auto stride = istrides[i] * idim;
+                std::for_each(start, it + 1, [&](auto dim) {
+                    stride /= dim;
+                    rstrides.push_back(stride);
+                });
+                r += n;
+            }
+            i++;
+            r++;
+        }
+        // Handle trailing 1s
+        if(rstrides.size() < rdims.size() and not rstrides.empty())
+        {
+            auto stride = rstrides.back();
+            for(auto d : range(rdims.begin() + rstrides.size(), rdims.end()))
+            {
+                if(d != 1)
+                    return nullopt;
+                rstrides.push_back(stride);
+            }
+        }
+        if(rdims.size() != rstrides.size())
+            return nullopt;
+        return shape{input.type(), rdims, rstrides};
+    }
+    // Computes the output shape for a static input. `n_neg_dims` is the number
+    // of -1 entries in `dims` (compute_shape() rejects more than one).
+    // Throws when a 0 entry has no matching input dimension, when a -1 entry
+    // cannot be inferred, when the layout cannot be aliased, or when the element
+    // counts differ.
+    shape static_compute_shape(std::vector<shape> inputs, std::size_t n_neg_dims) const
+    {
+        check_shapes{inputs, *this}.has(1);
+        auto&& idims = inputs.front().lens();
+        std::vector<std::size_t> rdims(dims.begin(), dims.end());
+        for(std::size_t i = 0; i < dims.size(); i++)
+        {
+            if(dims[i] == 0)
+            {
+                // a 0 entry copies the input dimension at the same index, so
+                // that index has to exist in the input
+                if(i >= idims.size())
+                    MIGRAPHX_THROW("reshape_lazy: Dimension " + std::to_string(i) +
+                                   " is 0 but the input only has " +
+                                   std::to_string(idims.size()) + " dimensions");
+                rdims[i] = idims[i];
+            }
+            // rdims stores std::size_t, so a -1 entry would wrap around to the
+            // maximum value and corrupt the product computed below; use 1 as a
+            // neutral placeholder until the real value is inferred
+            if(dims[i] == -1)
+                rdims[i] = 1;
+        }
+        if(n_neg_dims > 0)
+        {
+            // accumulate in std::size_t: an `int` initial value would make the
+            // whole product be computed (and truncated) as int
+            const auto known_elements = std::accumulate(
+                rdims.begin(), rdims.end(), std::size_t{1}, std::multiplies<std::size_t>());
+            if(known_elements == 0)
+                MIGRAPHX_THROW("reshape_lazy: Cannot infer the -1 dimension when the remaining "
+                               "dimensions have zero elements");
+            const std::size_t missing_dim = inputs.front().elements() / known_elements;
+            for(std::size_t i = 0; i < rdims.size(); i++)
+            {
+                if(dims[i] == -1)
+                    rdims[i] = missing_dim;
+            }
+        }
+        auto s = reshape_lazy_dims(inputs.front(), rdims);
+        if(not s.has_value())
+            MIGRAPHX_THROW("reshape_lazy on axis that is not packed.");
+        if(s->elements() != inputs.front().elements())
+            MIGRAPHX_THROW(
+                "reshape_lazy: Wrong number of elements for reshape_lazy: reshape_lazy has " +
+                std::to_string(s->elements()) + " elements whereas the input has " +
+                std::to_string(inputs.front().elements()));
+        assert(s->bytes() == inputs.front().bytes());
+        return *s;
+    }
+    // Validates the single input and the number of -1 entries in `dims`, then
+    // dispatches to the dynamic or static shape computation.
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this, true}.has(1);
+        auto n_neg_dims = std::count(dims.begin(), dims.end(), -1);
+        if(n_neg_dims > 1)
+            MIGRAPHX_THROW("reshape_lazy: Dimensions for reshape_lazy can only have one -1 dim");
+        auto s0 = inputs[0];
+        if(s0.dynamic())
+        {
+            return dyn_compute_shape(s0);
+        }
+        else
+        {
+            return static_compute_shape(inputs, n_neg_dims);
+        }
+    }
+    // Returns the first argument reshaped to the computed output shape.
+    // NOTE(review): presumably argument::reshape shares the input buffer, which
+    // would match output_alias() returning 0 - confirm against argument.hpp.
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
+    {
+        return args[0].reshape(dyn_out.computed_shape);
+    }
+    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
+};
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif
--- a/src/include/migraphx/op/roialign.hpp
+++ b/src/include/migraphx/op/roialign.hpp
@@ -33,6 +33,7 @@
 #include <migraphx/dfor.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/shape_for_each.hpp>
+#include <array>
 #include <cmath>
 #include <numeric>
 #include <utility>
@@ -124,7 +125,7 @@ struct roialign
            {
                xy[ii] = roi_start[ii] + p[ii] * bin_size[ii] +
                         (i[ii] + .5f) * bin_size[ii] / bin_grid_size[ii];
-                xy[ii] = (coord_trans_mode == "output_half_pixel") ? (xy[ii] - 0.5f) : xy[ii];
+                xy[ii] = (coord_trans_mode == "half_pixel") ? (xy[ii] - 0.5f) : xy[ii];
                if(xy[ii] < -1.0 or xy[ii] > dims[ii])
                {
                    results[index] = pos_weight{};

--- a/src/include/migraphx/op/scatter.hpp
+++ b/src/include/migraphx/op/scatter.hpp
@@ -66,7 +66,7 @@ struct scatter : op_name<Derived>
    shape normalize_compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this}.has(3).standard();
+        check_shapes{inputs, *this}.has(3); // checks for three inputs; the removed line additionally chained .standard()
        // If non-packed, this converts to a packed output while preserving permutation of tensor
        return inputs.front().with_lens(inputs.front().lens()); // output shape is derived from the first input only
    }

--- a/src/include/migraphx/operators.hpp
+++ b/src/include/migraphx/operators.hpp
@@ -55,6 +55,7 @@
 #include <migraphx/op/equal.hpp>
 #include <migraphx/op/erf.hpp>
 #include <migraphx/op/exp.hpp>
+#include <migraphx/op/fill.hpp>
 #include <migraphx/op/flatten.hpp>
 #include <migraphx/op/floor.hpp>
 #include <migraphx/op/fmod.hpp>