manual merge

4ea39116 · Khalique Ahmed · 20128cae · d8011adf · 4ea39116 · 4ea39116
Commit 4ea39116 authored Nov 10, 2023 by Khalique Ahmed
20 changed files
--- a/src/include/migraphx/normalize_attributes.hpp
+++ b/src/include/migraphx/normalize_attributes.hpp
@@ -52,6 +52,7 @@ using dependent_type = typename select_dependent_type<T, Ts...>::type;
 * \param attr_val the normalize_axes attributes from the operator
 * \param prefix error message prefix
 */
+MIGRAPHX_EXPORT
 std::vector<int64_t> normalize_axes(const std::vector<int64_t>& axes,
                                    const shape& input_shape,
                                    const value& attr_val,
@@ -67,6 +68,7 @@ std::vector<int64_t> normalize_axes(const std::vector<int64_t>& axes,
 * \param attr_val the normalize_axes attributes from the operator
 * \param prefix error message prefix
 */
+MIGRAPHX_EXPORT
 std::vector<int64_t> normalize_indices(const std::vector<int64_t>& indices,
                                       const std::vector<int64_t>& axes,
                                       const shape& input_shape,

--- a/src/include/migraphx/onnx.hpp
+++ b/src/include/migraphx/onnx.hpp
@@ -48,8 +48,12 @@ struct onnx_options
    bool skip_unknown_operators = false;
    /// Print program if an error occurs
    bool print_program_on_error = false;
-    /// Max iter num for the loop operator
+    /// Max iter num for the loop operator if trip count is not set
    int64_t max_loop_iterations = 10;
+    /// Max iter limit for the loop operator.
+    /// Since the loop becomes a tensor of max iter size, a huge number can cause overflow during
+    /// shape computations.
+    int64_t limit_max_iterations = std::numeric_limits<uint16_t>::max();
    /// Use dynamic output for operators when available
    bool use_dyn_output = false;
 };

--- a/src/include/migraphx/op/allocate.hpp
+++ b/src/include/migraphx/op/allocate.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -33,11 +33,26 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
+/**
+ * Static allocate:
+ * No inputs: `allocate()`
+ * `this.s` attribute set to the static output shape of the buffer.
+ * `this.s` attribute can be set to a dynamic output shape; however this will allocate the maximum
+ * buffer size for that case
+ *
+ * Dynamic allocate:
+ * One input: `allocate(output_dims)`
+ * `output_dims` holds the output buffer dimensions and has a static shape.
+ * Either `this.s` or `this.buf_type` (but not both) must be set to calculate the dynamic output
+ * shape at compute time. If `this.buf_type` is set, the compute_shape() of allocate at compile time
+ * will have dynamic_dimensions from {0, max_int} with rank = output_dims.ndim(). If `this.s` is set
+ * then the compute_shape() will output `this.s`; `this.s` should be a dynamic shape.
+ */
 struct allocate
 {
-    shape s{};
+    optional<shape> s;
    // for dynamic allocate to set the buffer type
-    shape::type_t buf_type = shape::half_type;
+    optional<shape::type_t> buf_type;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
@@ -49,8 +64,12 @@ struct allocate
    shape compute_shape(const std::vector<shape>& inputs) const
    {
-        if(s != shape())
+        if(s.has_value())
        {
+            if(buf_type.has_value())
+            {
+                MIGRAPHX_THROW("ALLOCATE: shape and buf_type attributes both set");
+            }
            if(inputs.size() == 1)
            {
                migraphx::check_shapes{inputs, *this, false}.only_dims(1);
@@ -59,29 +78,37 @@ struct allocate
            {
                migraphx::check_shapes{inputs, *this, false}.has(0);
            }
-            return s;
+            return s.value();
        }
        else
        {
+            if(not buf_type.has_value())
+            {
+                MIGRAPHX_THROW("ALLOCATE: shape and buf_type attributes both not set");
+            }
            migraphx::check_shapes{inputs, *this, false}.has(1).only_dims(1);
            const auto& out_dims = inputs.at(0);
            std::size_t max_val  = std::numeric_limits<std::size_t>::max();
            std::vector<shape::dynamic_dimension> dyn_dims(out_dims.lens().at(0),
                                                           shape::dynamic_dimension{0, max_val});
-            return {buf_type, dyn_dims};
+            return {buf_type.value(), dyn_dims};
        }
    }
    argument compute(const shape& output_shape, const std::vector<argument>& args) const
    {
        if(args.empty())
        {
-            return {output_shape};
+            return argument{output_shape};
        }
        else
        {
            std::vector<std::size_t> output_dims(output_shape.ndim());
            args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); });
-            return {shape{buf_type, output_dims}};
+            if(s)
+            {
+                return argument{shape{s->type(), output_dims}};
+            }
+            return argument{shape{buf_type.value(), output_dims}};
        }
    }
 };

--- a/src/include/migraphx/op/argmax.hpp
+++ b/src/include/migraphx/op/argmax.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -31,6 +31,7 @@
 #include <migraphx/value.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
 #include <migraphx/dyn_output.hpp>
+#include <migraphx/float_equal.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -39,11 +40,12 @@ namespace op {
 struct argmax
 {
    int64_t axis           = 0;
+    bool select_last_index = false;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
-        return pack(f(self.axis, "axis"));
+        return pack(f(self.axis, "axis"), f(self.select_last_index, "select_last_index"));
    }
    value attributes() const
@@ -87,6 +89,10 @@ struct argmax
                max_val   = cur_val;
                max_index = i;
            }
+            else if(select_last_index and float_equal(max_val, cur_val))
+            {
+                max_index = i;
+            }
        }
        return max_index;
    }

--- a/src/include/migraphx/op/argmin.hpp
+++ b/src/include/migraphx/op/argmin.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -30,6 +30,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
+#include <migraphx/float_equal.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -38,11 +39,12 @@ namespace op {
 struct argmin
 {
    int64_t axis = 0;
+    bool select_last_index = false;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
-        return pack(f(self.axis, "axis"));
+        return pack(f(self.axis, "axis"), f(self.select_last_index, "select_last_index"));
    }
    value attributes() const
@@ -78,6 +80,10 @@ struct argmin
                min_val   = cur_val;
                min_index = i;
            }
+            else if(select_last_index and float_equal(min_val, cur_val))
+            {
+                min_index = i;
+            }
        }
        return min_index;

--- a/src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,25 +21,32 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
+#ifndef MIGRAPHX_GUARD_OPERATORS_ISINF_HPP
-#define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
+#define MIGRAPHX_GUARD_OPERATORS_ISINF_HPP
-#include <migraphx/program.hpp>
+#include <migraphx/op/unary.hpp>
-#include <migraphx/gpu/context.hpp>
+#include <migraphx/config.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
-namespace gpu {
+struct isinf : unary<isinf>
-struct MIGRAPHX_GPU_EXPORT pack_int8_args
 {
-    std::string name() const { return "gpu::pack_int8_args"; }
+    auto apply() const
-    void apply(module& m) const;
+    {
-    shape pack_int8_shape(const shape& s) const;
+        return [&](auto x) { return std::isinf(static_cast<double>(x)); };
+    }
+    std::string name() const { return "isinf"; }
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        return unary<isinf>::compute_shape(std::move(inputs)).with_type(shape::bool_type);
+    }
 };
-} // namespace gpu
+} // namespace op
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/include/migraphx/op/multinomial.hpp
+++ b/src/include/migraphx/op/multinomial.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,11 +21,52 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
+/**
+ * Multinomial or categorical distribution.  Performs a sampling of random input
+ *         and returns, for each sample, the index of its
+ *         category, or bucket.  This does not require the standard multinomial
+ *         distribution but instead takes a probability distribution, i.e. cumulative
+ *         distribution function (CDF) as its first input.
+ *
+ *      Inputs:   args[0] - a tensor of probabilities for each category.  Values are
+ *                          cumulative density function
+ *                          totals as provided by operation prefix_scan_sum.  Values are
+ *                          cumulative probabilities (i.e. start with any set of numbers > 0
+ *                          and then apply prefix_scan_sum).  Values do not need to be
+ *                          normalized to sum to 1; this is done in runtime computation.
+ *
+ *                          This input has Rank 2.  Dimension 0 is batch #, so that there can be
+ *                          a different CDF for each iteration in the batch.  The size of dimension
+ *                          1 is the number of categories.
+ *
+ *                args[1] - a tensor of random numbers.  The last dimension is the sample
+ *                          size, i.e. the number of
+ *                          random samples in each iteration of the batch.  Nominally
+ *                          has two dimensions where the first dimension is batch size, but
+ *                          any reshaping such that the total
+ *                          number of elements is (batch_size * sample_size) is legal.
+ *
+ *                          Values as created by a std::mt19937 like this:
+ *
+ *                           size_t sample_size = 100000;
+ *                           float seed         = 0.0f;
+ *                           std::mt19937 gen(seed);
+ *                           std::uniform_real_distribution<> dis(0.0, 1.0);
+ *                           std::vector<float> rand_samples(sample_size);
+ *                           std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return
+ *                                dis(gen); });
+ *
+ *        Output:   A 2D tensor holding the selected category for each sample.  Dimensions are
+ *                  (Input 1[first], Input 2[last]).
+ *
+*/
 #ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
 #define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP
-#include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <migraphx/par_for.hpp>
 #include <migraphx/reflect.hpp>
 #include <random>
@@ -47,22 +88,35 @@ struct multinomial
    std::string name() const { return "multinomial"; }
    shape compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this}.has(2).only_dims(2);
+        check_shapes{inputs, *this, true}.has(2).only_dims(2);
-        size_t sample_size = inputs.back().lens().back();
-        if(not contains({shape::int32_type, shape::int64_type}, dtype))
+        if(inputs.back().ndim() < 1)
-            MIGRAPHX_THROW(
+            MIGRAPHX_THROW("Multinomial: Second input shape (sample) has no dimensions");
-                "Multinomial: Invalid output type. Valid types are int32_type and int64_type.");
+        if(dtype == shape::bool_type)
+            MIGRAPHX_THROW("Multinomial: boolean output type invalid.");
-        return {dtype, {inputs.front().lens().front(), sample_size}};
+        // Output takes one dimension from each of the two input shapes.  If they are both fixed,
+        // return a static shape
+        if((not inputs.front().dynamic()) or (inputs.front().dyn_dims().front().is_fixed()))
+        {
+            if((not inputs.back().dynamic()) or (inputs.back().dyn_dims().back().is_fixed()))
+            {
+                size_t batch = {inputs.front().max_lens().front()};
+                size_t sample_size{inputs.back().max_lens().back()};
+                return {dtype, {batch, sample_size}};
+            }
+        }
+        return {dtype,
+                {inputs.front().to_dynamic().dyn_dims().front(),
+                 inputs.back().to_dynamic().dyn_dims().back()}};
    }
-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
-        size_t batch_size  = output_shape.lens().front();
+        size_t batch_size  = dyn_out.computed_shape.lens().front();
        size_t class_size  = args[0].get_shape().lens().back();
-        size_t sample_size = output_shape.lens().back();
+        size_t sample_size = dyn_out.computed_shape.lens().back();
        visit_all(args[0], args[1])([&](auto cdf, auto dist) {
            result.visit([&](auto output) {
@@ -70,13 +124,16 @@ struct multinomial
                    auto idx       = args[1].get_shape().multi(i);
                    auto cdf_begin = cdf.begin() + (idx[0] * class_size);
                    auto cdf_end   = cdf_begin + class_size;
+                    // std::upper_bound returns an iterator to the bucket that the random value
+                    // dist[i] falls in, after scaling it by the CDF total (the last CDF entry)
                    auto sample_iter =
                        std::upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end)));
+                    // convert iterator to an integer index
                    output[i] = std::distance(cdf_begin, sample_iter);
                });
            });
        });
        return result;
    }
 };

--- a/src/include/migraphx/op/round.hpp
+++ b/src/include/migraphx/op/round.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,24 +21,28 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#ifndef MIGRAPHX_GUARD_OPERATORS_ROUND_HPP
+#ifndef MIGRAPHX_GUARD_OPERATORS_NEARBYINT_HPP
-#define MIGRAPHX_GUARD_OPERATORS_ROUND_HPP
+#define MIGRAPHX_GUARD_OPERATORS_NEARBYINT_HPP
 #include <migraphx/op/unary.hpp>
 #include <migraphx/config.hpp>
+#include <fenv.h>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
+struct nearbyint : unary<nearbyint>
-struct round : unary<round>
 {
    auto apply() const
    {
-        return [](auto x) { return std::round(x); };
+        // Returns the element-wise rounding functor. std::nearbyint follows the current
+        // floating-point rounding mode, so the mode is forced to FE_TONEAREST for the call
+        // and the caller's mode is put back before the value is returned.
+        return [](auto x) {
+            auto rounding_mode = fegetround();
+            fesetround(FE_TONEAREST);
+            auto result = std::nearbyint(x);
+            // Restore before returning; a restore placed after `return` would never run.
+            fesetround(rounding_mode);
+            return result;
+        };
    }
 };
 } // namespace op
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/include/migraphx/op/nonmaxsuppression.hpp
+++ b/src/include/migraphx/op/nonmaxsuppression.hpp
@@ -24,6 +24,7 @@
 #ifndef MIGRAPHX_GUARD_OPERATORS_NONMAXSUPPRESSION_HPP
 #define MIGRAPHX_GUARD_OPERATORS_NONMAXSUPPRESSION_HPP
+#include <array>
 #include <cmath>
 #include <queue>
 #include <cstdint>

--- a/src/include/migraphx/op/normalize_attribute.hpp
+++ b/src/include/migraphx/op/normalize_attribute.hpp
@@ -40,6 +40,8 @@ namespace op {
 * 2. use_rank (default) vs use_lens:
 *  `use_rank` sets the max value/index of the attribute as the rank of lens.
 *  `use_lens` sets the max value/index as the corresponding value in lens at the axes index.
+ *      Uses the dynamic_dimension.max value for dynamic shapes. Returns the original vector
+ *      (no normalization) if any of dynamic_dimension[axes] are not fixed.
 * 3. `clip_min` vs. `not_clip_min` (default):
 *  Clip values less than the minimum to the minimum or not.
 * 4. `include_min` vs. `exclude_min` (default):

--- a/src/include/migraphx/op/pooling.hpp
+++ b/src/include/migraphx/op/pooling.hpp
@@ -411,7 +411,7 @@ struct pooling
            // for dynamic GlobalPooling, there's no padding
            kernel_dims.insert(kernel_dims.end(), input_lens.begin() + 2, input_lens.end());
            output_shape = dyn_out.computed_shape;
-            result       = dyn_out.computed_shape;
+            result       = argument{dyn_out.computed_shape};
        }
        else if((padding_mode != op::padding_mode_t::default_))
        {
@@ -439,7 +439,7 @@ struct pooling
        {
            kernel_dims  = this->lengths;
            output_shape = dyn_out.computed_shape;
-            result       = dyn_out.computed_shape;
+            result       = argument{dyn_out.computed_shape};
        }
        // Perform the computation and populate result

--- a/src/include/migraphx/op/prefix_scan_op.hpp
+++ b/src/include/migraphx/op/prefix_scan_op.hpp
@@ -22,6 +22,12 @@
 * THE SOFTWARE.
 */
+/**
+ * Parent struct for prefix scan ops.  A prefix scan is a mathematical entity useful
+ * in parallelizing various computations.  Given a list of numbers, a prefix scan
+ * op returns an equal size list of running totals of the values.  Other operations
+ * besides addition can be supported by child ops.
+ */
 #ifndef MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
 #define MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP

--- a/src/include/migraphx/op/quantizelinear.hpp
+++ b/src/include/migraphx/op/quantizelinear.hpp
@@ -30,11 +30,11 @@
 #include <migraphx/par_for.hpp>
 #include <migraphx/value.hpp>
 #include <cmath>
+#include <fenv.h>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
 struct quantizelinear
 {
    std::string name() const { return "quantizelinear"; }
@@ -71,26 +71,26 @@ struct quantizelinear
        {
            y_zero_point = args.at(2);
        }
        argument result{output_shape};
+        auto rounding_mode = fegetround();
+        fesetround(FE_TONEAREST);
        visit_all(result, y_zero_point)([&](auto output, auto zero_pts) {
            visit_all(x, y_scale)([&](auto input, auto scales) {
                using quant_type = typename decltype(output)::value_type;
                auto min_value   = std::numeric_limits<quant_type>::min();
                auto max_value   = std::numeric_limits<quant_type>::max();
                par_for(output_shape.elements(), [&](auto i) {
-                    int64_t quantized = static_cast<int64_t>(std::round(input[i] / scales[i])) +
+                    int64_t quantized = static_cast<int64_t>(std::nearbyint(input[i] / scales[i])) +
                                        static_cast<int64_t>(zero_pts[i]);
                    output[i] = std::max(static_cast<int64_t>(min_value),
                                         std::min(static_cast<int64_t>(max_value), quantized));
                });
            });
        });
+        fesetround(rounding_mode);
        return result;
    }
 };
 } // namespace op
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/include/migraphx/op/random_uniform.hpp
+++ b/src/include/migraphx/op/random_uniform.hpp
@@ -65,11 +65,10 @@ struct random_uniform
        return inputs.at(1);
    }
-    argument compute(const shape&, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
        // Output is a new buffer allocated with the computed output shape.
-        auto result = args[1];
+        argument result{dyn_out.computed_shape};
        uint64_t local_seed = args[0].at<uint64_t>(0);
        std::mt19937 gen(local_seed);
@@ -77,12 +76,27 @@ struct random_uniform
            using type = typename decltype(output)::value_type;
            if constexpr(std::is_integral<type>{})
            {
+#ifdef _MSC_VER
+                // According to the C++ specification, the effect is undefined if the result type
+                // for the generator is not one of short, int, long, long long, unsigned short,
+                // unsigned int, unsigned long, or unsigned long long. See
+                // https://en.cppreference.com/w/cpp/numeric/random/uniform_int_distribution.
+                if constexpr(sizeof(type) == 1)
+                {
+                    std::uniform_int_distribution<int> dis{std::numeric_limits<type>::min(),
+                                                           std::numeric_limits<type>::max()};
+                    std::generate(output.begin(), output.end(), [&] { return dis(gen); });
+                }
+                else
+#endif
+                {
                    // default range for all integer types is
                    // (0, std::uniform_int_distribution<type>::max()).
                    // Todo:  enable different ranges
                    std::uniform_int_distribution<type> dis;
                    std::generate(output.begin(), output.end(), [&] { return dis(gen); });
                }
+            }
            else
            {
                // default real distribution type is double with range (0, 1);

--- a/src/include/migraphx/op/reshape.hpp
+++ b/src/include/migraphx/op/reshape.hpp
@@ -36,6 +36,22 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
+/**
+ * 1 input version:
+ * reshape(input_data)
+ * this.dims = output_dims
+ * Makes a copy of input_data to the output shape.
+ *
+ * 2 input version:
+ * reshape(input_data, output_buffer)
+ * this.dims = unset
+ * Copies input_data to output_buffer; output_buffer already has the output shape.
+ * This version will not fail gracefully if the input shape and output_buffer shape are
+ * incompatible. There's a throw that will catch when the number of elements do not match at
+ * runtime. This version should only be used for dynamic reshapes (output dimensions only known at
+ * runtime). If output_buffer has a static shape during compile/parse, you can use the 1 input
+ * version.
+ */
 struct reshape
 {
    std::vector<int64_t> dims;
@@ -215,13 +231,15 @@ struct reshape
    shape compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this, true}.has(1);
+        check_shapes{inputs, *this, true}.has(1, 2);
        auto n_neg_dims = std::count(dims.begin(), dims.end(), -1);
        if(n_neg_dims > 1)
            MIGRAPHX_THROW("reshape: Dimensions for reshape can only have one -1 dim");
        auto s0 = inputs.front();
+        if(inputs.size() == 1)
+        {
            if(s0.dynamic())
            {
                return dyn_compute_shape(s0);
@@ -231,10 +249,17 @@ struct reshape
                return static_compute_shape(inputs, n_neg_dims);
            }
        }
+        else
+        {
+            return inputs.back();
+        }
+    }
    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
        assert(dyn_out.computed_shape.standard());
+        if(args.size() == 1)
+        {
            argument result{dyn_out.computed_shape};
            visit_all(result, args[0])([&](auto output, auto input) {
@@ -242,6 +267,21 @@ struct reshape
            });
            return result;
        }
+        else
+        {
+            // 2 arg
+            if(args[0].get_shape().elements() != args[1].get_shape().elements())
+            {
+                MIGRAPHX_THROW("Reshape: Number of elements must match at runtime. Input: " +
+                               std::to_string(args[0].get_shape().elements()) +
+                               " Output buffer: " + std::to_string(args[1].get_shape().elements()));
+            }
+            visit_all(args[1], args[0])([&](auto output, auto input) {
+                std::copy(input.begin(), input.end(), output.begin());
+            });
+            return args[1];
+        }
+    }
 };
 } // namespace op

--- a/src/include/migraphx/op/roialign.hpp
+++ b/src/include/migraphx/op/roialign.hpp
@@ -33,6 +33,7 @@
 #include <migraphx/dfor.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/shape_for_each.hpp>
+#include <array>
 #include <cmath>
 #include <numeric>
 #include <utility>

--- a/src/include/migraphx/op/scatter.hpp
+++ b/src/include/migraphx/op/scatter.hpp
@@ -66,7 +66,7 @@ struct scatter : op_name<Derived>
    shape normalize_compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this}.has(3).standard();
+        check_shapes{inputs, *this}.has(3);
        // If non-packed, this converts to a packed output while preserving permutation of tensor
        return inputs.front().with_lens(inputs.front().lens());
    }

--- a/src/include/migraphx/op/slice.hpp
+++ b/src/include/migraphx/op/slice.hpp
--- a/src/include/migraphx/operators.hpp
+++ b/src/include/migraphx/operators.hpp
@@ -84,6 +84,7 @@
 #include <migraphx/op/mod.hpp>
 #include <migraphx/op/mul.hpp>
 #include <migraphx/op/multibroadcast.hpp>
+#include <migraphx/op/nearbyint.hpp>
 #include <migraphx/op/neg.hpp>
 #include <migraphx/op/nonmaxsuppression.hpp>
 #include <migraphx/op/nonzero.hpp>
@@ -110,7 +111,6 @@
 #include <migraphx/op/rnn_variable_seq_lens.hpp>
 #include <migraphx/op/rnn_var_sl_last_output.hpp>
 #include <migraphx/op/roialign.hpp>
-#include <migraphx/op/round.hpp>
 #include <migraphx/op/rsqrt.hpp>
 #include <migraphx/op/scalar.hpp>
 #include <migraphx/op/scatter_add.hpp>

--- a/src/include/migraphx/optional.hpp
+++ b/src/include/migraphx/optional.hpp
@@ -29,6 +29,17 @@
 #if defined(CPPCHECK)
 #define MIGRAPHX_HAS_OPTIONAL 1
 #define MIGRAPHX_HAS_OPTIONAL_TS 1
+#elif defined(_WIN32)
+#if _MSC_VER >= 1920
+#define MIGRAPHX_HAS_OPTIONAL 1
+#define MIGRAPHX_HAS_OPTIONAL_TS 0
+#elif _MSC_VER >= 1900
+#define MIGRAPHX_HAS_OPTIONAL 0
+#define MIGRAPHX_HAS_OPTIONAL_TS 1
+#else
+#define MIGRAPHX_HAS_OPTIONAL 0
+#define MIGRAPHX_HAS_OPTIONAL_TS 0
+#endif
 #elif defined(__has_include)
 #if __has_include(<optional>) && __cplusplus >= 201703L
 #define MIGRAPHX_HAS_OPTIONAL 1