Merge branch 'develop' into threaded_nms

40fbef9b · Ted Themistokleous · GitHub · d164b151 · aeb9f78c · 40fbef9b
Unverified Commit 40fbef9b authored Aug 05, 2023 by Ted Themistokleous Committed by GitHub Aug 05, 2023
20 changed files
--- a/src/include/migraphx/onnx.hpp
+++ b/src/include/migraphx/onnx.hpp
@@ -26,6 +26,7 @@
 #include <migraphx/program.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/onnx/export.h>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -54,15 +55,19 @@ struct onnx_options
 };
 /// Create a program from an onnx file
-program parse_onnx(const std::string& name, const onnx_options& = onnx_options{});
+MIGRAPHX_ONNX_EXPORT program parse_onnx(const std::string& name,
+                                        const onnx_options& = onnx_options{});
 /// Create a program from an onnx buffer
-program parse_onnx_buffer(const std::string& buffer, const onnx_options& options);
+MIGRAPHX_ONNX_EXPORT program parse_onnx_buffer(const std::string& buffer,
+                                               const onnx_options& options);
 /// Create a program from an onnx buffer
-program parse_onnx_buffer(const void* data, std::size_t size, const onnx_options& options);
+MIGRAPHX_ONNX_EXPORT program parse_onnx_buffer(const void* data,
+                                               std::size_t size,
+                                               const onnx_options& options);
-std::vector<std::string> get_onnx_operators();
+MIGRAPHX_ONNX_EXPORT std::vector<std::string> get_onnx_operators();
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/include/migraphx/op/broadcast.hpp
+++ b/src/include/migraphx/op/broadcast.hpp
@@ -37,10 +37,13 @@ namespace op {
 * 1 input version:
 * Broadcasts a tensor from the original shape to the broadcast_lens by setting the stride of
 * broadcasted dimensions to zero. `axis` attribute for a 1D input shape is the output dimension
- * that stays the same. ex: broadcasting shape [1024] -> [4, 1024, 3] has axis = 1 For higher rank
+ * that stays the same.
- * input shapes, axis is an offset parameter for the broadcasting. Such that this operator would
+ * ex: broadcasting shape [1024] -> [4, 1024, 3] has axis = 1.
- * work in the opposite direction of NumPy broadcasting. ex: broadcasting shape [2, 2] -> [2, 2, 3]
+ *
- * with axis = 0
+ * For higher rank input shapes, axis is an offset parameter for the broadcasting,
+ * such that this operator works in the opposite direction of NumPy broadcasting
+ * (left-most to rightwards element-wise comparison).
+ * ex: broadcasting shape [2, 2] -> [2, 2, 3] with axis = 0.
 *
 * 2 input version:
 * Broadcast the first input 1D shape into the second input shape based on the axis parameter.
@@ -68,6 +71,9 @@ struct broadcast
        {
            // the ONNX broadcast op is deprecated now, so not handling the negative
            // value of axis anymore
+            if(s0.dynamic())
+                MIGRAPHX_THROW(
+                    "BROADCAST: Single dynamic input shape not supported.  Use two inputs.");
            if(axis >= broadcast_lens.size())
            {
                MIGRAPHX_THROW("BROADCAST : axis " + migraphx::to_string(axis) +

--- a/src/include/migraphx/op/clip.hpp
+++ b/src/include/migraphx/op/clip.hpp
@@ -25,12 +25,13 @@
 #define MIGRAPHX_GUARD_OPERATORS_CLIP_HPP
 #include <array>
+#include <cmath>
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/par_for.hpp>
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
-#include <cmath>
+#include <migraphx/dyn_output.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -48,15 +49,15 @@ struct clip
    shape compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this}.has(3).same_type().same_dims();
+        check_shapes{inputs, *this, true}.has(3).same_type().same_dims();
        return inputs.front();
    }
-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
        visit_all(result, args[0], args[1], args[2])([&](auto output, auto x, auto min, auto max) {
-            par_for(output_shape.elements(),
+            par_for(dyn_out.computed_shape.elements(),
                    [&](auto i) { output[i] = std::min(std::max(min[i], x[i]), max[i]); });
        });

--- a/src/include/migraphx/op/common.hpp
+++ b/src/include/migraphx/op/common.hpp
@@ -59,8 +59,8 @@ enum class rnn_direction
    bidirectional,
 };
-std::ostream& operator<<(std::ostream& os, pooling_mode v);
+MIGRAPHX_EXPORT std::ostream& operator<<(std::ostream& os, pooling_mode v);
-std::ostream& operator<<(std::ostream& os, rnn_direction v);
+MIGRAPHX_EXPORT std::ostream& operator<<(std::ostream& os, rnn_direction v);
 } // namespace op
 } // namespace MIGRAPHX_INLINE_NS

--- a/src/include/migraphx/op/convert.hpp
+++ b/src/include/migraphx/op/convert.hpp
@@ -66,7 +66,19 @@ struct convert : unary<convert>
        auto type = target_type;
        return [type](auto x) {
            auto y = x;
-            shape::visit(type, [&](auto as) { y = std::min(std::max(as(x), as.min()), as.max()); });
+            shape::visit(type, [&](auto as) {
+                // clamping value between target_type's max and min doesn't work for NaNs.
+                if(std::isnan(x))
+                {
+                    y = as.nan();
+                }
+                else
+                {
+                    // clamp overflowing/underflowing values to min()/max() instead of +/-infinity
+                    // during downcasting
+                    y = std::min(std::max(as(x), as.min()), as.max());
+                }
+            });
            return y;
        };
    }

--- a/src/include/migraphx/op/convolution.hpp
+++ b/src/include/migraphx/op/convolution.hpp
@@ -79,17 +79,17 @@ struct convolution
        check_shapes{inputs, *this, true}.has(2).same_type().same_ndims().min_ndims(3);
        check_attribute_size();
        // num of dims of input and attribute should match
-        const auto input_size   = inputs[0].max_lens().size();
+        const auto input_ndim   = inputs[0].ndim();
        const auto padding_size = padding.size();
-        if(input_size != padding_size / 2 + 2 && input_size != padding_size + 2)
+        if(input_ndim != padding_size / 2 + 2 && input_ndim != padding_size + 2)
        {
            MIGRAPHX_THROW("CONVOLUTION: input and attribute size mismatch!");
        }
        const shape& x_shape          = inputs.at(0);
        const shape& w_shape          = inputs.at(1);
-        const size_t num_spatial_dims = input_size - 2;
+        const size_t num_spatial_dims = input_ndim - 2;
        if(num_spatial_dims != this->kdims())
        {
            MIGRAPHX_THROW("CONVOLUTION: input k-dims does not match attribute size");
@@ -105,7 +105,7 @@ struct convolution
        }
        else
        {
-            return fixed_compute_shape(x_shape, w_shape);
+            return static_compute_shape(x_shape, w_shape);
        }
    }
@@ -143,23 +143,10 @@ struct convolution
    shape dynamic_compute_shape(shape x_shape, shape w_shape) const
    {
        std::vector<shape::dynamic_dimension> output_dyn_dims = {};
+        output_dyn_dims.push_back(x_shape.to_dynamic().dyn_dims().at(0));
+        output_dyn_dims.push_back(w_shape.to_dynamic().dyn_dims().at(0));
-        auto dynamic_shape_push_back = [&](const shape& input_shape) {
+        const size_t num_spatial_dims = x_shape.ndim() - 2;
-            if(input_shape.dynamic())
-            {
-                output_dyn_dims.push_back(input_shape.dyn_dims().at(0));
-            }
-            else
-            {
-                auto l = input_shape.lens().at(0);
-                output_dyn_dims.push_back({l, l});
-            }
-        };
-        dynamic_shape_push_back(x_shape);
-        dynamic_shape_push_back(w_shape);
-        const size_t num_spatial_dims = x_shape.max_lens().size() - 2;
        if(padding_mode != default_)
        {
            for(std::size_t i = 0; i < num_spatial_dims; ++i)
@@ -198,7 +185,7 @@ struct convolution
        return shape{x_shape.type(), output_dyn_dims};
    }
-    shape fixed_compute_shape(shape x_shape, shape w_shape) const
+    shape static_compute_shape(shape x_shape, shape w_shape) const
    {
        std::vector<size_t> output_lens{x_shape.lens()[0], w_shape.lens()[0]};
        auto spatial_lens = calc_conv_lens(x_shape.lens(), w_shape.lens());

--- a/src/include/migraphx/op/deconvolution.hpp
+++ b/src/include/migraphx/op/deconvolution.hpp
@@ -21,9 +21,11 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#ifndef MIGRAPHX_GUARD_OPERATORS_DECONVOLUTION_HPP
+#ifndef MIGRAPHX_GUARD_OPERATORS_CONVOLUTION_BACKWARDS_HPP
-#define MIGRAPHX_GUARD_OPERATORS_DECONVOLUTION_HPP
+#define MIGRAPHX_GUARD_OPERATORS_CONVOLUTION_BACKWARDS_HPP
+#include <cmath>
+#include <utility>
 #include <migraphx/op/common.hpp>
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/config.hpp>
@@ -31,14 +33,13 @@
 #include <migraphx/argument.hpp>
 #include <migraphx/par_dfor.hpp>
 #include <migraphx/shape_for_each.hpp>
-#include <cmath>
+#include <migraphx/dyn_output.hpp>
-#include <utility>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
-struct deconvolution
+struct convolution_backwards
 {
    std::vector<std::size_t> padding  = {0, 0};
    std::vector<std::size_t> stride   = {1, 1};
@@ -57,45 +58,91 @@ struct deconvolution
                    f(self.group, "group"));
    }
-    std::string name() const { return "deconvolution"; }
+    std::string name() const { return "convolution_backwards"; }
    void check_attribute_size() const
    {
-        if((padding.size() != stride.size() and (padding.size() / 2) != stride.size()) or
+        if(padding.size() != stride.size() or stride.size() != dilation.size())
-           stride.size() != dilation.size())
        {
-            MIGRAPHX_THROW("deconvolution: inconsistent attribute sizes");
+            MIGRAPHX_THROW("CONVOLUTION_BACKWARDS: inconsistent attribute sizes");
        }
    }
    shape compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this}.has(2).same_type().same_ndims().min_ndims(3);
+        check_shapes{inputs, *this, true}.has(2).same_type().same_ndims().min_ndims(3);
-        const shape& input   = inputs.at(0);
+        const shape& x_shape = inputs.at(0);
-        const shape& weights = inputs.at(1);
+        const shape& w_shape = inputs.at(1);
-        size_t kdims         = input.lens().size() - 2;
+        if(x_shape.ndim() - 2 != this->kdims())
-        if(kdims != this->kdims())
        {
-            MIGRAPHX_THROW("deconvolution: input k-dims does not match attribute size");
+            MIGRAPHX_THROW("CONVOLUTION_BACKWARDS: input k-dims does not match attribute size");
        }
-        std::vector<size_t> output_lens{input.lens()[0], weights.lens()[1]};
+        if(not x_shape.dynamic() and not w_shape.dynamic() and
+           x_shape.lens().at(1) != (w_shape.lens().at(0) * group))
+        {
+            MIGRAPHX_THROW("CONVOLUTION_BACKWARDS: mismatched channel numbers");
+        }
-        for(size_t i = 0; i < kdims; i++)
+        if(x_shape.dynamic() or w_shape.dynamic())
        {
-            output_lens.push_back(std::size_t(std::max<std::ptrdiff_t>(
+            return dynamic_compute_shape(x_shape, w_shape);
+        }
+        else
+        {
+            return static_compute_shape(x_shape, w_shape);
+        }
+    }
+    std::vector<std::size_t> calc_spatial_lens(std::vector<std::size_t> x_lens,
+                                               std::vector<std::size_t> w_lens) const
+    {
+        std::vector<size_t> spatial_lens(x_lens.size() - 2);
+        // stride * (input - 1) + output_padding + ((kernel - 1) * dilation + 1) - padding_L -
+        // padding_R. This assumes padding_L = padding_R and output_padding handled in parser.
+        for(size_t i = 0; i < spatial_lens.size(); i++)
+        {
+            spatial_lens.at(i) = (std::size_t(std::max<std::ptrdiff_t>(
                1,
-                stride[i] * (input.lens()[i + 2] - 1) +
+                stride[i] * (x_lens[i + 2] - 1) + ((w_lens[i + 2] - 1) * dilation[i] + 1) -
-                    ((weights.lens()[i + 2] - 1) * dilation[i] + 1) - 2 * padding[i])));
+                    2 * padding[i])));
        }
-        return inputs[0].with_lens(output_lens);
+        return spatial_lens;
+    }
+    shape dynamic_compute_shape(shape x_shape, shape w_shape) const
+    {
+        std::vector<shape::dynamic_dimension> output_dyn_dims = {};
+        output_dyn_dims.push_back(x_shape.to_dynamic().dyn_dims().at(0));
+        output_dyn_dims.push_back(w_shape.to_dynamic().dyn_dims().at(1));
+        const std::size_t num_spatial_dims = x_shape.ndim() - 2;
+        // Optimals are not computed for the output's dynamic dimensions
+        auto min_spatial_dims = calc_spatial_lens(x_shape.min_lens(), w_shape.min_lens());
+        auto max_spatial_dims = calc_spatial_lens(x_shape.max_lens(), w_shape.max_lens());
+        for(size_t i = 0; i < num_spatial_dims; ++i)
+        {
+            output_dyn_dims.push_back(
+                shape::dynamic_dimension{min_spatial_dims[i], max_spatial_dims[i], {}});
+        }
+        return shape{x_shape.type(), output_dyn_dims};
+    }
+    shape static_compute_shape(shape x_shape, shape w_shape) const
+    {
+        std::vector<size_t> output_lens{x_shape.lens()[0], w_shape.lens()[1]};
+        auto spatial_lens = calc_spatial_lens(x_shape.lens(), w_shape.lens());
+        std::for_each(spatial_lens.begin(), spatial_lens.end(), [&output_lens](auto x) {
+            output_lens.push_back(x);
+        });
+        return x_shape.with_lens(output_lens);
    }
-    argument compute(shape output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
-        auto kdims = this->kdims();
+        auto num_spatial_dims = this->kdims();
        visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
            using type = typename decltype(output)::value_type;
@@ -109,22 +156,22 @@ struct deconvolution
            auto wei_n = wei[0];
            auto wei_c = wei[1];
-            auto out_lens = output_shape.lens();
+            auto out_lens = dyn_out.computed_shape.lens();
            std::vector<std::size_t> win_size{in_c};
            std::copy(in_lens.begin() + 2, in_lens.end(), std::back_inserter(win_size));
            std::copy(wei.begin() + 2, wei.end(), std::back_inserter(win_size));
-            shape win_shape{output_shape.type(), win_size};
+            shape win_shape{dyn_out.computed_shape.type(), win_size};
            par_dfor(in_n, wei_c)([&](int o, int k) {
                shape_for_each(win_shape, [&](auto idx_win) {
                    const int w = idx_win[0];
                    auto input_dims_start = idx_win.begin() + 1;
-                    auto wei_dims_start   = idx_win.begin() + kdims + 1;
+                    auto wei_dims_start   = idx_win.begin() + num_spatial_dims + 1;
                    std::vector<std::ptrdiff_t> win_start;
-                    for(std::size_t n = 0; n < kdims; ++n)
+                    for(std::size_t n = 0; n < num_spatial_dims; ++n)
                    {
                        win_start.push_back(std::ptrdiff_t(*(input_dims_start + n) * stride[n]) -
                                            std::ptrdiff_t(padding[n]));
@@ -135,7 +182,7 @@ struct deconvolution
                    std::vector<std::ptrdiff_t> idx_out{o, in_ch};
-                    for(size_t n = 0; n < kdims; n++)
+                    for(size_t n = 0; n < num_spatial_dims; n++)
                    {
                        idx_out.push_back(win_start[n] + *(wei_dims_start + n) * dilation[n]);
                    }

--- a/src/include/migraphx/op/dequantizelinear.hpp
+++ b/src/include/migraphx/op/dequantizelinear.hpp
@@ -37,6 +37,15 @@ namespace op {
 struct dequantizelinear
 {
+    value attributes() const
+    {
+        // Note: point_op attribute is not used in this op. Instead, in
+        // gpu compilation pipeline, rewrite_quantization will be invoked
+        // from generate_pointwise() to rewrite this op.
+        return {{"pointwise", true}};
+    }
    std::string name() const { return "dequantizelinear"; }
    shape compute_shape(std::vector<shape> inputs) const
    {

--- a/src/include/migraphx/op/dimensions_of.hpp
+++ b/src/include/migraphx/op/dimensions_of.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_OPERATORS_DIMENSIONS_OF_HPP
+#define MIGRAPHX_GUARD_OPERATORS_DIMENSIONS_OF_HPP
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/argument.hpp>
+#include <migraphx/dyn_output.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+/**
+ * Returns the dimensions of the input argument from starting axis to ending axis.
+ *
+ * Attributes:
+ *  - `start`: first axis whose length is returned (inclusive).
+ *  - `end`: one past the last axis whose length is returned (exclusive).
+ *
+ * At least `end` must be set to use this operator (set `end` to ndim for the default ONNX
+ * behavior of the `Shape` operator). This should only be used for dynamic shapes, as this can be
+ * simplified to a literal for static shapes.
+ *
+ * The output is a 1-D int64 tensor with `end - start` elements.
+ */
+struct dimensions_of
+{
+    std::size_t start = 0;
+    std::size_t end   = 0;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.start, "start"), f(self.end, "end"));
+    }
+    std::string name() const { return "dimensions_of"; }
+    /// Validates the axis range against the single input and returns the 1-D int64 output shape.
+    /// Throws if the range is empty/inverted or if `end` exceeds the rank of the input.
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        check_shapes{inputs, *this, true}.has(1);
+        if(start >= end)
+        {
+            MIGRAPHX_THROW("DIMENSIONS_OF: start >= end. start = " + std::to_string(start) +
+                           ", end = " + std::to_string(end));
+        }
+        // compute() reads input_lens[start, end), so `end` must not exceed the input rank;
+        // otherwise the copy would run past the end of the lens vector.
+        const auto input_ndim = inputs.at(0).ndim();
+        if(end > input_ndim)
+        {
+            MIGRAPHX_THROW("DIMENSIONS_OF: end > input ndim. end = " + std::to_string(end) +
+                           ", ndim = " + std::to_string(input_ndim));
+        }
+        return shape{shape::int64_type, {end - start}};
+    }
+    /// Copies the lengths of axes [start, end) of the first argument's shape into the result.
+    argument compute(const shape& output_shape, std::vector<argument> args) const
+    {
+        argument result{output_shape};
+        auto input_lens = args[0].get_shape().lens();
+        result.visit([&](auto output) {
+            std::copy(input_lens.cbegin() + start, input_lens.cbegin() + end, output.begin());
+        });
+        return result;
+    }
+};
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif
--- a/src/include/migraphx/op/multibroadcast.hpp
+++ b/src/include/migraphx/op/multibroadcast.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -36,9 +36,9 @@ namespace op {
 /**
 * Broadcast multiple dimensions between two tensors.
- * Two versions of this operator: one input and two inputs.
+ * Two versions of this operator: 1 input and 2+ inputs.
 * One input version uses output_lens attribute and broadcasts to it.
- * Two inputs version broadcasts both inputs to the common shape at evaluation time.
+ * 2+ inputs version broadcasts first input to the common shape at evaluation time.
 */
 struct multibroadcast
 {
@@ -57,19 +57,19 @@ struct multibroadcast
    shape compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this, true}.has(1, 2);
+        check_shapes{inputs, *this, true}.has_at_least(1);
        auto t  = inputs.at(0).type();
        auto s0 = inputs.at(0);
-        if(s0.max_lens().empty())
+        if(s0.ndim() < 1)
        {
            MIGRAPHX_THROW("MULTIBROADCAST: input dimensions should be > 0");
        }
        auto make_bcast_strides = [&](std::vector<std::size_t> bcast_lens, std::size_t offset) {
            std::vector<size_t> bcast_strides(bcast_lens.size(), 0);
-            for(std::ptrdiff_t i = s0.lens().size() - 1; i >= 0; i--)
+            for(std::ptrdiff_t i = s0.ndim() - 1; i >= 0; i--)
            {
                if(bcast_lens[i + offset] == s0.lens()[i])
                {
@@ -81,13 +81,16 @@ struct multibroadcast
        if(inputs.size() == 1)
        {
-            if(s0.lens().size() > output_lens.size())
+            if(s0.dynamic())
+                MIGRAPHX_THROW(
+                    "MULTIBROADCAST: Single dynamic input shape not supported.  Use two inputs.");
+            if(s0.ndim() > output_lens.size())
            {
                MIGRAPHX_THROW("MULTIBROADCAST: input dimensions should <= output size");
            }
-            auto offset = output_lens.size() - s0.lens().size();
+            auto offset = output_lens.size() - s0.ndim();
-            for(std::ptrdiff_t i = s0.lens().size() - 1; i >= 0; i--)
+            for(std::ptrdiff_t i = s0.ndim() - 1; i >= 0; i--)
            {
                if(output_lens[i + offset] != s0.lens()[i] and s0.lens()[i] != 1)
                {
@@ -102,20 +105,21 @@ struct multibroadcast
        }
        else
        {
-            // two inputs
+            // 2+ inputs
-            auto s1 = inputs.at(1);
+            if(std::any_of(
-            if(s0.dynamic() or s1.dynamic())
+                   inputs.cbegin(), inputs.cend(), [](auto input) { return input.dynamic(); }))
            {
                if(not output_dyn_dims.empty())
                {
                    return {t, output_dyn_dims};
                }
-                return {t, compute_broadcasted_dyn_dims(s0, s1)};
+                return {t, compute_common_dyn_dims(inputs)};
            }
            else
            {
-                auto bcast_lens    = compute_broadcasted_lens(s0.lens(), s1.lens());
+                // output_lens will not be set for 2+ input version
-                auto offset        = bcast_lens.size() - s0.lens().size();
+                auto bcast_lens    = compute_common_lens(inputs);
+                auto offset        = bcast_lens.size() - s0.ndim();
                auto bcast_strides = make_bcast_strides(bcast_lens, offset);
                return {t, std::move(bcast_lens), std::move(bcast_strides)};
            }

--- a/src/include/migraphx/op/pointwise.hpp
+++ b/src/include/migraphx/op/pointwise.hpp
@@ -45,14 +45,15 @@ struct pointwise
        {
            MIGRAPHX_THROW("should have one submodule.");
        }
-        auto* pm    = mods.front();
+        auto* pm = mods.front();
+        if(pm->get_output_shapes().size() != 1)
+            MIGRAPHX_THROW("pointwise should have only one output.");
+        if(inputs.empty())
+            MIGRAPHX_THROW("pointwise should have at least one input");
        auto pnames = pm->get_parameter_names();
        std::sort(pnames.begin(), pnames.end());
        check_shapes{inputs, *this}.has(pnames.size()).same_dims();
-        if(pm->get_output_shapes().size() != 1)
-            MIGRAPHX_THROW("submodule should have only one output.");
        auto type = pm->get_output_shapes().front().type();
        // Scalar output if all inputs are scalar

--- a/src/include/migraphx/op/pooling.hpp
+++ b/src/include/migraphx/op/pooling.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -42,16 +42,43 @@ namespace op {
 struct pooling
 {
-    pooling_mode mode                = {pooling_mode::average};
+    pooling_mode mode = {pooling_mode::average};
+    // Padding along each spatial input dimension
+    // Can be ndim or 2*ndim values where ndim is size of lengths
+    // ndim values means pad the same before and after each dimension
+    // 2*ndim values contains n pre and then n post padding values
    std::vector<std::size_t> padding = {0, 0};
-    std::vector<std::size_t> stride  = {1, 1};
+    // Size of stride to take from one placement of the pooling kernel to the next.
+    // This is distinct from the strides used by the shape class.  Must be the same
+    // ndim as lengths.
+    std::vector<std::size_t> stride = {1, 1};
+    // Spatial dimensions of the pooling kernel or window,
+    // 2 smaller than the input tensor rank (NCHW layout)
    std::vector<std::size_t> lengths = {1, 1};
-    bool ceil_mode                   = false;
-    int lp_order                     = 2;
+    // Dilations are not supported at this time.
+    // ceiling mode is a flag affecting output size
+    // or equivalently, placements of the pooling kernel.
+    // When true, round the size upwards, possibly
+    // including partial placements where the kernel extends beyond the edge
+    // of input and even padding.  When false, round down so that all
+    // kernel placements fit but some input values may be dropped.
+    bool ceil_mode = false;
+    int lp_order   = 2;
    // Global pooling with dynamic shape input
    bool dyn_global = false;
+    // An attribute of the ONNX pooling operator, not currently enabled here because MIOpen can't
+    // support it. We currently implement padding for average pooling by inserting a Padding
+    // operator during ONNX parsing, but to support dynamic shape inputs and count_include_pad
+    // together, it would be necessary to do this calculation at runtime in MIOpen.
+    bool count_include_pad = false;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
@@ -68,11 +95,29 @@ struct pooling
    void check_attribute_size() const
    {
-        if((padding.size() != stride.size() and (padding.size() / 2) != stride.size()) or
+        if(dyn_global)
-           (not dyn_global and stride.size() != lengths.size()))
+            return;
+        if((padding.size() != stride.size() and (padding.size()) != stride.size() * 2) or
+           stride.size() != lengths.size())
        {
            MIGRAPHX_THROW("POOLING: inconsistent attribute sizes");
        }
+        if(std::any_of(lengths.begin(), lengths.end(), [&](auto i) { return (i == 0); }) or
+           std::any_of(stride.begin(), stride.end(), [&](auto i) { return (i == 0); }))
+        {
+            MIGRAPHX_THROW("POOLING: size 0 pooling kernel or stride");
+        }
+        // TODO:  update lowering to run the reference
+        // code when OneDNN can't execute pooling for a CPU
+        // OneDNN has a limitation on padding size for pooling.  see
+        // https://oneapi-src.github.io/oneDNN/dev_guide_convolution.html#doxid-dev-guide-convolution
+        // padding = {2}; stride = {1}; lengths = {3} succeeds in oneDNN but
+        // padding = {2}; stride = {1}; lengths = {2} fails.
+        // Also, the referenced documentation contains a max. dimension size of 14 for the kernel
+        // ("weights tensor") that MIGraphX doesn't enforce.
    }
    size_t kdims() const
@@ -112,7 +157,11 @@ struct pooling
        const shape& input = inputs.at(0);
        auto padding_size  = padding.size();
        size_t kdims       = input.ndim() - 2;
-        if(input.ndim() != padding_size / 2 + 2 and input.ndim() != padding_size + 2)
+        if(input.ndim() < 3)
+        {
+            MIGRAPHX_THROW("POOLING: input must have 3 or more dimensions and be nonempty");
+        }
+        if(input.ndim() * 2 != padding_size + 4 and input.ndim() != padding_size + 2)
        {
            MIGRAPHX_THROW("POOLING: input and attribute size mismatch!");
        }
@@ -132,7 +181,7 @@ struct pooling
            }
            else
            {
-                // does not compute for optimals
+                // does not compute optimals
                auto min_spatial_dims = calc_spatial_dim_out(input.min_lens(), kdims);
                auto max_spatial_dims = calc_spatial_dim_out(input.max_lens(), kdims);
                for(size_t i = 0; i < kdims; ++i)
@@ -149,7 +198,7 @@ struct pooling
            std::vector<std::size_t> output_lens(input_lens.begin(), input_lens.begin() + 2);
            // Used for when normalize_compute_shape() is called again at model eval time
-            // for an originally dynamic shape. Since kernel shape is not used with dyn_global.
+            // for an originally dynamic shape. Kernel shape is not used with dyn_global.
            if(dyn_global)
            {
                for(size_t i = 0; i < kdims; ++i)
@@ -184,7 +233,7 @@ struct pooling
        double operator()(double x, double y) const { return x + std::pow(std::abs(y), p); }
-        double final(double x, std::size_t) const { return std::pow(x, 1. / p); }
+        double final(double x, std::size_t) const { return (p == 0) ? 1 : std::pow(x, 1. / p); }
    };
    struct avg_pool
@@ -222,37 +271,82 @@ struct pooling
    {
        auto in_s    = input.get_shape();
        auto in_lens = in_s.lens();
+        // For each element of output; i.e., for each placement of pooling kernel...
        par_for(output_shape.elements(), [&](auto i) {
            auto idx_o = output_shape.multi(i);
            auto n_dim = idx_o.size();
-            std::vector<std::size_t> win_start;
+            // starting offset of the pooling window
+            std::vector<int> win_start;
            std::vector<std::size_t> win_size;
+            // For each spatial dimension, find starting and ending index of pooling kernel
            for(std::size_t dim = 2; dim < n_dim; ++dim)
            {
                auto d_2 = dim - 2;
                int start =
                    static_cast<int>(idx_o[dim] * stride[d_2]) - static_cast<int>(padding[d_2]);
-                int end = std::min(start + kernel_dims[d_2], in_lens[dim]);
+                int end;
-                start   = std::max(start, 0);
+                // NOLINT
+                if(count_include_pad and ceil_mode and (mode != pooling_mode::max))
+                {
+                    // TODO: this block can't execute until we enable count_include_pad
+                    // Even when using padding, if in ceil_mode a window
+                    // could extend beyond the end of both input and
+                    // padding.  Clip out-of-bounds indexes but not padding.
+                    // Check if this kernel extends beyond the padding at end of dimension
+                    end = std::min(start + kernel_dims[d_2],
+                                   in_lens[dim] + static_cast<int>(padding[d_2]));
+                }
+                else
+                {
+                    // In non-ceiling mode, when
+                    // count_include_pad is false, or for max pooling, clip off padding.
+                    end   = std::min(start + kernel_dims[d_2], in_lens[dim]);
+                    start = std::max(start, 0);
+                }
                win_start.push_back(start);
+                if(end < start)
+                {
+                    // This error can be caused by misc. bad input combinations
+                    MIGRAPHX_THROW("POOLING:  invalid attributes");
+                }
                win_size.push_back(end - start);
            }
            shape win_shape{output_shape.type(), win_size};
            auto pool_size    = win_shape.elements();
            double output_val = op.template init<Type>();
+            // for each element in the window...
            shape_for_each(win_shape, [&](auto idx_w) {
+                // the coordinates of this element
                auto idx = idx_o;
+                // Add the kernel location idx_w and the offset win_start, for each dimension.
+                // Negative results are cast to very large unsigned integers.
                std::transform(idx_w.begin(),
                               idx_w.end(),
                               win_start.begin(),
                               idx.begin() + 2,
                               [](auto ii, auto jj) { return ii + jj; });
-                if(std::all_of(idx.begin() + 2, idx.end(), [&](auto ii) { return ii >= 0; }) and
+                // Check if any of coordinates are out of input tensor's range
-                   idx < in_lens)
+                if(std::mismatch(idx.begin() + 2,
+                                 idx.end(),
+                                 in_lens.begin() + 2,
+                                 in_lens.end(),
+                                 std::less<>{}) == std::make_pair(idx.end(), in_lens.end()))
                {
                    output_val = op(output_val, input[in_s.index(idx)]);
                }
+                else
+                {
+                    // this is a padding element.  Padding locations
+                    // don't contribute to average or max pooling total but can play in
+                    // lpnorm pooling.
+                    output_val = op(output_val, 0);
+                }
            });
            output[i] = Type(op.final(output_val, pool_size));
        });

--- a/src/include/migraphx/op/prefix_scan_op.hpp
+++ b/src/include/migraphx/op/prefix_scan_op.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -21,6 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
 #ifndef MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
 #define MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP
@@ -37,6 +38,12 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
+/**
+ * Parent struct for prefix scan operations.  A prefix scan is equivalent to the C++
+ * std::exclusive_scan or std::inclusive_scan.  Given a list of numbers, a prefix scan
+ * sum op returns an equal size list of running totals of the values.  Other operations
+ * besides addition can be supported by their own child ops.
+ */
 template <class Derived>
 struct prefix_scan_op : op_name<Derived>
 {
@@ -60,9 +67,13 @@ struct prefix_scan_op : op_name<Derived>
    shape normalize_compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this}.has(1);
+        check_shapes{inputs, *this, true}.has(1);
        auto s = inputs.front();
-        if(s.broadcasted())
+        if(s.dynamic())
+        {
+            return s;
+        }
+        else if(s.broadcasted())
        {
            return {s.type(), s.lens()};
        }
@@ -72,8 +83,9 @@ struct prefix_scan_op : op_name<Derived>
        }
    }
-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
+        shape output_shape(dyn_out.computed_shape);
        argument result{output_shape};
        auto s = args[0].get_shape();
        if(s == output_shape)

--- a/src/include/migraphx/op/quantizelinear.hpp
+++ b/src/include/migraphx/op/quantizelinear.hpp
@@ -38,6 +38,15 @@ namespace op {
 struct quantizelinear
 {
    std::string name() const { return "quantizelinear"; }
+    // Returns the operator attributes: a single entry, "pointwise", set to true.
+    value attributes() const
+    {
+        // Note: point_op attribute is not used in this op. Instead, in
+        // gpu compilation pipeline, rewrite_quantization will be invoked
+        // from generate_pointwise() to rewrite this op.
+        return {{"pointwise", true}};
+    }
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.same_dims().has(2, 3);

--- a/src/include/migraphx/op/reshape.hpp
+++ b/src/include/migraphx/op/reshape.hpp
@@ -29,6 +29,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/dyn_output.hpp>
+#include <migraphx/optional.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -96,9 +97,115 @@ struct reshape
        return {s0.type(), output_dyn_dims};
    }
+    // Multiplies the elements of [start, last) in order, stopping at the first
+    // element where the running product reaches or exceeds `dim`.
+    // Returns the iterator to that element when the product equals `dim`
+    // exactly. Otherwise (product overshoots, or the range ends first) returns
+    // `start`; callers in this struct treat a result equal to `start` as "no match".
+    template <class Iterator>
+    static auto compute_end_dim(Iterator start, Iterator last, std::size_t dim)
+    {
+        std::size_t x = 1;
+        auto it       = std::find_if(start, last, [&](auto i) {
+            x *= i;
+            return x >= dim;
+        });
+        if(x != dim)
+            return start;
+        return it;
+    }
+    // Checks whether the dims in [dim_start, dim_last) with the matching strides
+    // in [stride_start, stride_last) can be collapsed into one dimension.
+    // Walking from the last axis towards the first, every stride must equal the
+    // following axis' stride multiplied by the following axis' dim
+    // (stride[k-1] == stride[k] * dim[k]). Returns true when that holds for all
+    // adjacent pairs. Both ranges must have the same, non-zero length
+    // (the last stride is dereferenced unconditionally).
+    template <class DimIterator, class StrideIterator>
+    static auto can_strides_merge(DimIterator dim_start,
+                                  DimIterator dim_last,
+                                  StrideIterator stride_start,
+                                  StrideIterator stride_last)
+    {
+        assert(std::distance(dim_start, dim_last) == std::distance(stride_start, stride_last));
+        // Running expected stride, seeded with the innermost (last) stride
+        auto cstride = *std::prev(stride_last);
+        // Pair dim[k] (k = last .. first+1) with stride[k-1]
+        return std::equal(std::make_reverse_iterator(dim_last),
+                          std::make_reverse_iterator(dim_start + 1),
+                          std::make_reverse_iterator(stride_last - 1),
+                          std::make_reverse_iterator(stride_start),
+                          [&](auto dim, auto stride) {
+                              cstride *= dim;
+                              return stride == cstride;
+                          });
+    }
+    // This will reshape the dimensions of the input shape to use the lens of
+    // `rdims`. If this can't be done without changing memory layout then it
+    // will return nullopt.
+    //
+    // When input.standard() holds, the result is simply a shape with lens `rdims`.
+    // Otherwise the input dims (cursor `i`) and the requested dims (cursor `r`)
+    // are walked together:
+    //  - equal dims keep the input stride;
+    //  - a larger requested dim merges a run of input dims ("squeeze"), which
+    //    is only allowed when their strides can merge;
+    //  - a smaller requested dim splits one input dim across a run of
+    //    requested dims ("unsqueeze"), deriving the new strides by division.
+    static optional<shape> reshape_dims(const shape& input, const std::vector<std::size_t>& rdims)
+    {
+        if(input.standard())
+            return shape{input.type(), rdims};
+        const auto& idims    = input.lens();
+        const auto& istrides = input.strides();
+        std::vector<std::size_t> rstrides;
+        std::size_t i = 0;
+        std::size_t r = 0;
+        while(i < idims.size() and r < rdims.size())
+        {
+            auto idim = idims[i];
+            auto rdim = rdims[r];
+            if(rdim == idim)
+            {
+                rstrides.push_back(istrides[i]);
+            }
+            // squeeze
+            else if(rdim > idim)
+            {
+                auto start = idims.begin() + i;
+                auto it    = compute_end_dim(start, idims.end(), rdim);
+                if(it == start)
+                    return nullopt;
+                auto n = it - start;
+                assert((i + n) <= istrides.size());
+                if(not can_strides_merge(
+                       start, it + 1, istrides.begin() + i, istrides.begin() + i + n + 1))
+                    return nullopt;
+                i += n;
+                rstrides.push_back(istrides[i]);
+            }
+            // unsqueeze
+            else // if(rdim < idim)
+            {
+                // `r` (not `i`) is the cursor into rdims. The two cursors diverge
+                // as soon as an earlier squeeze/unsqueeze consumed a different
+                // number of dims on each side, so indexing rdims with `i` would
+                // start the scan at the wrong element (or past the end).
+                auto start = rdims.begin() + r;
+                auto it    = compute_end_dim(start, rdims.end(), idim);
+                if(it == start)
+                    return nullopt;
+                auto n = it - start;
+                assert((r + n) <= rdims.size());
+                // Each split dim's stride is the remaining extent below it
+                auto stride = istrides[i] * idim;
+                std::for_each(start, it + 1, [&](auto dim) {
+                    stride /= dim;
+                    rstrides.push_back(stride);
+                });
+                r += n;
+            }
+            i++;
+            r++;
+        }
+        // Handle trailing 1s
+        if(rstrides.size() < rdims.size() and not rstrides.empty())
+        {
+            auto stride = rstrides.back();
+            for(auto d : range(rdims.begin() + rstrides.size(), rdims.end()))
+            {
+                if(d != 1)
+                    return nullopt;
+                rstrides.push_back(stride);
+            }
+        }
+        // Every requested dim must have received a stride
+        if(rdims.size() != rstrides.size())
+            return nullopt;
+        return shape{input.type(), rdims, rstrides};
+    }
    shape static_compute_shape(std::vector<shape> inputs, std::size_t n_neg_dims) const
    {
-        check_shapes{inputs, *this}.standard();
+        check_shapes{inputs, *this}.has(1);
        auto&& idims = inputs.front().lens();
        std::vector<std::size_t> rdims(dims.begin(), dims.end());
@@ -125,12 +232,17 @@ struct reshape
            }
        }
-        shape s{inputs.front().type(), rdims};
+        auto s = reshape_dims(inputs.front(), rdims);
-        if(s.elements() != inputs.front().elements())
+        if(not s.has_value())
+            MIGRAPHX_THROW("Reshape on axis that is not packed.");
+        if(s->elements() != inputs.front().elements())
            MIGRAPHX_THROW("Reshape: Wrong number of elements for reshape: reshape has " +
-                           std::to_string(s.elements()) + " elements whereas the input has " +
+                           std::to_string(s->elements()) + " elements whereas the input has " +
                           std::to_string(inputs.front().elements()));
-        return s;
+        assert(s->bytes() == inputs.front().bytes());
+        return *s;
    }
    shape compute_shape(std::vector<shape> inputs) const

--- a/src/include/migraphx/op/run_on_target.hpp
+++ b/src/include/migraphx/op/run_on_target.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_RTGLIB_RUN_ON_TARGET_HPP
+#define MIGRAPHX_GUARD_RTGLIB_RUN_ON_TARGET_HPP
+#include <unordered_map>
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <migraphx/config.hpp>
+#include <migraphx/errors.hpp>
+#include <migraphx/shape.hpp>
+#include <migraphx/argument.hpp>
+#include <migraphx/module.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+// Operator that takes exactly one submodule and evaluates it through the
+// `run` callback handed to compute(). `target_id` is exposed via reflect();
+// it is not otherwise read by the code in this struct.
+struct run_on_target
+{
+    std::size_t target_id = 0;
+    std::string name() const { return "run_on_target"; }
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.target_id, "target_id"));
+    }
+    // Requires exactly one module whose parameter count matches the number of
+    // inputs; throws otherwise. Returns the module's output shapes.
+    migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs,
+                                  std::vector<migraphx::module_ref> mods) const
+    {
+        if(mods.size() != 1)
+        {
+            MIGRAPHX_THROW("RUN_ON_TARGET: must have exactly 1 module argument");
+        }
+        auto* mod_input = mods.front();
+        if(inputs.size() != mod_input->get_parameter_shapes().size())
+        {
+            MIGRAPHX_THROW("RUN_ON_TARGET: Mismatched number of input parameters");
+        }
+        auto mod_out_shapes = mod_input->get_output_shapes();
+        return mod_out_shapes;
+    }
+    // Binds `args` to the module's parameter names and invokes `run` on the
+    // module, wrapping the results in a single argument.
+    migraphx::argument
+    compute(const migraphx::shape&,
+            const std::vector<migraphx::argument>& args,
+            const std::vector<migraphx::module_ref>& mods,
+            const std::function<std::vector<migraphx::argument>(
+                migraphx::module_ref&, const std::unordered_map<std::string, migraphx::argument>&)>&
+                run) const
+    {
+        // Check the precondition before touching mods.front(): calling front()
+        // on an empty vector is undefined behaviour.
+        assert(mods.size() == 1);
+        // Local copy: `run` takes the module_ref by non-const lvalue reference
+        auto* mod  = mods.front();
+        auto names = mod->get_parameter_names();
+        // std::set orders the names, so args are paired with the parameter
+        // names in sorted order.
+        std::set<std::string> pnames(names.begin(), names.end());
+        assert(pnames.size() == args.size());
+        std::unordered_map<std::string, migraphx::argument> params;
+        std::transform(pnames.begin(),
+                       pnames.end(),
+                       args.begin(),
+                       std::inserter(params, params.end()),
+                       [](auto&& name, auto&& arg) { return std::make_pair(name, arg); });
+        auto results = run(mod, params);
+        return migraphx::argument{results};
+    }
+};
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif
--- a/src/include/migraphx/op/select_module.hpp
+++ b/src/include/migraphx/op/select_module.hpp
@@ -125,7 +125,7 @@ struct select_module
                           auto ps = param_shapes.at(name);
                           if(a.get_shape() != ps)
                           {
-                               assert(ps.bytes() == a.get_shape().bytes());
+                               assert(ps.bytes() <= a.get_shape().bytes());
                               return std::make_pair(name, a.reshape(ps));
                           }
                           else

--- a/src/include/migraphx/op/unsqueeze.hpp
+++ b/src/include/migraphx/op/unsqueeze.hpp
@@ -95,13 +95,10 @@ struct unsqueeze
            auto type        = input_shape.type();
            auto old_lens    = input_shape.lens();
            auto old_strides = input_shape.strides();
-            if(input_shape.scalar())
+            auto is_scalar   = input_shape.scalar();
-            {
-                if(old_lens.size() == 1 and old_lens.front() == 1)
+            if(is_scalar and old_lens.size() == 1 and old_lens.front() == 1)
-                    return shape{type, old_lens};
+                return shape{type, old_lens};
-                else
-                    MIGRAPHX_THROW("UNSQUEEZE: Input must be a scalar");
-            }
            if(steps.size() > axes.size())
                MIGRAPHX_THROW("UNSQUEEZE: Steps provided with no axis");
@@ -121,13 +118,15 @@ struct unsqueeze
                        step = steps[axis_idx];
                    if(step == 0)
                        MIGRAPHX_THROW("UNSQUEEZE: step must be non-zero");
+                    if(is_scalar and step != 1)
+                        MIGRAPHX_THROW("UNSQUEEZE: step must be 1 when input is scalar");
                    new_lens[i] = step;
                    if(p < old_strides.size())
                    {
                        if((old_lens[p] % step) != 0)
                            MIGRAPHX_THROW("UNSQUEEZE: Axis dimenstion is not divisible by step");
                        old_lens[p] /= step;
-                        new_strides[i] = old_strides[p] * old_lens[p];
+                        new_strides[i] = is_scalar ? 1 : old_strides[p] * old_lens[p];
                    }
                    else
                    {

--- a/src/include/migraphx/operation.hpp
+++ b/src/include/migraphx/operation.hpp
@@ -143,7 +143,7 @@ auto compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs)
    if(inputs.empty())
        MIGRAPHX_THROW("At least one input is required for " + x.name());
    dependent_type<operation, T> y = x;
-    normalize_attributes(y, inputs[0].max_lens());
+    normalize_attributes(y, inputs[0]);
    return any_cast<T>(y).normalize_compute_shape(inputs);
 }
@@ -251,9 +251,10 @@ auto compute_op(rank<1>,
                const shape& output,
                const std::vector<argument>& inputs,
                const std::vector<module_ref>& module_args,
-                F f)
+                F f) -> decltype(x.compute(make_compute_output_shape(pack(x, output, inputs)),
-    -> decltype(
+                                           inputs,
-        x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f))
+                                           module_args,
+                                           f))
 {
    return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f);
 }
@@ -261,11 +262,13 @@ auto compute_op(rank<1>,
 template <class T, class F>
 argument compute_op(rank<0>,
                    const T& x,
-                    const shape&,
+                    const shape& output,
-                    const std::vector<argument>&,
+                    const std::vector<argument>& inputs,
-                    const std::vector<module_ref>&,
+                    const std::vector<module_ref>& module_args,
                    F)
 {
+    if(module_args.empty())
+        return compute_op(x, output, inputs);
    std::string name = x.name();
    MIGRAPHX_THROW("Not computable: " + name);
 }
@@ -307,9 +310,10 @@ auto compute_op(rank<3>,
                const shape& output,
                const std::vector<argument>& inputs,
                const std::vector<module_ref>& module_args,
-                F f)
+                F f) -> decltype(x.compute(make_compute_output_shape(pack(x, output, inputs)),
-    -> decltype(
+                                           inputs,
-        x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f))
+                                           module_args,
+                                           f))
 {
    return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f);
 }
@@ -497,7 +501,7 @@ lifetime get_lifetime_op(const T&)
 #ifdef TYPE_ERASED_DECLARATION
 // Type-erased interface for:
-struct operation
+struct MIGRAPHX_EXPORT operation
 {
    //
    std::string name() const;
@@ -571,7 +575,7 @@ struct operation
    {
        using std::swap;
        auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
-        if(derived and private_detail_te_handle_mem_var.unique())
+        if(derived and private_detail_te_handle_mem_var.use_count() == 1)
        {
            *derived = std::forward<PrivateDetailTypeErasedT>(value);
        }
@@ -1261,7 +1265,7 @@ struct operation
    private_detail_te_handle_base_type& private_detail_te_get_handle()
    {
        assert(private_detail_te_handle_mem_var != nullptr);
-        if(not private_detail_te_handle_mem_var.unique())
+        if(private_detail_te_handle_mem_var.use_count() > 1)
            private_detail_te_handle_mem_var = private_detail_te_handle_mem_var->clone();
        return *private_detail_te_handle_mem_var;
    }
@@ -1388,8 +1392,8 @@ bool has_finalize(const T& x)
    return detail::has_finalize_op(x);
 }
-void migraphx_to_value(value& v, const operation& op);
+MIGRAPHX_EXPORT void migraphx_to_value(value& v, const operation& op);
-void migraphx_from_value(const value& v, operation& op);
+MIGRAPHX_EXPORT void migraphx_from_value(const value& v, operation& op);
 #endif

--- a/src/include/migraphx/operators.hpp
+++ b/src/include/migraphx/operators.hpp
@@ -45,9 +45,10 @@
 #include <migraphx/op/contiguous.hpp>
 #include <migraphx/op/convert.hpp>
 #include <migraphx/op/convolution.hpp>
+#include <migraphx/op/convolution_backwards.hpp>
 #include <migraphx/op/cosh.hpp>
 #include <migraphx/op/cos.hpp>
-#include <migraphx/op/deconvolution.hpp>
+#include <migraphx/op/dimensions_of.hpp>
 #include <migraphx/op/div.hpp>
 #include <migraphx/op/dot.hpp>
 #include <migraphx/op/elu.hpp>