Commit 15a7d96a authored by Paul

Merge from develop

parents 4c370d64 eb094e57
@@ -107,6 +107,7 @@ struct argument : raw_data<argument>
     data_t m_data{};
 };

+std::vector<shape> to_shapes(const std::vector<argument>& args);
 void migraphx_to_value(value& v, const argument& a);
 void migraphx_from_value(const value& v, argument& a);
...
@@ -24,6 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_CHECK_SHAPES_HPP
 #define MIGRAPHX_GUARD_RTGLIB_CHECK_SHAPES_HPP
+#include <migraphx/permutation.hpp>
 #include <migraphx/shape.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/stringutils.hpp>
@@ -232,6 +233,19 @@ struct check_shapes
         return *this;
     }

+    /*!
+     * Check all shapes are packed with certain layouts.
+     */
+    const check_shapes&
+    packed_layouts(const std::initializer_list<std::vector<int64_t>>& layouts) const
+    {
+        if(not this->all_of([&](const shape& s) {
+               return s.packed() and contains(layouts, find_permutation(s));
+           }))
+            MIGRAPHX_THROW(prefix() + "Shapes are not packed with correct layout");
+        return *this;
+    }
+
     /*!
      * Check all shapes are packed or broadcasted.
      */
...
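Reviewer note: a typical call site for the new checker might look like this (hypothetical usage, not part of this commit); here {0, 1, 2, 3} denotes packed NCHW and {0, 2, 3, 1} packed NHWC:

// Hypothetical usage of packed_layouts: accept only packed NCHW or NHWC inputs.
check_shapes{inputs, *this}.has(2).packed_layouts({{0, 1, 2, 3}, {0, 2, 3, 1}});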
@@ -36,6 +36,9 @@ struct operation;
 std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
                                                   std::vector<std::size_t> s1);

+std::vector<shape::dynamic_dimension> compute_broadcasted_dyn_dims(shape s0, shape s1);
+
 shape common_shape(const std::vector<shape>& shapes);

 instruction_ref insert_common_op(module& m,
...
@@ -21,44 +21,55 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_RTGLIB_LEAKY_RELU_HPP
-#define MIGRAPHX_GUARD_RTGLIB_LEAKY_RELU_HPP
+#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_DYN_OUTPUT_HPP
+#define MIGRAPHX_GUARD_MIGRAPHLIB_DYN_OUTPUT_HPP

-#include <migraphx/op/leaky_relu.hpp>
 #include <migraphx/shape.hpp>
-#include <migraphx/reflect.hpp>
-#include <migraphx/gpu/miopen.hpp>
+#include <migraphx/argument.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {

-struct context;
+struct dyn_output
+{
+    // original shape from the instruction
+    shape ins_shape;
+    // shape computed at eval time using input arguments
+    shape computed_shape;
+};

-struct miopen_leaky_relu
+/**
+ * Handle dynamic and static shape at evaluation time.
+ * If converted to shape type, returns original ins_shape.
+ * If converted to dyn_output type, will compute an output shape using the input arguments.
+ */
+template <class F>
+struct compute_output_shape
 {
-    op::leaky_relu op;
-    shared<activation_descriptor> ad;
+    F ins_inputs;

-    template <class Self, class F>
-    static auto reflect(Self& self, F f)
+    operator dyn_output() const
     {
-        return migraphx::reflect(self.op, f);
+        return ins_inputs([](const auto& x, shape ins_shape, const std::vector<argument>& inputs) {
+            if(ins_shape.dynamic())
+                return dyn_output{ins_shape, compute_shape(x, to_shapes(inputs))};
+            return dyn_output{ins_shape, ins_shape};
+        });
     }

-    std::string name() const { return "gpu::leaky_relu"; }
-    shape compute_shape(const std::vector<shape>& inputs) const;
-    argument
-    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    void finalize(context&, const shape&, const std::vector<shape>&);
-    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    operator shape() const
     {
-        return shapes.size() - 1;
+        return ins_inputs(
+            [](const auto&, shape ins_shape, const std::vector<argument>&) { return ins_shape; });
     }
 };
-} // namespace gpu
+
+template <class F>
+compute_output_shape<F> make_compute_output_shape(F f)
+{
+    return {f};
+}

 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

 #endif
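Reviewer note: the two conversion operators are the whole trick here. A rough sketch of what a caller sees (names follow this diff; pack comes from functional.hpp, and the binding below mirrors what the operation.hpp hunk further down actually builds):

// Sketch of the deferred conversion:
auto cos = make_compute_output_shape(pack(op, ins_shape, inputs));
shape s      = cos; // operator shape(): just hands back ins_shape
dyn_output d = cos; // operator dyn_output(): recomputes the output shape from the
                    // runtime inputs when ins_shape is dynamic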
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_RTGLIB_FWD_CONV_BATCHNORM_REWRITE_HPP
-#define MIGRAPHX_GUARD_RTGLIB_FWD_CONV_BATCHNORM_REWRITE_HPP
+#ifndef MIGRAPHX_GUARD_MIGRAPHX_LAYOUT_NHWC_HPP
+#define MIGRAPHX_GUARD_MIGRAPHX_LAYOUT_NHWC_HPP

 #include <string>
 #include <migraphx/instruction_ref.hpp>
@@ -31,18 +31,17 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-struct module;
+struct module_pass_manager;

 /**
- * Rewrite batchnorm to a multiply and add.
+ * Transform convolutions to nhwc
  */
-struct rewrite_batchnorm
+struct layout_nhwc
 {
-    std::string name() const { return "rewrite_batchnorm"; }
-    void apply(module& m) const;
+    std::string name() const { return "layout_nhwc"; }
+    void apply(module_pass_manager& mpm) const;
 };

 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

-#endif
+#endif // MIGRAPHX_GUARD_MIGRAPHX_LAYOUT_NHWC_HPP
@@ -28,6 +28,7 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/value.hpp>
+#include <migraphx/dyn_output.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -60,10 +61,19 @@ struct binary : op_name<Derived>
     value attributes() const { return base_attributes(); }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, static_cast<const Derived&>(*this)}.has(2).same_type().same_dims();
+        check_shapes{inputs, static_cast<const Derived&>(*this), true}
+            .has(2)
+            .same_type()
+            .same_dims();
         auto s0 = inputs.at(0);
         auto s1 = inputs.at(1);
-        if(s0 == s1 and s0.packed())
+        if(s0.dynamic() or s1.dynamic())
+        {
+            if(s0 == s1)
+                return s0;
+            MIGRAPHX_THROW("BINARY: " + point_function() + ": fixed-dyn shape for inputs");
+        }
+        else if(s0 == s1 and s0.packed())
         {
             return s0;
         }
@@ -81,9 +91,9 @@ struct binary : op_name<Derived>
         }
     }

-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
         visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
             std::transform(input1.begin(),
                            input1.end(),
...
@@ -27,23 +27,30 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/dyn_output.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

-/// The broadcast operator performs the numpy-style broadcasting of an axis of a given tensor. This
-/// is achieved primarily by setting the stride of the broadcasted axis to zero. Linear indices are
-/// computed from multi-indices by computing the inner product of the multi-index with the strides.
-/// For example, if we have a tensor A(2,3) it has lengths of (2,3) and strides of (3,1). If we want
-/// to compute the linear offset that corresponds to the element on the 2nd row (i = 1) and 3rd
-/// column (j = 2), we compute the following inner product (1,2) dot (3,1) = 1*3 + 2*1 = 5. It is
-/// obvious from there that we can negate the effects of a given axis by setting the stride of that
-/// axis to zero.
+/**
+ * 1 input version:
+ * Broadcasts a tensor from the original shape to the broadcast_lens by setting the stride of
+ * broadcasted dimensions to zero. The `axis` attribute for a 1D input shape is the output
+ * dimension that stays the same. ex: broadcasting shape [1024] -> [4, 1024, 3] has axis = 1.
+ * For higher rank input shapes, axis is an offset parameter for the broadcasting, such that
+ * this operator works in the opposite direction of NumPy broadcasting.
+ * ex: broadcasting shape [2, 2] -> [2, 2, 3] with axis = 0.
+ *
+ * 2 input version:
+ * Broadcasts the first input 1D shape into the second input shape based on the axis parameter.
+ * Handles broadcasting a 1D static shape into a higher rank dynamic shape.
+ * broadcast_lens is not used.
+ */
 struct broadcast
 {
     uint64_t axis = 0;
-    std::vector<std::size_t> broadcast_lens;
+    std::vector<std::size_t> broadcast_lens = {};

     template <class Self, class F>
     static auto reflect(Self& self, F f)
@@ -54,36 +61,86 @@ struct broadcast
     std::string name() const { return "broadcast"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        auto input = inputs.at(0);
-        auto t     = input.type();
-
-        std::vector<size_t> bcast_strides(broadcast_lens.size(), 0);
-        // the broadcast op is deprecated now, so not handling the negative
-        // value of axis anymore
-        if(axis >= broadcast_lens.size())
-        {
-            MIGRAPHX_THROW("BROADCAST : axis is out of range");
-        }
-
-        if(broadcast_lens.size() - axis < input.lens().size())
-        {
-            MIGRAPHX_THROW("BROADCAST: (broadcast ndims - axis) is less than input ndims");
-        }
-
-        if(not std::equal(input.lens().begin(), input.lens().end(), broadcast_lens.begin() + axis))
-        {
-            MIGRAPHX_THROW("BROADCAST: when broadcasting, succeeding sizes must match");
-        }
-        std::copy(input.strides().begin(), input.strides().end(), bcast_strides.begin() + axis);
-
-        shape output{t, broadcast_lens, std::move(bcast_strides)};
-        if(output.elements() < input.elements())
-            MIGRAPHX_THROW("BROADCAST: output size must be greater than or equal to input size");
-        return output;
+        check_shapes{inputs, *this, true}.has(1, 2);
+        auto s0 = inputs.at(0);
+        auto t  = s0.type();
+
+        if(inputs.size() == 1)
+        {
+            // the ONNX broadcast op is deprecated now, so not handling the negative
+            // value of axis anymore
+            if(axis >= broadcast_lens.size())
+            {
+                MIGRAPHX_THROW("BROADCAST : axis " + migraphx::to_string(axis) +
+                               " is out of range");
+            }
+            if(broadcast_lens.size() - axis < s0.lens().size())
+            {
+                MIGRAPHX_THROW("BROADCAST: (broadcast ndims - axis) is less than s0 ndims");
+            }
+            if(not std::equal(s0.lens().begin(), s0.lens().end(), broadcast_lens.begin() + axis))
+            {
+                MIGRAPHX_THROW("BROADCAST: when broadcasting, succeeding sizes must match");
+            }
+
+            std::vector<size_t> bcast_strides(broadcast_lens.size(), 0);
+            std::copy(s0.strides().begin(), s0.strides().end(), bcast_strides.begin() + axis);
+            shape output{t, broadcast_lens, std::move(bcast_strides)};
+            if(output.elements() < s0.elements())
+            {
+                // don't think this can occur?
+                MIGRAPHX_THROW("BROADCAST: output size must be greater than or equal to s0 size");
+            }
+            return output;
+        }
+        else
+        {
+            // two inputs
+            auto s1 = inputs.at(1);
+            if(s0.dynamic())
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: s0 is a dynamic shape, does not handle "
+                               "broadcasting a dynamic shape");
+            }
+            if(s0.ndim() != 1)
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: s0 has ndim " + migraphx::to_string(s0.ndim()) +
+                               ", only handle ndim = 1");
+            }
+            if(axis >= s1.ndim())
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: axis " + migraphx::to_string(axis) +
+                               " is out of range");
+            }
+            if(s1.dynamic())
+            {
+                s0 = s0.to_dynamic();
+                if(s0.dyn_dims()[0] != s1.dyn_dims()[axis])
+                {
+                    MIGRAPHX_THROW("BROADCAST_2in: s0 length doesn't match with dynamic s1 axis "
+                                   "dimension length (" +
+                                   migraphx::to_string(s0.dyn_dims()[0]) +
+                                   " != " + migraphx::to_string(s1.dyn_dims()[axis]) + ")");
+                }
+                return s1;
+            }
+
+            if(s0.lens()[0] != s1.lens()[axis])
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: s0 length doesn't match with static s1 axis "
+                               "dimension length (" +
+                               migraphx::to_string(s0.lens()[0]) +
+                               " != " + migraphx::to_string(s1.lens()[axis]) + ")");
+            }
+            std::vector<size_t> bcast_strides(s1.ndim(), 0);
+            std::copy(s0.strides().begin(), s0.strides().end(), bcast_strides.begin() + axis);
+            shape output{t, s1.lens(), std::move(bcast_strides)};
+            return output;
+        }
     }

-    argument compute(shape output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        return args[0].reshape(output_shape);
+        return args[0].reshape(dyn_out.computed_shape);
     }
     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };
...
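Reviewer note: a worked example of the one-input stride math, matching the example in the new doc comment:

// broadcasting [1024] -> [4, 1024, 3] with axis = 1 (sketch):
//   s0: lens {1024}, strides {1}
//   bcast_strides starts as {0, 0, 0}; s0's stride is copied into position axis = 1
//   output: lens {4, 1024, 3}, strides {0, 1, 0}
// Every index (i, j, k) maps to linear offset j, so the outer and inner dimensions
// alias the same 1024 elements: broadcasting without copying any data.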
@@ -33,11 +33,11 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+// Padding mode is default_ for fixed shape padding.
+// same_lower and same_upper used for dynamic padding.
 enum padding_mode_t
 {
     default_, // NOLINT
-    same,
-    valid,
     same_lower,
     same_upper
 };
...
@@ -28,6 +28,7 @@
 #include <migraphx/argument.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/dyn_output.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -42,19 +43,27 @@ namespace op {
 struct contiguous
 {
     std::string name() const { return "contiguous"; }

     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1);
-        if(inputs.front().standard())
-            return inputs.front();
-        auto lens = inputs.at(0).lens();
-        auto t    = inputs.at(0).type();
-        return {t, lens};
+        check_shapes{inputs, *this, true}.has(1);
+        auto s0 = inputs.front();
+        if(s0.dynamic() or s0.standard())
+        {
+            return s0;
+        }
+        else
+        {
+            const auto& lens = s0.lens();
+            auto t           = s0.type();
+            return {t, lens};
+        }
     }

-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        assert(output_shape.standard());
-        argument result{output_shape};
+        assert(dyn_out.computed_shape.standard());
+        argument result{dyn_out.computed_shape};
         visit_all(result, args[0])([&](auto output, auto input) {
             shape_for_each(output.get_shape(), [&](const auto& idx) {
                 output(idx.begin(), idx.end()) = input(idx.begin(), idx.end());
...
@@ -44,7 +44,7 @@ struct convert : unary<convert>
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1);
+        check_shapes{inputs, *this, true}.has(1);
         auto input = inputs.at(0);
         if(input.dynamic())
         {
...
@@ -41,9 +41,8 @@ struct convolution
     std::vector<std::size_t> stride   = {1, 1};
     std::vector<std::size_t> dilation = {1, 1};

     int group                   = 1;
     padding_mode_t padding_mode = default_;
-    bool use_dynamic_same_auto_pad = false;

     template <class Self, class F>
     static auto reflect(Self& self, F f)
@@ -52,16 +51,15 @@ struct convolution
                     f(self.stride, "stride"),
                     f(self.dilation, "dilation"),
                     f(self.group, "group"),
-                    f(self.padding_mode, "padding_mode"),
-                    f(self.use_dynamic_same_auto_pad, "use_dynamic_same_auto_pad"));
+                    f(self.padding_mode, "padding_mode"));
     }

     std::string name() const { return "convolution"; }

     void check_attribute_size() const
     {
-        if(not((padding.size() == stride.size() or (padding.size() / 2) == stride.size()) and
-               stride.size() == dilation.size()))
+        if((padding.size() != stride.size() and (padding.size() / 2) != stride.size()) or
+           stride.size() != dilation.size())
         {
             MIGRAPHX_THROW("CONVOLUTION: inconsistent attribute sizes");
         }
@@ -76,7 +74,8 @@ struct convolution
         // num of dims of input and attribute should match
         const auto input_size   = inputs[0].max_lens().size();
         const auto padding_size = padding.size();
-        if(not(input_size == padding_size / 2 + 2 or input_size == padding_size + 2))
+
+        if(input_size != padding_size / 2 + 2 and input_size != padding_size + 2)
         {
             MIGRAPHX_THROW("CONVOLUTION: input and attribute size mismatch!");
         }
@@ -93,13 +92,6 @@ struct convolution
            x_shape.lens().at(1) != (w_shape.lens().at(1) * group))
             MIGRAPHX_THROW("CONVOLUTION: mismatched channel numbers");

-        std::vector<op::padding_mode_t> dyn_pad_modes = {op::padding_mode_t::same_upper,
-                                                         op::padding_mode_t::same_lower};
-        if(use_dynamic_same_auto_pad and not contains(dyn_pad_modes, padding_mode))
-        {
-            MIGRAPHX_THROW("CONVOLUTION: use_dynamic_same_auto_pad set with invalid padding mode");
-        }
-
         if(x_shape.dynamic() or w_shape.dynamic())
         {
             return dynamic_compute_shape(x_shape, w_shape);
@@ -161,7 +153,7 @@ struct convolution
         dynamic_shape_push_back(w_shape);
         const size_t num_spatial_dims = x_shape.max_lens().size() - 2;

-        if(use_dynamic_same_auto_pad)
+        if(padding_mode != default_)
         {
             for(std::size_t i = 0; i < num_spatial_dims; ++i)
             {
...
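Reviewer note: for anyone tracing the same_upper/same_lower paths, this is the conventional SAME auto-padding arithmetic these modes usually imply (a sketch of the standard formula, not code from this diff; whether MIGraphX follows it exactly is an assumption):

// Conventional TF-style SAME padding per spatial dim (assumed semantics):
//   out       = ceil(in / stride)
//   pad_total = max((out - 1) * stride + (kernel - 1) * dilation + 1 - in, 0)
// same_upper puts the odd extra element at the end; same_lower at the front.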
@@ -61,8 +61,8 @@ struct deconvolution
     void check_attribute_size() const
     {
-        if(not((padding.size() == stride.size() or (padding.size() / 2) == stride.size()) and
-               stride.size() == dilation.size()))
+        if((padding.size() != stride.size() and (padding.size() / 2) != stride.size()) or
+           stride.size() != dilation.size())
         {
             MIGRAPHX_THROW("deconvolution: inconsistent attribute sizes");
         }
...
@@ -32,14 +32,13 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

-struct elu
+struct elu : unary<elu>
 {
-    std::string name() const { return "elu"; }
     float alpha = 1;

-    shape compute_shape(std::vector<shape> inputs) const
+    std::string point_op() const
     {
-        check_shapes{inputs, *this}.has(1);
-        return inputs.front();
+        return "${function:where}(${0} > 0, ${0}, ${alpha} * (${function:exp}(${0}) - 1))";
     }

     template <class Self, class F>
@@ -47,6 +46,11 @@ struct elu
     {
         return pack(f(self.alpha, "alpha"));
     }
+
+    auto apply() const
+    {
+        return [&](auto x) { return x > 0 ? x : alpha * std::expm1(x); };
+    }
 };

 } // namespace op
...
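Reviewer note: quick numeric check of the new apply() lambda, with alpha = 1:

//   x =  2.0 -> 2.0                         (positive branch, identity)
//   x = -1.0 -> std::expm1(-1.0) ~ -0.6321  (alpha * (e^x - 1))
// std::expm1 is preferred over std::exp(x) - 1 for accuracy near zero.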
@@ -21,50 +21,48 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_OPERATORS_BATCH_NORM_HPP
-#define MIGRAPHX_GUARD_OPERATORS_BATCH_NORM_HPP
+#ifndef MIGRAPHX_GUARD_OP_LAYOUT_HPP
+#define MIGRAPHX_GUARD_OP_LAYOUT_HPP

-#include <migraphx/check_shapes.hpp>
 #include <migraphx/config.hpp>
+#include <array>
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/stringutils.hpp>
+#include <migraphx/streamutils.hpp>
+#include <migraphx/literal.hpp>
+#include <migraphx/op/unary.hpp>
 #include <cmath>
+#include <utility>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

-struct batch_norm_inference
+struct layout : unary<layout>
 {
-    float epsilon  = 1.0e-6f;
-    float momentum = 0.9f;
+    std::vector<int64_t> permutation;

-    std::string name() const { return "batch_norm_inference"; }
-
-    enum bn_infer_mode_t
-    {
-        per_activation,
-        spatial,
-    };
-
-    bn_infer_mode_t bn_mode = spatial;
-
     template <class Self, class F>
     static auto reflect(Self& self, F f)
     {
-        return pack(
-            f(self.epsilon, "epsilon"), f(self.momentum, "momentum"), f(self.bn_mode, "bn_mode"));
+        return pack(f(self.permutation, "permutation"));
     }

     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(5);
-        check_shapes{inputs.data(), inputs.data() + 1, *this}.same_ndims();
-        check_shapes{inputs.data() + 1, inputs.data() + inputs.size(), *this}.same_shape();
-        return inputs.front();
+        check_shapes{inputs, *this}.has(1).only_dims(permutation.size());
+        auto lens = inputs.at(0).lens();
+        auto t    = inputs.at(0).type();
+        return shape::from_permutation(t, lens, permutation);
+    }
+
+    auto apply() const
+    {
+        return [](auto x) { return x; };
     }
 };

 } // namespace op
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

-#endif
+#endif // MIGRAPHX_GUARD_OP_LAYOUT_HPP
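Reviewer note: for intuition on shape::from_permutation, the NHWC case used by the new layout_nhwc pass (a sketch assuming a float NCHW-ordered tensor; the numbers are mine, not from the diff):

// layout{{0, 2, 3, 1}} on a {1, 64, 32, 32} tensor keeps the same lens but
// reorders the strides so the channel dimension is fastest-varying:
//   lens    = {1, 64, 32, 32}
//   strides = {65536, 1, 2048, 64}   // packed NHWC in memory
// The apply() identity lambda then copies elements into that layout via unary's compute.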
@@ -26,12 +26,13 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/op/unary.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

-struct leaky_relu
+struct leaky_relu : unary<leaky_relu>
 {
     float alpha = 0.01;
@@ -41,11 +42,13 @@ struct leaky_relu
         return pack(f(self.alpha, "alpha"));
     }

+    std::string point_op() const { return "${function:where}(${0} > 0, ${0}, ${alpha} * ${0})"; }
     std::string name() const { return "leaky_relu"; }
-    shape compute_shape(std::vector<shape> inputs) const
+
+    auto apply() const
     {
-        check_shapes{inputs, *this}.has(1);
-        return inputs.front();
+        return [&](auto x) { return x > 0 ? x : x * alpha; };
     }
 };
...
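Reviewer note: as I read the point_op string, it is a template for pointwise codegen where ${0} interpolates the first input and ${alpha} the reflected attribute (this expansion is my assumption, not stated in the diff):

// Assumed expansion of leaky_relu's point_op for an input x:
//   where(x > 0, x, alpha * x)
// which matches the behavior of the apply() lambda used on the reference path.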
@@ -26,64 +26,105 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
+#include <migraphx/dyn_output.hpp>
+#include <migraphx/common.hpp>
 #include <migraphx/config.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+/**
+ * Broadcast multiple dimensions between two tensors.
+ * Two versions of this operator: one input and two inputs.
+ * One input version uses output_lens attribute and broadcasts to it.
+ * Two inputs version broadcasts both inputs to the common shape at evaluation time.
+ */
 struct multibroadcast
 {
-    std::vector<std::size_t> output_lens;
+    std::vector<std::size_t> output_lens = {};
+    // optional attribute
+    std::vector<shape::dynamic_dimension> output_dyn_dims = {};

     template <class Self, class F>
     static auto reflect(Self& self, F f)
     {
-        return pack(f(self.output_lens, "out_lens"));
+        return pack(f(self.output_lens, "out_lens"), f(self.output_dyn_dims, "out_dyn_dims"));
     }

     std::string name() const { return "multibroadcast"; }

     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1);
-        auto t     = inputs.at(0).type();
-        auto input = inputs.at(0);
+        check_shapes{inputs, *this, true}.has(1, 2);
+        auto t  = inputs.at(0).type();
+        auto s0 = inputs.at(0);

-        if(input.lens().empty())
+        if(s0.max_lens().empty())
         {
-            MIGRAPHX_THROW("MULTIBROADCAST: inputs dimensions should be > 0");
+            MIGRAPHX_THROW("MULTIBROADCAST: input dimensions should be > 0");
         }

-        if(input.lens().size() > output_lens.size())
-        {
-            MIGRAPHX_THROW("MULTIBROADCAST: inputs dimensions should <= output size");
-        }
-
-        auto offset = output_lens.size() - input.lens().size();
-        for(std::ptrdiff_t i = input.lens().size() - 1; i >= 0; i--)
-        {
-            if(output_lens[i + offset] != input.lens()[i] and input.lens()[i] != 1)
-            {
-                MIGRAPHX_THROW("MULTIBROADCAST: input shape {" + to_string_range(input.lens()) +
-                               "} cannot be broadcasted to {" + to_string_range(output_lens) +
-                               "}!");
-            }
-        }
-
-        std::vector<size_t> bcast_strides(output_lens.size(), 0);
-        for(std::ptrdiff_t i = input.lens().size() - 1; i >= 0; i--)
-        {
-            if(output_lens[i + offset] == input.lens()[i])
-            {
-                bcast_strides[i + offset] = input.strides()[i];
-            }
-        }
-        return {t, output_lens, bcast_strides};
+        auto make_bcast_strides = [&](std::vector<std::size_t> bcast_lens, std::size_t offset) {
+            std::vector<size_t> bcast_strides(bcast_lens.size(), 0);
+            for(std::ptrdiff_t i = s0.lens().size() - 1; i >= 0; i--)
+            {
+                if(bcast_lens[i + offset] == s0.lens()[i])
+                {
+                    bcast_strides[i + offset] = s0.strides()[i];
+                }
+            }
+            return bcast_strides;
+        };
+
+        if(inputs.size() == 1)
+        {
+            if(s0.lens().size() > output_lens.size())
+            {
+                MIGRAPHX_THROW("MULTIBROADCAST: input dimensions should <= output size");
+            }
+
+            auto offset = output_lens.size() - s0.lens().size();
+            for(std::ptrdiff_t i = s0.lens().size() - 1; i >= 0; i--)
+            {
+                if(output_lens[i + offset] != s0.lens()[i] and s0.lens()[i] != 1)
+                {
+                    MIGRAPHX_THROW("MULTIBROADCAST: input shape {" + to_string_range(s0.lens()) +
+                                   "} cannot be broadcasted to {" + to_string_range(output_lens) +
+                                   "}!");
+                }
+            }
+            auto bcast_strides = make_bcast_strides(output_lens, offset);
+            return {t, output_lens, std::move(bcast_strides)};
+        }
+        else
+        {
+            // two inputs
+            auto s1 = inputs.at(1);
+            if(s0.dynamic() or s1.dynamic())
+            {
+                if(not output_dyn_dims.empty())
+                {
+                    return {t, output_dyn_dims};
+                }
+                return {t, compute_broadcasted_dyn_dims(s0, s1)};
+            }
+            else
+            {
+                auto bcast_lens    = compute_broadcasted_lens(s0.lens(), s1.lens());
+                auto offset        = bcast_lens.size() - s0.lens().size();
+                auto bcast_strides = make_bcast_strides(bcast_lens, offset);
+                return {t, std::move(bcast_lens), std::move(bcast_strides)};
+            }
+        }
     }

-    argument compute(shape output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        return args[0].reshape(output_shape);
+        return args[0].reshape(dyn_out.computed_shape);
     }
     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };
...
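Reviewer note: the two-input static branch reduces to NumPy-style broadcasting; a small worked case of the semantics (example values are mine):

// compute_broadcasted_lens for the two-input version (sketch):
//   s0.lens() = {3, 1, 5}
//   s1.lens() = {4, 5}
//   common    = {3, 4, 5}
// Trailing dimensions are aligned; size-1 dims stretch and missing leading dims are
// prepended, then make_bcast_strides zeroes the stretched positions for s0.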
@@ -64,8 +64,8 @@ struct pooling
     void check_attribute_size() const
     {
-        if(not((padding.size() == stride.size() or (padding.size() / 2) == stride.size()) and
-               stride.size() == lengths.size()))
+        if((padding.size() != stride.size() and (padding.size() / 2) != stride.size()) or
+           stride.size() != lengths.size())
         {
             MIGRAPHX_THROW("POOLING: inconsistent attribute sizes");
         }
@@ -83,7 +83,7 @@ struct pooling
         size_t kdims = input_lens.size() - 2;

         auto input_size   = inputs[0].lens().size();
         auto padding_size = padding.size();
-        if(not(input_size == padding_size / 2 + 2 or input_size == padding_size + 2))
+        if(input_size != padding_size / 2 + 2 and input_size != padding_size + 2)
         {
             MIGRAPHX_THROW("POOLING: input and attribute size mismatch!");
         }
...
@@ -41,9 +41,8 @@ struct quant_convolution
     std::vector<std::size_t> stride   = {1, 1};
     std::vector<std::size_t> dilation = {1, 1};

     padding_mode_t padding_mode = default_;
     int group                   = 1;
-    bool use_dynamic_same_auto_pad = false;

     template <class Self, class F>
     static auto reflect(Self& self, F f)
@@ -52,8 +51,7 @@ struct quant_convolution
                     f(self.stride, "stride"),
                     f(self.dilation, "dilation"),
                     f(self.padding_mode, "padding_mode"),
-                    f(self.group, "group"),
-                    f(self.use_dynamic_same_auto_pad, "use_dynamic_same_auto_pad"));
+                    f(self.group, "group"));
     }

     value attributes() const
@@ -65,8 +63,8 @@ struct quant_convolution
     void check_attribute_size() const
     {
-        if(not((padding.size() == stride.size() or (padding.size() / 2) == stride.size()) and
-               stride.size() == dilation.size()))
+        if((padding.size() != stride.size() and (padding.size() / 2) != stride.size()) or
+           stride.size() != dilation.size())
         {
             MIGRAPHX_THROW("QUANT_CONVOLUTION: inconsistent attribute sizes");
         }
...
@@ -30,6 +30,7 @@
 #include <migraphx/argument.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/value.hpp>
+#include <migraphx/dyn_output.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -62,9 +63,9 @@ struct unary : op_name<Derived>
     value attributes() const { return base_attributes(); }

     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, static_cast<const Derived&>(*this)}.has(1);
+        check_shapes{inputs, static_cast<const Derived&>(*this), true}.has(1);
         auto s = inputs.at(0);
-        if(s.scalar())
+        if(s.dynamic() or s.scalar())
         {
             return s;
         }
@@ -78,9 +79,9 @@ struct unary : op_name<Derived>
         }
     }

-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
         result.visit([&](auto output) {
             args[0].visit([&](auto input) {
                 std::transform(input.begin(),
...
@@ -32,6 +32,8 @@
 #include <utility>
 #include <unordered_map>
 #include <migraphx/reflect.hpp>
+#include <migraphx/dyn_output.hpp>
+#include <migraphx/functional.hpp>
 #include <migraphx/streamutils.hpp>
 #include <migraphx/normalize_attributes.hpp>
 #include <migraphx/argument.hpp>
@@ -199,9 +201,12 @@ auto compute_op(rank<1>,
                 context& ctx,
                 const shape& output_shape,
                 const std::vector<argument>& input)
-    -> decltype(x.compute(auto_any_cast(ctx), output_shape, input))
+    -> decltype(x.compute(auto_any_cast(ctx),
+                          make_compute_output_shape(pack(x, output_shape, input)),
+                          input))
 {
-    return x.compute(auto_any_cast(ctx), output_shape, input);
+    return x.compute(
+        auto_any_cast(ctx), make_compute_output_shape(pack(x, output_shape, input)), input);
 }

 template <class T>
@@ -220,9 +225,9 @@ compute_op(const T& x, context& ctx, const shape& output_shape, const std::vector<argument>& input)

 template <class T>
 auto compute_op(rank<1>, const T& x, const shape& output_shape, const std::vector<argument>& input)
-    -> decltype(x.compute(output_shape, input))
+    -> decltype(x.compute(make_compute_output_shape(pack(x, output_shape, input)), input))
 {
-    return x.compute(output_shape, input);
+    return x.compute(make_compute_output_shape(pack(x, output_shape, input)), input);
 }

 template <class T>
@@ -244,9 +249,11 @@ auto compute_op(rank<1>,
                 const shape& output,
                 const std::vector<argument>& inputs,
                 const std::vector<module_ref>& module_args,
-                F f) -> decltype(x.compute(output, inputs, module_args, f))
+                F f)
+    -> decltype(
+        x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f))
 {
-    return x.compute(output, inputs, module_args, f);
+    return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f);
 }

 template <class T, class F>
@@ -278,9 +285,17 @@ auto compute_op(rank<4>,
                 const shape& output,
                 const std::vector<argument>& inputs,
                 const std::vector<module_ref>& module_args,
-                F f) -> decltype(x.compute(auto_any_cast(ctx), output, inputs, module_args, f))
+                F f) -> decltype(x.compute(auto_any_cast(ctx),
+                                           make_compute_output_shape(pack(x, output, inputs)),
+                                           inputs,
+                                           module_args,
+                                           f))
 {
-    return x.compute(auto_any_cast(ctx), output, inputs, module_args, f);
+    return x.compute(auto_any_cast(ctx),
+                     make_compute_output_shape(pack(x, output, inputs)),
+                     inputs,
+                     module_args,
+                     f);
 }

 template <class T, class F>
@@ -290,9 +305,11 @@ auto compute_op(rank<3>,
                 const shape& output,
                 const std::vector<argument>& inputs,
                 const std::vector<module_ref>& module_args,
-                F f) -> decltype(x.compute(output, inputs, module_args, f))
+                F f)
+    -> decltype(
+        x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f))
 {
-    return x.compute(output, inputs, module_args, f);
+    return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs, module_args, f);
 }

 template <class T, class F>
@@ -302,9 +319,10 @@ auto compute_op(rank<2>,
                 const shape& output,
                 const std::vector<argument>& inputs,
                 const std::vector<module_ref>&,
-                F) -> decltype(x.compute(output, inputs))
+                F)
+    -> decltype(x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs))
 {
-    return x.compute(output, inputs);
+    return x.compute(make_compute_output_shape(pack(x, output, inputs)), inputs);
 }

 template <class T, class F>
@@ -314,9 +332,12 @@ auto compute_op(rank<1>,
                 const shape& output,
                 const std::vector<argument>& inputs,
                 const std::vector<module_ref>&,
-                F) -> decltype(x.compute(auto_any_cast(ctx), output, inputs))
+                F) -> decltype(x.compute(auto_any_cast(ctx),
+                                         make_compute_output_shape(pack(x, output, inputs)),
+                                         inputs))
 {
-    return x.compute(auto_any_cast(ctx), output, inputs);
+    return x.compute(
+        auto_any_cast(ctx), make_compute_output_shape(pack(x, output, inputs)), inputs);
 }

 template <class T, class F>
@@ -348,7 +369,8 @@ auto is_context_free_op(rank<1>,
                         const T& x,
                         const shape& output_shape,
                         const std::vector<argument>& input)
-    -> decltype(x.compute(output_shape, input), std::true_type{});
+    -> decltype(x.compute(make_compute_output_shape(pack(x, output_shape, input)), input),
+                std::true_type{});

 template <class T>
 auto is_context_free_op(rank<0>, const T&, const shape&, const std::vector<argument>&)
...
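Reviewer note: every compute_op overload now wraps (op, shape, args) with pack before handing it to make_compute_output_shape, which is what lets ops that still take const shape& keep working while dyn_output-aware ops get lazy shape recomputation. For readers unfamiliar with pack from functional.hpp, a rough model of its assumed semantics (not the exact implementation):

// Rough model of pack: capture arguments now, replay them into a callable later.
template <class... Ts>
auto pack(Ts... xs)
{
    return [=](auto f) { return f(xs...); };
}
// compute_output_shape's conversion operators then call the stored pack with a
// lambda taking (op, ins_shape, inputs), deferring any work until conversion.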