MIGraphX (gaoqiong) · Commits

Commit 3eaeeca9 (unverified)
Authored Nov 18, 2022 by Ted Themistokleous; committed by GitHub on Nov 18, 2022

Merge branch 'develop' into fix_parse_if

Parents: cccf7d09, af7e6eaa
Showing 20 changed files with 492 additions and 100 deletions (+492, -100)
Dockerfile (+2, -1)
src/common.cpp (+97, -14)
src/eliminate_contiguous.cpp (+17, -4)
src/fuse_pointwise.cpp (+10, -1)
src/include/migraphx/common.hpp (+3, -0)
src/include/migraphx/op/binary.hpp (+14, -4)
src/include/migraphx/op/broadcast.hpp (+90, -33)
src/include/migraphx/op/contiguous.hpp (+18, -9)
src/include/migraphx/op/multibroadcast.hpp (+66, -25)
src/include/migraphx/shape.hpp (+20, -1)
src/onnx/parse_batchnorm.cpp (+1, -1)
src/onnx/parse_binary_op.cpp (+6, -0)
src/pass_manager.cpp (+8, -0)
src/shape.cpp (+43, -7)
test/fuse_pointwise.cpp (+29, -0)
test/onnx/binary_dyn_brcst_add_test.onnx (+0, -0)
test/onnx/binary_dyn_brcst_attr_error_test.onnx (+0, -0)
test/onnx/binary_dyn_brcst_mul_test.onnx (+0, -0)
test/onnx/binary_dyn_brcst_prelu_test.onnx (+0, -0)
test/onnx/gen_onnx.py (+68, -0)
Dockerfile

```diff
@@ -74,7 +74,8 @@ RUN cget -p $PREFIX install facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cma
 RUN cget -p $PREFIX install ccache@v4.1 -DENABLE_TESTING=OFF
 # Install newer cmake for onnx runtime
-RUN cget -p /opt/cmake install kitware/cmake@v3.13.4
+ARG CMAKE_VERSION=3.24.2
+RUN cget -p /opt/cmake install -X binary https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz
 ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
 ARG ONNXRUNTIME_BRANCH=main
```
src/common.cpp

```diff
@@ -27,6 +27,7 @@
 #include <migraphx/algorithm.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/instruction.hpp>
+#include <migraphx/ranges.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -43,6 +44,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 // In this case we need to broadcast the (:,:,1:,:) axis
 // of s0 plus the 1st dimension of s1 giving
 // output_lens = (3,2,7,5)
+//
 std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
                                                   std::vector<std::size_t> s1)
 {
@@ -50,25 +52,63 @@ std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
         return s0;
     if(s0.size() > s1.size())
         s0.swap(s1);
     std::vector<std::size_t> out_lens(s1);
     auto offset = s1.size() - s0.size();
     std::transform(
         s0.begin(), s0.end(), s1.begin() + offset, out_lens.begin() + offset, [&](auto a, auto b) {
             if(a != b and a != 1 and b != 1)
             {
-                MIGRAPHX_THROW("COMPUTE_BROADCASTLEN: shape {" + to_string_range(s0) +
-                               "} and {" + to_string_range(s1) + "} mismatch!");
+                MIGRAPHX_THROW("COMPUTE_BROADCASTLEN: shape {" + migraphx::to_string_range(s0) +
+                               "} and {" + migraphx::to_string_range(s1) + "} mismatch!");
             }
             return std::max(a, b);
         });
     return out_lens;
 }
 
+std::vector<shape::dynamic_dimension> compute_broadcasted_dyn_dims(shape s0, shape s1)
+{
+    // change both shapes to dynamic_dimension representation
+    s0 = s0.to_dynamic();
+    s1 = s1.to_dynamic();
+    if(s0.ndim() > s1.ndim())
+    {
+        std::swap(s0, s1);
+    }
+    auto offset = s1.ndim() - s0.ndim();
+    std::vector<shape::dynamic_dimension> out_dims(s1.dyn_dims());
+    shape::dynamic_dimension one_dyn_dim{1, 1, 0};
+    std::transform(s0.dyn_dims().cbegin(),
+                   s0.dyn_dims().cend(),
+                   s1.dyn_dims().cbegin() + offset,
+                   out_dims.begin() + offset,
+                   [&](auto a, auto b) {
+                       if(a == b)
+                       {
+                           return a;
+                       }
+                       else if(a == one_dyn_dim or b == one_dyn_dim)
+                       {
+                           // setting opt to 0, may need to be changed
+                           return shape::dynamic_dimension{
+                               std::max(a.min, b.min), std::max(a.max, b.max), 0};
+                       }
+                       else
+                       {
+                           MIGRAPHX_THROW("COMPUTE_BROADCASTED_DYN_DIMS: dynamic shapes {" +
+                                          migraphx::to_string_range(s0.dyn_dims()) + "} and {" +
+                                          migraphx::to_string_range(s1.dyn_dims()) +
+                                          "} mismatch!");
+                       }
+                   });
+    return out_dims;
+}
+
 // Compute the common (broadcasted) dimensions of a list of fixed shapes
 std::vector<std::size_t> compute_common_lens(const std::vector<shape>& shapes)
 {
     assert(not shapes.empty());
     assert(std::none_of(
         shapes.cbegin(), shapes.cend(), [](auto shape) { return shape.dynamic(); }));
     return transform_accumulate(shapes.begin() + 1,
                                 shapes.end(),
                                 shapes.front().lens(),
@@ -114,20 +154,63 @@ instruction_ref insert_common_op(module& m,
                                  const operation& op,
                                  std::vector<instruction_ref> inputs)
 {
-    auto common = common_shape(to_shapes(inputs));
-    std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
-        if(input->get_shape().lens() != common.lens())
-        {
-            input = m.insert_instruction(
-                ins, make_op("multibroadcast", {{"out_lens", common.lens()}}), input);
-        }
-        if(input->get_shape().type() != common.type())
-        {
-            input = m.insert_instruction(
-                ins, make_op("convert", {{"target_type", common.type()}}), input);
-        }
-        return input;
-    });
+    if(std::any_of(inputs.cbegin(), inputs.cend(), [](auto input) {
+           return input->get_shape().dynamic();
+       }))
+    {
+        // currently only handles the binary case
+        if(inputs.size() != 2)
+        {
+            MIGRAPHX_THROW("INSERT_COMMON_OP: not handled; " +
+                           migraphx::to_string(inputs.size()) +
+                           "inputs, only handle two inputs if any are dynamic shape");
+        }
+        auto c_type = compute_common_types(to_shapes(inputs));
+        auto c_dyn_dims =
+            compute_broadcasted_dyn_dims(inputs[0]->get_shape(), inputs[1]->get_shape());
+        // following should work for a static or dynamic shape
+        if(inputs[0]->get_shape().dyn_dims() != c_dyn_dims)
+        {
+            inputs[0] = m.insert_instruction(
+                ins,
+                make_op("multibroadcast", {{"out_dyn_dims", to_value(c_dyn_dims)}}),
+                inputs[0],
+                inputs[1]);
+        }
+        if(inputs[1]->get_shape().dyn_dims() != c_dyn_dims)
+        {
+            inputs[1] = m.insert_instruction(
+                ins,
+                make_op("multibroadcast", {{"out_dyn_dims", to_value(c_dyn_dims)}}),
+                inputs[1],
+                inputs[0]);
+        }
+        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
+            if(input->get_shape().type() != c_type)
+            {
+                input = m.insert_instruction(
+                    ins, make_op("convert", {{"target_type", c_type}}), input);
+            }
+            return input;
+        });
+    }
+    else
+    {
+        auto common = common_shape(to_shapes(inputs));
+        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
+            if(input->get_shape().lens() != common.lens())
+            {
+                input = m.insert_instruction(
+                    ins, make_op("multibroadcast", {{"out_lens", common.lens()}}), input);
+            }
+            if(input->get_shape().type() != common.type())
+            {
+                input = m.insert_instruction(
+                    ins, make_op("convert", {{"target_type", common.type()}}), input);
+            }
+            return input;
+        });
+    }
     return m.insert_instruction(ins, op, inputs);
 }
```
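Note (illustration, not part of the diff): the per-axis merge rule that `compute_broadcasted_dyn_dims` applies can be sketched in isolation. `dyn_dim` and `broadcast_dim` below are simplified stand-ins for the MIGraphX types, and this toy equality ignores `opt` entirely, which the real comparison only does for fixed dimensions.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <stdexcept>

// Simplified stand-in for migraphx::shape::dynamic_dimension.
struct dyn_dim
{
    std::size_t min, max, opt;
    // Toy equality: min/max only (the library also considers opt for
    // non-fixed dimensions).
    bool operator==(const dyn_dim& o) const { return min == o.min and max == o.max; }
};

// Per-axis rule mirrored from the diff: equal dims pass through, a fixed
// {1,1} dim broadcasts to the other, anything else is a mismatch.
dyn_dim broadcast_dim(dyn_dim a, dyn_dim b)
{
    dyn_dim one{1, 1, 0};
    if(a == b)
        return a;
    if(a == one or b == one)
        return {std::max(a.min, b.min), std::max(a.max, b.max), 0}; // opt reset to 0
    throw std::runtime_error("dynamic dimensions mismatch");
}

int main()
{
    auto d = broadcast_dim({1, 1, 0}, {2, 8, 4}); // length-1 axis vs dynamic {2..8}
    std::cout << d.min << " " << d.max << "\n";   // prints: 2 8
}
```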
src/eliminate_contiguous.cpp

```diff
@@ -42,6 +42,13 @@ static bool try_compute_shape(instruction_ref ins,
     try
     {
         shape new_shape = ins->get_operator().compute_shape(inputs, mods);
+        // Cannot tell if a dynamic shape will need to be made contiguous
+        if(new_shape.dynamic())
+        {
+            return false;
+        }
+
         // If the output shape is a standard shape, no need to try its output
         if(new_shape.standard())
         {
@@ -133,14 +140,20 @@ static void remove_contiguous(const std::string& op_name, module& m, F f)
         }
     }
 
-    // Perform evaluations in parallel
+    // Perform static contiguous evaluations in parallel
     std::vector<argument> literals(const_instructions.size());
     par_for(const_instructions.size(), 1, [&](const auto i) {
-        auto c      = op::contiguous{};
-        auto prev   = const_instructions[i]->inputs().front();
-        literals[i] = c.compute(c.compute_shape({prev->get_shape()}), {prev->eval()});
+        auto c    = op::contiguous{};
+        auto prev = const_instructions[i]->inputs().front();
+        // compute the output contiguous shape from the previous instruction shape
+        shape computed_shape = c.compute_shape({prev->get_shape()});
+        const std::vector<argument>& prev_eval = {prev->eval()};
+        // prev_eval should not be used in make_compute_output_shape() as computed_shape is static
+        auto co_shape = make_compute_output_shape(pack(c, computed_shape, prev_eval));
+        literals[i]   = c.compute(co_shape, prev_eval);
     });
 
+    // Replace static contiguous operations with a literal
     for(size_t i = 0; i < const_instructions.size(); i++)
     {
         auto l = m.add_literal(literals[i].get_shape(), literals[i].data());
```
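Note (illustration, not part of the diff): "made contiguous" above means materializing a strided view into a densely packed buffer. A toy version of what a contiguous evaluation does for a static broadcast input, using plain loops instead of the library's `visit_all`/`shape_for_each` machinery:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // A broadcast row {10, 20} viewed with lens {2, 2} and strides {0, 1}
    // repeats the row; packing it densely yields {10, 20, 10, 20}.
    std::vector<int> data{10, 20};
    std::vector<std::size_t> lens{2, 2}, strides{0, 1};

    std::vector<int> packed;
    for(std::size_t i = 0; i < lens[0]; ++i)
        for(std::size_t j = 0; j < lens[1]; ++j)
            packed.push_back(data[i * strides[0] + j * strides[1]]);

    for(int v : packed)
        std::cout << v << " "; // prints: 10 20 10 20
    std::cout << "\n";
}
```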
src/fuse_pointwise.cpp

```diff
@@ -45,7 +45,16 @@ static literal get_scalar(instruction_ref ins)
         return {};
     auto e = ins->eval();
     literal r{};
-    e.visit_at([&](auto x) { r = literal{x}; });
+    // needed for bool as visit_at invokes as() which promotes bool to int8
+    // Without this we'll break type checks for logical ops that are fused.
+    if(e.get_shape().type() == shape::bool_type)
+    {
+        r = literal{e.at<bool>()};
+    }
+    else
+    {
+        e.visit_at([&](auto x) { r = literal{x}; });
+    }
     return r;
 }
```
src/include/migraphx/common.hpp

```diff
@@ -36,6 +36,9 @@ struct operation;
 std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
                                                   std::vector<std::size_t> s1);
 
+std::vector<shape::dynamic_dimension> compute_broadcasted_dyn_dims(shape s0, shape s1);
+
 shape common_shape(const std::vector<shape>& shapes);
 
 instruction_ref insert_common_op(module& m,
```
src/include/migraphx/op/binary.hpp

```diff
@@ -28,6 +28,7 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/value.hpp>
+#include <migraphx/dyn_output.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -60,10 +61,19 @@ struct binary : op_name<Derived>
     value attributes() const { return base_attributes(); }
 
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, static_cast<const Derived&>(*this)}.has(2).same_type().same_dims();
+        check_shapes{inputs, static_cast<const Derived&>(*this), true}
+            .has(2)
+            .same_type()
+            .same_dims();
         auto s0 = inputs.at(0);
         auto s1 = inputs.at(1);
-        if(s0 == s1 and s0.packed())
+        if(s0.dynamic() or s1.dynamic())
+        {
+            if(s0 == s1)
+                return s0;
+            MIGRAPHX_THROW("BINARY: " + point_function() + ": fixed-dyn shape for inputs");
+        }
+        else if(s0 == s1 and s0.packed())
         {
             return s0;
         }
@@ -81,9 +91,9 @@ struct binary : op_name<Derived>
         }
     }
 
-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
         visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
             std::transform(input1.begin(),
                            input1.end(),
```
src/include/migraphx/op/broadcast.hpp

```diff
@@ -27,23 +27,30 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/dyn_output.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
 
-/// The broadcast operator performs the numpy-style broadcasting of an axis of a given tensor. This
-/// is achieved primarily by setting the stride of the broadcasted axis to zero. Linear indicies are
-/// computed from multi-indicies by computing the inner product on the multi-index with the strides.
-/// For example, if we have a tensor A(2,3) it has lengths of (2,3) and strides of (3,1). If we want
-/// to compute the linear offset that corresponds to the element on the 2nd row (i = 1) and 3rd
-/// column (j = 2), we compute the following inner product (1,2) dot (3, 1) = 1*3 + 2*1 = 5. It is
-/// obvious from there that we can negate the effects of a given axis by setting the stride of that
-/// axis to zero.
+/**
+ * 1 input version:
+ * Broadcasts a tensor from the original shape to the broadcast_lens by setting the stride of
+ * broadcasted dimensions to zero. `axis` attribute for a 1D input shape is the output dimension
+ * that stays the same. ex: broadcasting shape [1024] -> [4, 1024, 3] has axis = 1. For higher rank
+ * input shapes, axis is an offset parameter for the broadcasting, such that this operator works
+ * in the opposite direction of NumPy broadcasting. ex: broadcasting shape [2, 2] -> [2, 2, 3]
+ * with axis = 0
+ *
+ * 2 input version:
+ * Broadcast the first input 1D shape into the second input shape based on the axis parameter.
+ * Handles broadcasting a 1D static shape into a higher rank dynamic shape.
+ * broadcast_lens is not used
+ */
 struct broadcast
 {
     uint64_t axis = 0;
-    std::vector<std::size_t> broadcast_lens;
+    std::vector<std::size_t> broadcast_lens = {};
 
     template <class Self, class F>
     static auto reflect(Self& self, F f)
@@ -54,36 +61,86 @@ struct broadcast
     std::string name() const { return "broadcast"; }
 
     shape compute_shape(std::vector<shape> inputs) const
     {
-        auto input = inputs.at(0);
-        auto t     = input.type();
-        std::vector<size_t> bcast_strides(broadcast_lens.size(), 0);
-        // the broacast op is deprecated now, so not handling the negative
-        // value of axis anymore
-        if(axis >= broadcast_lens.size())
-        {
-            MIGRAPHX_THROW("BROADCAST : axis is out of range");
-        }
-        if(broadcast_lens.size() - axis < input.lens().size())
-        {
-            MIGRAPHX_THROW("BROADCAST: (broadcast ndims - axis) is less than input ndims");
-        }
-        if(not std::equal(input.lens().begin(), input.lens().end(), broadcast_lens.begin() + axis))
-        {
-            MIGRAPHX_THROW("BROADCAST: when broadcasting, succeeding sizes must match");
-        }
-        std::copy(input.strides().begin(), input.strides().end(), bcast_strides.begin() + axis);
-        shape output{t, broadcast_lens, std::move(bcast_strides)};
-        if(output.elements() < input.elements())
-            MIGRAPHX_THROW("BROADCAST: output size must be greater than or equal to input size");
-        return output;
+        check_shapes{inputs, *this, true}.has(1, 2);
+        auto s0 = inputs.at(0);
+        auto t  = s0.type();
+        if(inputs.size() == 1)
+        {
+            // the ONNX broadcast op is deprecated now, so not handling the negative
+            // value of axis anymore
+            if(axis >= broadcast_lens.size())
+            {
+                MIGRAPHX_THROW("BROADCAST : axis " + migraphx::to_string(axis) +
+                               " is out of range");
+            }
+            if(broadcast_lens.size() - axis < s0.lens().size())
+            {
+                MIGRAPHX_THROW("BROADCAST: (broadcast ndims - axis) is less than s0 ndims");
+            }
+            if(not std::equal(
+                   s0.lens().begin(), s0.lens().end(), broadcast_lens.begin() + axis))
+            {
+                MIGRAPHX_THROW("BROADCAST: when broadcasting, succeeding sizes must match");
+            }
+            std::vector<size_t> bcast_strides(broadcast_lens.size(), 0);
+            std::copy(s0.strides().begin(), s0.strides().end(), bcast_strides.begin() + axis);
+            shape output{t, broadcast_lens, std::move(bcast_strides)};
+            if(output.elements() < s0.elements())
+            {
+                // don't think this can occur?
+                MIGRAPHX_THROW("BROADCAST: output size must be greater than or equal to s0 size");
+            }
+            return output;
+        }
+        else
+        {
+            // two inputs
+            auto s1 = inputs.at(1);
+            if(s0.dynamic())
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: s0 is a dynamic shape, does not handle "
+                               "broadcasting a dynamic shape");
+            }
+            if(s0.ndim() != 1)
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: s0 has ndim " + migraphx::to_string(s0.ndim()) +
+                               ", only handle ndim = 1");
+            }
+            if(axis >= s1.ndim())
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: axis " + migraphx::to_string(axis) +
+                               " is out of range");
+            }
+            if(s1.dynamic())
+            {
+                s0 = s0.to_dynamic();
+                if(s0.dyn_dims()[0] != s1.dyn_dims()[axis])
+                {
+                    MIGRAPHX_THROW("BROADCAST_2in: s0 length doesn't match with dynamic s1 axis "
+                                   "dimension length (" +
+                                   migraphx::to_string(s0.dyn_dims()[0]) + " != " +
+                                   migraphx::to_string(s1.dyn_dims()[axis]) + ")");
+                }
+                return s1;
+            }
+            if(s0.lens()[0] != s1.lens()[axis])
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: s0 length doesn't match with static s1 axis "
+                               "dimension length (" +
+                               migraphx::to_string(s0.lens()[0]) + " != " +
+                               migraphx::to_string(s1.lens()[axis]) + ")");
+            }
+            std::vector<size_t> bcast_strides(s1.ndim(), 0);
+            std::copy(s0.strides().begin(), s0.strides().end(), bcast_strides.begin() + axis);
+            shape output{t, s1.lens(), std::move(bcast_strides)};
+            return output;
+        }
     }
 
-    argument compute(shape output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        return args[0].reshape(output_shape);
+        return args[0].reshape(dyn_out.computed_shape);
     }
     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };
```
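Note (illustration, not part of the diff): the `[1024] -> [4, 1024, 3]` example in the new doc comment can be checked with a few standalone lines. This mirrors the stride construction in the one-input branch but is not library code:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // Broadcasting shape [1024] (strides {1}) to [4, 1024, 3] with axis = 1:
    // broadcast dimensions get stride 0, the kept dimension keeps its stride.
    std::vector<std::size_t> broadcast_lens{4, 1024, 3};
    std::vector<std::size_t> in_strides{1};
    std::size_t axis = 1;

    std::vector<std::size_t> bcast_strides(broadcast_lens.size(), 0);
    std::copy(in_strides.begin(), in_strides.end(), bcast_strides.begin() + axis);

    for(auto s : bcast_strides)
        std::cout << s << " "; // prints: 0 1 0
    std::cout << "\n";
}
```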
src/include/migraphx/op/contiguous.hpp

```diff
@@ -28,6 +28,7 @@
 #include <migraphx/argument.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/dyn_output.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -42,19 +43,27 @@ namespace op {
 struct contiguous
 {
     std::string name() const { return "contiguous"; }
 
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1);
-        if(inputs.front().standard())
-            return inputs.front();
-        auto lens = inputs.at(0).lens();
-        auto t    = inputs.at(0).type();
-        return {t, lens};
+        check_shapes{inputs, *this, true}.has(1);
+        auto s0 = inputs.front();
+        if(s0.dynamic() or s0.standard())
+        {
+            return s0;
+        }
+        else
+        {
+            const auto& lens = s0.lens();
+            auto t           = s0.type();
+            return {t, lens};
+        }
     }
 
-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        assert(output_shape.standard());
-        argument result{output_shape};
+        assert(dyn_out.computed_shape.standard());
+        argument result{dyn_out.computed_shape};
         visit_all(result, args[0])([&](auto output, auto input) {
             shape_for_each(output.get_shape(), [&](const auto& idx) {
                 output(idx.begin(), idx.end()) = input(idx.begin(), idx.end());
```
src/include/migraphx/op/multibroadcast.hpp

```diff
@@ -26,64 +26,105 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
+#include <migraphx/dyn_output.hpp>
+#include <migraphx/common.hpp>
 #include <migraphx/config.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
 
+/**
+ * Broadcast multiple dimensions between two tensors.
+ * Two versions of this operator: one input and two inputs.
+ * One input version uses output_lens attribute and broadcasts to it.
+ * Two inputs version broadcasts both inputs to the common shape at evaluation time.
+ */
 struct multibroadcast
 {
-    std::vector<std::size_t> output_lens;
+    std::vector<std::size_t> output_lens = {};
+    // optional attribute
+    std::vector<shape::dynamic_dimension> output_dyn_dims = {};
 
     template <class Self, class F>
     static auto reflect(Self& self, F f)
     {
-        return pack(f(self.output_lens, "out_lens"));
+        return pack(f(self.output_lens, "out_lens"), f(self.output_dyn_dims, "out_dyn_dims"));
     }
 
     std::string name() const { return "multibroadcast"; }
 
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1);
-        auto t     = inputs.at(0).type();
-        auto input = inputs.at(0);
-
-        if(input.lens().empty())
-        {
-            MIGRAPHX_THROW("MULTIBROADCAST: inputs dimensions should be > 0");
-        }
-
-        if(input.lens().size() > output_lens.size())
-        {
-            MIGRAPHX_THROW("MULTIBROADCAST: inputs dimensions should <= output size");
-        }
-
-        auto offset = output_lens.size() - input.lens().size();
-        for(std::ptrdiff_t i = input.lens().size() - 1; i >= 0; i--)
-        {
-            if(output_lens[i + offset] != input.lens()[i] and input.lens()[i] != 1)
-            {
-                MIGRAPHX_THROW("MULTIBROADCAST: input shape {" + to_string_range(input.lens()) +
-                               "} cannot be broadcasted to {" + to_string_range(output_lens) +
-                               "}!");
-            }
-        }
-
-        std::vector<size_t> bcast_strides(output_lens.size(), 0);
-        for(std::ptrdiff_t i = input.lens().size() - 1; i >= 0; i--)
-        {
-            if(output_lens[i + offset] == input.lens()[i])
-            {
-                bcast_strides[i + offset] = input.strides()[i];
-            }
-        }
-        return {t, output_lens, bcast_strides};
+        check_shapes{inputs, *this, true}.has(1, 2);
+        auto t  = inputs.at(0).type();
+        auto s0 = inputs.at(0);
+
+        if(s0.max_lens().empty())
+        {
+            MIGRAPHX_THROW("MULTIBROADCAST: input dimensions should be > 0");
+        }
+
+        auto make_bcast_strides = [&](std::vector<std::size_t> bcast_lens, std::size_t offset) {
+            std::vector<size_t> bcast_strides(bcast_lens.size(), 0);
+            for(std::ptrdiff_t i = s0.lens().size() - 1; i >= 0; i--)
+            {
+                if(bcast_lens[i + offset] == s0.lens()[i])
+                {
+                    bcast_strides[i + offset] = s0.strides()[i];
+                }
+            }
+            return bcast_strides;
+        };
+
+        if(inputs.size() == 1)
+        {
+            if(s0.lens().size() > output_lens.size())
+            {
+                MIGRAPHX_THROW("MULTIBROADCAST: input dimensions should <= output size");
+            }
+            auto offset = output_lens.size() - s0.lens().size();
+            for(std::ptrdiff_t i = s0.lens().size() - 1; i >= 0; i--)
+            {
+                if(output_lens[i + offset] != s0.lens()[i] and s0.lens()[i] != 1)
+                {
+                    MIGRAPHX_THROW("MULTIBROADCAST: input shape {" + to_string_range(s0.lens()) +
+                                   "} cannot be broadcasted to {" + to_string_range(output_lens) +
+                                   "}!");
+                }
+            }
+            auto bcast_strides = make_bcast_strides(output_lens, offset);
+            return {t, output_lens, std::move(bcast_strides)};
+        }
+        else
+        {
+            // two inputs
+            auto s1 = inputs.at(1);
+            if(s0.dynamic() or s1.dynamic())
+            {
+                if(not output_dyn_dims.empty())
+                {
+                    return {t, output_dyn_dims};
+                }
+                return {t, compute_broadcasted_dyn_dims(s0, s1)};
+            }
+            else
+            {
+                auto bcast_lens    = compute_broadcasted_lens(s0.lens(), s1.lens());
+                auto offset        = bcast_lens.size() - s0.lens().size();
+                auto bcast_strides = make_bcast_strides(bcast_lens, offset);
+                return {t, std::move(bcast_lens), std::move(bcast_strides)};
+            }
+        }
     }
 
-    argument compute(shape output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        return args[0].reshape(output_shape);
+        return args[0].reshape(dyn_out.computed_shape);
     }
     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };
```
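Note (illustration, not part of the diff): the static two-input branch defers to `compute_broadcasted_lens` from src/common.cpp. A self-contained restatement of that length rule (`broadcasted_lens` is a hypothetical name, not the library function):

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

// Align the shorter shape to the right and take the max over each axis,
// allowing a length of 1 to stretch to the other length.
std::vector<std::size_t> broadcasted_lens(std::vector<std::size_t> s0,
                                          std::vector<std::size_t> s1)
{
    if(s0.size() > s1.size())
        s0.swap(s1);
    auto out    = s1;
    auto offset = s1.size() - s0.size();
    std::transform(s0.begin(), s0.end(), s1.begin() + offset, out.begin() + offset,
                   [](auto a, auto b) {
                       if(a != b and a != 1 and b != 1)
                           throw std::runtime_error("shapes mismatch");
                       return std::max(a, b);
                   });
    return out;
}

int main()
{
    for(auto l : broadcasted_lens({3, 1, 5}, {2, 1})) // {3,1,5} + {2,1} -> {3,2,5}
        std::cout << l << " ";
    std::cout << "\n";
}
```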
src/include/migraphx/shape.hpp

```diff
@@ -30,6 +30,7 @@
 #include <numeric>
 #include <memory>
 #include <migraphx/functional.hpp>
+#include <migraphx/errors.hpp>
 #include <migraphx/half.hpp>
 #include <migraphx/config.hpp>
@@ -89,7 +90,10 @@ struct shape
         std::size_t opt = 0;
 
         template <class Self, class F>
-        static auto reflect(Self& self, F f);
+        static auto reflect(Self& self, F f)
+        {
+            return pack(f(self.min, "min"), f(self.max, "max"), f(self.opt, "opt"));
+        }
 
         bool is_fixed() const;
         bool has_optimal() const;
@@ -115,6 +119,12 @@ struct shape
     shape(type_t t, std::vector<dynamic_dimension> dims);
 
+    // Construct a dynamic shape from three sets of lengths (of the same rank)
+    shape(type_t t,
+          std::vector<std::size_t> mins,
+          std::vector<std::size_t> maxes,
+          std::vector<std::size_t> opts);
+
     template <class Range>
     shape(type_t t, const Range& l) : shape(t, std::vector<std::size_t>(l.begin(), l.end()))
     {
@@ -136,6 +146,12 @@ struct shape
     const std::vector<std::size_t>& lens() const;
     const std::vector<std::size_t>& strides() const;
 
+    /*!
+     * The number of dimensions in the shape.
+     * Same as the number of indices required to get a data value.
+     */
+    std::size_t ndim() const;
+
     /*!
      * Return the number of elements in the tensor.
      */
@@ -221,6 +237,9 @@ struct shape
     shape with_type(type_t t) const;
 
+    // convert the shape to an equivalent dynamic shape
+    shape to_dynamic() const;
+
     friend bool operator==(const shape& x, const shape& y);
     friend bool operator!=(const shape& x, const shape& y);
     friend std::ostream& operator<<(std::ostream& os, const shape& x);
```
src/onnx/parse_batchnorm.cpp

```diff
@@ -44,7 +44,7 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
         {
             epsilon = parser.parse_value(info.attributes.at("epsilon")).at<float>();
         }
-        auto x_lens = args[0]->get_shape().lens();
+        auto x_lens = args[0]->get_shape().max_lens();
         auto x_type = args[0]->get_shape().type();
         if(std::any_of(args.cbegin() + 1, args.cend(), [](auto a) {
```
src/onnx/parse_binary_op.cpp

```diff
@@ -57,6 +57,12 @@ struct parse_binary_op : op_parser<parse_binary_op>
                 parser.parse_value(info.attributes.at("broadcast")).at<uint64_t>();
             if(broadcasted != 0)
             {
+                if(std::any_of(args.cbegin(), args.cend(), [](auto a) {
+                       return a->get_shape().dynamic();
+                   }))
+                {
+                    MIGRAPHX_THROW(
+                        "Binary op broadcast attribute not supported for dynamic input shapes");
+                }
                 uint64_t axis = parser.parse_value(info.attributes.at("axis")).at<uint64_t>();
                 auto l        = info.add_instruction(make_op("broadcast",
```
src/pass_manager.cpp

```diff
@@ -94,11 +94,19 @@ struct module_pm : module_pass_manager
     virtual void run_pass(const pass& p) override
     {
         assert(mod);
+        timer ts{};
+        using seconds = std::chrono::duration<double>;
         trace("Module: ", mod->name(), ", Pass: ", p.name());
+        const double t1 = ts.record<seconds>();
         assert(mod->validate() == mod->end());
         p.apply(*this);
         trace(*mod);
         validate_pass(*mod, p, *t);
+        const double t2 = ts.record<seconds>();
+        trace("Pass: ", p.name(), " completed in (s): ", (t2 - t1));
     }
 };
```
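Note (illustration, not part of the diff): the timing added to `run_pass` is the usual record-start/record-end pattern. A minimal sketch using `std::chrono` directly, since MIGraphX's own `timer` helper is not shown in this diff:

```cpp
#include <chrono>
#include <iostream>

int main()
{
    using seconds = std::chrono::duration<double>;
    auto start    = std::chrono::steady_clock::now();

    // stand-in for p.apply(*this)
    volatile double acc = 0;
    for(int i = 0; i < 1000000; ++i)
        acc = acc + i * 0.5;

    seconds elapsed = std::chrono::steady_clock::now() - start;
    std::cout << "Pass completed in (s): " << elapsed.count() << "\n";
}
```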
src/shape.cpp

```diff
@@ -71,6 +71,19 @@ struct shape_impl
     {
     }
 
+    shape_impl(shape::type_t t,
+               std::vector<std::size_t> mins,
+               std::vector<std::size_t> maxes,
+               std::vector<std::size_t> opts)
+        : m_type(t)
+    {
+        assert(mins.size() == maxes.size() and maxes.size() == opts.size());
+        for(size_t i = 0; i < mins.size(); ++i)
+        {
+            m_dyn_dims.push_back(shape::dynamic_dimension{mins[i], maxes[i], opts[i]});
+        }
+    }
+
     shape_impl(const std::vector<shape>& subs) : m_type(shape::tuple_type), m_shapes(subs) {}
 
     shape::type_t m_type;
@@ -224,6 +237,14 @@ shape::shape(type_t t, std::vector<shape::dynamic_dimension> dims)
 {
 }
 
+shape::shape(type_t t,
+             std::vector<std::size_t> mins,
+             std::vector<std::size_t> maxes,
+             std::vector<std::size_t> opts)
+    : impl(std::make_shared<shape_impl>(t, std::move(mins), std::move(maxes), std::move(opts)))
+{
+}
+
 shape::shape(const std::vector<shape>& subs) : impl(std::make_shared<shape_impl>(subs)) {}
 
 shape::shape(std::shared_ptr<shape_impl> pimpl) : impl(std::move(pimpl)) {}
@@ -244,6 +265,15 @@ const std::vector<std::size_t>& shape::lens() const { return impl->m_lens; }
 const std::vector<std::size_t>& shape::strides() const { return impl->m_strides; }
 
+std::size_t shape::ndim() const
+{
+    if(this->dynamic())
+    {
+        return dyn_dims().size();
+    }
+    return lens().size();
+}
+
 std::size_t shape::elements() const { return impl->elements(); }
 
 std::size_t shape::bytes() const
@@ -437,6 +467,16 @@ shape shape::with_type(type_t t) const
     return {c};
 }
 
+shape shape::to_dynamic() const
+{
+    if(this->dynamic())
+    {
+        return *this;
+    }
+    std::vector<std::size_t> zeroes(this->ndim(), 0);
+    return {type(), lens(), lens(), zeroes};
+}
+
 std::size_t shape::element_space() const { return impl->element_space(); }
 
 std::string shape::type_string() const { return name(this->type()); }
@@ -464,15 +504,11 @@ bool shape::dynamic_dimension::is_fixed() const { return this->min == this->max;
 bool shape::dynamic_dimension::has_optimal() const { return opt != 0; }
 
-template <class Self, class F>
-auto shape::dynamic_dimension::reflect(Self& self, F f)
-{
-    return pack(f(self.min, "min"), f(self.max, "max"), f(self.opt, "opt"));
-}
-
 bool operator==(const shape::dynamic_dimension& x, const shape::dynamic_dimension& y)
 {
-    return (x.min == y.min and x.max == y.max and x.opt == y.opt);
+    // don't check opt if both are fixed
+    return (x.min == y.min and x.max == y.max and
+            ((x.is_fixed() and y.is_fixed()) or (x.opt == y.opt)));
 }
 
 bool operator!=(const shape::dynamic_dimension& x, const shape::dynamic_dimension& y)
```
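Note (illustration, not part of the diff): the relaxed `dynamic_dimension` equality can be demonstrated in isolation. `dd` and `equal` below are simplified stand-ins, not the library types:

```cpp
#include <cstddef>
#include <iostream>

// When both dimensions are fixed (min == max) the opt hint is ignored;
// otherwise opt must also match. Mirrors the operator== change above.
struct dd
{
    std::size_t min, max, opt;
    bool is_fixed() const { return min == max; }
};

bool equal(const dd& x, const dd& y)
{
    return x.min == y.min and x.max == y.max and
           ((x.is_fixed() and y.is_fixed()) or (x.opt == y.opt));
}

int main()
{
    std::cout << std::boolalpha
              << equal({4, 4, 0}, {4, 4, 4}) << "\n"  // true: both fixed, opt ignored
              << equal({2, 8, 0}, {2, 8, 4}) << "\n"; // false: dynamic, opt differs
}
```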
test/fuse_pointwise.cpp

```diff
@@ -272,6 +272,35 @@ TEST_CASE(contiguous_input)
     EXPECT(p1 == p2);
 }
 
+TEST_CASE(contiguous_boolean_input)
+{
+    migraphx::shape s{migraphx::shape::bool_type, {2, 3}};
+    migraphx::shape s_lit{migraphx::shape::bool_type, {1}, {0}};
+    migraphx::program p1;
+    {
+        auto* mm  = p1.get_main_module();
+        auto x    = mm->add_parameter("x", s);
+        auto one  = mm->add_literal(migraphx::literal(s_lit, {1.0}));
+        auto yb   = mm->add_instruction(
+            migraphx::make_op("multibroadcast", {{"out_lens", s.lens()}}), one);
+        auto y    = mm->add_instruction(migraphx::make_op("contiguous"), yb);
+        auto xor1 = mm->add_instruction(migraphx::make_op("logical_xor"), x, y);
+        mm->add_return({xor1});
+    }
+    run_pass(p1);
+    migraphx::program p2;
+    {
+        auto* mm  = p2.get_main_module();
+        auto x    = mm->add_parameter("x", s);
+        auto xor1 = add_pointwise(p2, "main:pointwise0", {x}, [=](auto* pm, const auto& inputs) {
+            auto y = pm->add_literal(migraphx::literal(s_lit, {1}));
+            return pm->add_instruction(migraphx::make_op("logical_xor"), inputs[0], y);
+        });
+        mm->add_return({xor1});
+    }
+}
+
 TEST_CASE(all_scalar_input)
 {
     migraphx::shape s{migraphx::shape::float_type};
```
test/onnx/binary_dyn_brcst_add_test.onnx (new file, mode 100644, binary)
test/onnx/binary_dyn_brcst_attr_error_test.onnx (new file, mode 100644, binary)
test/onnx/binary_dyn_brcst_mul_test.onnx (new file, mode 100644, binary)
test/onnx/binary_dyn_brcst_prelu_test.onnx (new file, mode 100644, binary)
test/onnx/gen_onnx.py

```diff
@@ -420,6 +420,74 @@ def batch_norm_invalid_bias_rank_test():
     return ([node], [x, scale, bias, mean, var], [out])
 
 
+@onnx_test
+def binary_dyn_brcst_prelu_test():
+    arg0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [None, 3, 4, 5])
+    arg1 = helper.make_tensor_value_info('1', TensorProto.FLOAT, [4, 5])
+    arg_out = helper.make_tensor_value_info('out', TensorProto.FLOAT, [None, 3, 4, 5])
+
+    node = onnx.helper.make_node(
+        'PRelu',
+        inputs=['0', '1'],
+        outputs=['out'],
+    )
+
+    return ([node], [arg0, arg1], [arg_out])
+
+
+@onnx_test
+def binary_dyn_brcst_add_test():
+    arg0 = helper.make_tensor_value_info('0', TensorProto.FLOAT16, [4, 5])
+    arg1 = helper.make_tensor_value_info('1', TensorProto.FLOAT, [None, 3, 4, 5])
+    arg_out = helper.make_tensor_value_info('out', TensorProto.FLOAT, [None, 3, 4, 5])
+
+    node = onnx.helper.make_node(
+        'Add',
+        inputs=['0', '1'],
+        outputs=['out'],
+    )
+
+    return ([node], [arg0, arg1], [arg_out])
+
+
+@onnx_test
+def binary_dyn_brcst_attr_error_test():
+    arg0 = helper.make_tensor_value_info('0', TensorProto.FLOAT16, [4, 5])
+    arg1 = helper.make_tensor_value_info('1', TensorProto.FLOAT, [None, 3, 4, 5])
+    arg_out = helper.make_tensor_value_info('out', TensorProto.FLOAT, [None, 3, 4, 5])
+
+    node = onnx.helper.make_node(
+        'Add',
+        inputs=['0', '1'],
+        outputs=['out'],
+        broadcast=1,
+        axis=1)
+
+    return ([node], [arg0, arg1], [arg_out])
+
+
+@onnx_test
+def binary_dyn_brcst_mul_test():
+    arg0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [None, 3, 4, 5])
+    arg1 = helper.make_tensor_value_info('1', TensorProto.FLOAT, [4, 1])
+    arg_out = helper.make_tensor_value_info('out', TensorProto.FLOAT, [None, 3, 4, 5])
+
+    node = onnx.helper.make_node(
+        'Mul',
+        inputs=['0', '1'],
+        outputs=['out'],
+    )
+
+    return ([node], [arg0, arg1], [arg_out])
+
+
 @onnx_test
 def cast_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT16, [10])
```
Diff pagination: page 1 of 2 (20 of the commit's 24 changes are shown above).