Unverified Commit 890816bd authored by Charlie Lin's avatar Charlie Lin Committed by GitHub
Browse files

Dynamic broadcast (#1424)

Two input version of the broadcast operator to handle dynamic shapes
Added comments to describe the versions of the broadcast operator
Dynamic broadcast only handles broadcasting a static 1D shape tensor into the other input shape
parent 12f78eec
......@@ -27,23 +27,30 @@
#include <migraphx/check_shapes.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/dyn_output.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
/// The broadcast operator performs the numpy-style broadcasting of an axis of a given tensor. This
/// is achieved primarily by setting the stride of the broadcasted axis to zero. Linear indices are
/// computed from multi-indices by computing the inner product on the multi-index with the strides.
/// For example, if we have a tensor A(2,3) it has lengths of (2,3) and strides of (3,1). If we want
/// to compute the linear offset that corresponds to the element on the 2nd row (i = 1) and 3rd
/// column (j = 2), we compute the following inner product (1,2) dot (3, 1) = 1*3 + 2*1 = 5. It is
/// obvious from there that we can negate the effects of a given axis by setting the stride of that
/// axis to zero.
/**
* 1 input version:
* Broadcasts a tensor from the original shape to the broadcast_lens by setting the stride of
* broadcasted dimensions to zero. `axis` attribute for a 1D input shape is the output dimension
 * that stays the same. ex: broadcasting shape [1024] -> [4, 1024, 3] has axis = 1. For higher rank
 * input shapes, axis is an offset parameter for the broadcasting, such that this operator works
 * in the opposite direction of NumPy broadcasting. ex: broadcasting shape [2, 2] -> [2, 2, 3]
 * with axis = 0.
*
* 2 input version:
* Broadcast the first input 1D shape into the second input shape based on the axis parameter.
* Handles broadcasting a 1D static shape into a higher rank dynamic shape.
* broadcast_lens is not used
*/
struct broadcast
{
uint64_t axis = 0;
std::vector<std::size_t> broadcast_lens;
std::vector<std::size_t> broadcast_lens = {};
template <class Self, class F>
static auto reflect(Self& self, F f)
......@@ -54,36 +61,86 @@ struct broadcast
std::string name() const { return "broadcast"; }
// Computes the output shape for both versions of the operator:
//   1 input:  broadcast the static input shape into `broadcast_lens` (stride 0
//             on broadcasted dimensions, offset by `axis`).
//   2 inputs: broadcast the static 1D shape s0 into the (possibly dynamic)
//             shape of s1 along `axis`; `broadcast_lens` is not used.
// Throws MIGRAPHX_THROW on rank/length mismatches or out-of-range axis.
shape compute_shape(std::vector<shape> inputs) const
{
    check_shapes{inputs, *this, true}.has(1, 2);
    auto s0 = inputs.at(0);
    auto t  = s0.type();
    if(inputs.size() == 1)
    {
        // the ONNX broadcast op is deprecated now, so not handling the negative
        // value of axis anymore
        if(axis >= broadcast_lens.size())
        {
            MIGRAPHX_THROW("BROADCAST : axis " + migraphx::to_string(axis) +
                           " is out of range");
        }
        if(broadcast_lens.size() - axis < s0.lens().size())
        {
            MIGRAPHX_THROW("BROADCAST: (broadcast ndims - axis) is less than s0 ndims");
        }
        if(not std::equal(s0.lens().begin(), s0.lens().end(), broadcast_lens.begin() + axis))
        {
            MIGRAPHX_THROW("BROADCAST: when broadcasting, succeeding sizes must match");
        }
        // broadcasted dimensions get stride 0; s0's strides occupy the
        // output dimensions starting at `axis`
        std::vector<size_t> bcast_strides(broadcast_lens.size(), 0);
        std::copy(s0.strides().begin(), s0.strides().end(), bcast_strides.begin() + axis);
        shape output{t, broadcast_lens, std::move(bcast_strides)};
        if(output.elements() < s0.elements())
        {
            // don't think this can occur?
            MIGRAPHX_THROW("BROADCAST: output size must be greater than or equal to s0 size");
        }
        return output;
    }
    else
    {
        // two inputs
        auto s1 = inputs.at(1);
        if(s0.dynamic())
        {
            MIGRAPHX_THROW("BROADCAST_2in: s0 is a dynamic shape, does not handle broadcasting "
                           "a dynamic shape");
        }
        if(s0.ndim() != 1)
        {
            MIGRAPHX_THROW("BROADCAST_2in: s0 has ndim " + migraphx::to_string(s0.ndim()) +
                           ", only handle ndim = 1");
        }
        if(axis >= s1.ndim())
        {
            MIGRAPHX_THROW("BROADCAST_2in: axis " + migraphx::to_string(axis) +
                           " is out of range");
        }
        if(s1.dynamic())
        {
            // compare s0's single length against the dynamic dimension at `axis`;
            // on success the output is simply s1's dynamic shape
            s0 = s0.to_dynamic();
            if(s0.dyn_dims()[0] != s1.dyn_dims()[axis])
            {
                MIGRAPHX_THROW("BROADCAST_2in: s0 length doesn't match with dynamic s1 axis "
                               "dimension length (" +
                               migraphx::to_string(s0.dyn_dims()[0]) +
                               " != " + migraphx::to_string(s1.dyn_dims()[axis]) + ")");
            }
            return s1;
        }
        if(s0.lens()[0] != s1.lens()[axis])
        {
            MIGRAPHX_THROW("BROADCAST_2in: s0 length doesn't match with static s1 axis "
                           "dimension length (" +
                           migraphx::to_string(s0.lens()[0]) +
                           " != " + migraphx::to_string(s1.lens()[axis]) + ")");
        }
        // static s1: same stride-0 trick as the 1 input version, with s1's lens
        std::vector<size_t> bcast_strides(s1.ndim(), 0);
        std::copy(s0.strides().begin(), s0.strides().end(), bcast_strides.begin() + axis);
        shape output{t, s1.lens(), std::move(bcast_strides)};
        return output;
    }
}
// Applies the broadcast by reshaping the first argument to the computed
// output shape; no data is copied since broadcasted dimensions use stride 0.
argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
{
    return args[0].reshape(dyn_out.computed_shape);
}
// The output aliases input 0: compute() returns a reshaped view of args[0].
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
};
......
......@@ -61,7 +61,7 @@ struct parse_binary_op : op_parser<parse_binary_op>
args.cbegin(), args.cend(), [](auto a) { return a->get_shape().dynamic(); }))
{
MIGRAPHX_THROW(
"binary op broadcast attribute not supported for dynamic input shapes");
"Binary op broadcast attribute not supported for dynamic input shapes");
}
uint64_t axis = parser.parse_value(info.attributes.at("axis")).at<uint64_t>();
auto l = info.add_instruction(
......
......@@ -118,6 +118,69 @@ TEST_CASE(broadcast)
}
}
TEST_CASE(broadcast_axis_out_of_range_error)
{
    // axis 4 exceeds the rank of out_lens {1, 1}, so compute_shape must throw
    migraphx::shape in_shape{migraphx::shape::float_type, {1}, {0}};
    std::vector<std::size_t> out_lens{1, 1};
    throws_shape(migraphx::make_op("broadcast", {{"axis", 4}, {"out_lens", out_lens}}), in_shape);
}
TEST_CASE(broadcast_2in_static_static)
{
    // Two input version with both shapes static: the 1D s0 {4} is broadcast
    // into s1 {4, 4}; the non-axis output dimension gets stride 0.
    migraphx::shape s0{migraphx::shape::float_type, {4}, {1}};
    migraphx::shape s1{migraphx::shape::float_type, {4, 4}, {4, 1}};
    expect_shape(migraphx::shape{migraphx::shape::float_type, {4, 4}, {1, 0}},
                 migraphx::make_op("broadcast", {{"axis", 0}}),
                 s0,
                 s1);
    expect_shape(migraphx::shape{migraphx::shape::float_type, {4, 4}, {0, 1}},
                 migraphx::make_op("broadcast", {{"axis", 1}}),
                 s0,
                 s1);
    // axis 2 is out of range for a rank-2 s1
    throws_shape(migraphx::make_op("broadcast", {{"axis", 2}}), s0, s1);
}
TEST_CASE(broadcast_2in_not_matching_error)
{
    // s0 length (4) does not match s1's dimension at axis 1 (2) -> throws
    migraphx::shape s0{migraphx::shape::float_type, {4}, {1}};
    migraphx::shape s1{migraphx::shape::float_type, {2, 2}, {2, 1}};
    throws_shape(migraphx::make_op("broadcast", {{"axis", 1}}), s0, s1);
}
TEST_CASE(broadcast_2in_dynamic_s0_error1)
{
    // The first (broadcasted) input must not be a dynamic shape.
    migraphx::shape static_s{migraphx::shape::float_type, {4, 2}, {2, 1}};
    migraphx::shape dyn_s{migraphx::shape::float_type, {{1, 4, 0}, {4, 4, 0}, {2, 2, 0}}};
    throws_shape(migraphx::make_op("broadcast", {{"axis", 0}}), dyn_s, static_s);
}
TEST_CASE(broadcast_2in_dynamic_s0_error2)
{
    // Even a 1D dynamic s0 is rejected by the two input version.
    std::vector<migraphx::shape::dynamic_dimension> dyn_dims{{4, 4, 0}};
    migraphx::shape dyn_1d{migraphx::shape::float_type, dyn_dims};
    migraphx::shape fixed{migraphx::shape::float_type, {4, 4}, {4, 1}};
    throws_shape(migraphx::make_op("broadcast", {{"axis", 0}}), dyn_1d, fixed);
}
TEST_CASE(broadcast_2in_static_dyn)
{
    // Static 1D s0 broadcast into a dynamic s1: only the axis whose dynamic
    // dimension equals {4, 4} accepts the length-4 s0.
    migraphx::shape s0{migraphx::shape::float_type, {4}, {1}};
    migraphx::shape s1{migraphx::shape::float_type, {{1, 4, 0}, {4, 4, 0}, {2, 2, 0}}};
    throws_shape(migraphx::make_op("broadcast", {{"axis", 0}}), s0, s1);
    expect_shape(migraphx::shape{migraphx::shape::float_type, {{1, 4, 0}, {4, 4, 0}, {2, 2, 0}}},
                 migraphx::make_op("broadcast", {{"axis", 1}}),
                 s0,
                 s1);
    throws_shape(migraphx::make_op("broadcast", {{"axis", 2}}), s0, s1);
}
TEST_CASE(broadcast_2in_dyn_s0_ndim_greater_than_1_error)
{
    // The two input version only handles a 1D s0; rank 2 must throw.
    migraphx::shape rank2_s0{migraphx::shape::float_type, {4, 2}};
    migraphx::shape dyn_s1{migraphx::shape::float_type, {{1, 4, 0}, {4, 4, 0}, {2, 2, 0}}};
    throws_shape(migraphx::make_op("broadcast", {{"axis", 0}}), rank2_s0, dyn_s1);
}
TEST_CASE(convolution_shape)
{
migraphx::shape output{migraphx::shape::float_type, {4, 4, 1, 1}};
......
......@@ -694,6 +694,52 @@ TEST_CASE(broadcast_test)
EXPECT(output(1, 1) == -3);
}
TEST_CASE(broadcast_2in_static_test)
{
    // Broadcast the 1D literal {-2, -3} along axis 0 into a static {2, 2}
    // tensor and check the row-replicated result on the ref target.
    migraphx::program p;
    auto* mm = p.get_main_module();
    migraphx::shape dst_shape{migraphx::shape::int32_type, {2, 2}};
    migraphx::shape src_shape{migraphx::shape::int32_type, {2}};
    auto dst = mm->add_literal(migraphx::literal{dst_shape, std::vector<int32_t>{0, 0, 0, 0}});
    auto src = mm->add_literal(migraphx::literal{src_shape, std::vector<int32_t>{-2, -3}});
    mm->add_instruction(migraphx::make_op("broadcast", {{"axis", 0}}), src, dst);
    p.compile(migraphx::ref::target{});
    auto output = p.eval({}).back().get<int32_t>();
    EXPECT(output(0, 0) == -2);
    EXPECT(output(0, 1) == -2);
    EXPECT(output(1, 0) == -3);
    EXPECT(output(1, 1) == -3);
}
TEST_CASE(broadcast_2in_dyn_test)
{
    // Broadcast a 1D literal into a dynamic-shaped parameter; the concrete
    // output shape is only fixed when the parameter is supplied at eval time.
    migraphx::program p;
    auto* mm = p.get_main_module();
    migraphx::shape dyn_shape{migraphx::shape::int32_type, {{2, 2, 0}, {2, 4, 0}}};
    migraphx::shape lit_shape{migraphx::shape::int32_type, {2}};
    auto param_a = mm->add_parameter("a", dyn_shape);
    auto lit_b   = mm->add_literal(migraphx::literal{lit_shape, std::vector<int32_t>{-2, -3}});
    mm->add_instruction(migraphx::make_op("broadcast", {{"axis", 0}}), lit_b, param_a);
    p.compile(migraphx::ref::target{});

    // Evaluate with a fixed {2, 2} argument for the dynamic parameter.
    std::vector<int32_t> a_data(4, 0);
    migraphx::shape fixed_shape{migraphx::shape::int32_type, {2, 2}};
    migraphx::parameter_map params;
    params["a"] = migraphx::argument(fixed_shape, a_data.data());
    auto output = p.eval(params).back().get<int32_t>();
    EXPECT(output(0, 0) == -2);
    EXPECT(output(0, 1) == -2);
    EXPECT(output(1, 0) == -3);
    EXPECT(output(1, 1) == -3);
}
TEST_CASE(ceil_test)
{
migraphx::program p;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment