Commit 5f04935f authored by Paul
Browse files

Fall back to the CPU when broadcasting

parent aa95521b
......@@ -74,6 +74,7 @@ struct shape
std::size_t index(std::size_t i) const;
bool packed() const;
bool broadcasted() const;
friend bool operator==(const shape& x, const shape& y);
friend bool operator!=(const shape& x, const shape& y);
......
......@@ -80,6 +80,14 @@ std::size_t shape::index(std::size_t i) const
});
}
bool shape::packed() const { return this->m_packed; }
// A shape is "broadcasted" when at least one dimension has a zero
// stride, i.e. that dimension repeatedly reuses the same element.
//
// The previous implementation multiplied every stride together and
// compared the product to zero. Besides touching all strides even when
// the first one is zero, the product of several large strides can wrap
// around std::size_t (unsigned arithmetic wraps modulo 2^64) and in
// principle land on zero with no stride actually being zero. Scan for
// a zero stride directly instead, short-circuiting on the first match.
bool shape::broadcasted() const
{
    assert(this->lens().size() == this->strides().size());
    for(std::size_t stride : this->strides())
    {
        if(stride == 0)
            return true;
    }
    return false;
}
std::size_t shape::element_space() const
{
// TODO: Get rid of intermediate vector
......
......@@ -2,6 +2,7 @@
#include <rtg/manage_ptr.hpp>
#include <rtg/instruction.hpp>
#include <rtg/operators.hpp>
#include <rtg/shape_for_each.hpp>
#include <rtg/miopen/miopen.hpp>
#include <rtg/miopen/hip.hpp>
......@@ -93,6 +94,48 @@ struct miopen_pooling
}
};
// Elementwise add operator for the miopen target.
//
// Expected argument layout (see apply_add below / compute_shape):
//   args[0] = miopen handle, args[1] = first addend, args[2] = second
//   addend, args[3] = pre-allocated output buffer on the gpu.
struct miopen_add
{
    std::string name() const { return "miopen::add"; }
    // Output shape follows the first addend (inputs.at(1));
    // inputs.at(0) is the handle and inputs.at(3) the output buffer.
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs}.has(4);
        return inputs.at(1);
    }
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        if(args[2].get_shape().broadcasted())
        {
            // CPU fallback: when the second addend is a broadcasted
            // view (zero strides), copy both inputs off the gpu, do the
            // add element-by-element on the host, and copy the result
            // back. NOTE(review): this allocates a fresh host buffer
            // and ignores the pre-allocated output args[3] — confirm
            // whether to_gpu reuses it or allocates again. Also only
            // args[2] is checked; presumably the pass always puts the
            // broadcast on the second operand — verify.
            argument result{output_shape};
            visit_all(result, from_gpu(args[1]), from_gpu(args[2]))([&](auto output, auto input1, auto input2) {
                shape_for_each(output.get_shape(), [&](const auto& idx) {
                    output(idx.begin(), idx.end()) =
                        input1(idx.begin(), idx.end()) + input2(idx.begin(), idx.end());
                });
            });
            return to_gpu(result);
        }
        else
        {
            // Fast path: C = alpha*A + alpha*B + beta*C via MIOpen's
            // tensor-op API, writing directly into args[3].
            float alpha = 1, beta = 0;
            auto a_desc = make_tensor(args[1].get_shape());
            auto b_desc = make_tensor(args[2].get_shape());
            auto c_desc = make_tensor(output_shape);
            miopenOpTensor(args[0].implicit(),
                           miopenTensorOpAdd,
                           &alpha,
                           a_desc.get(),
                           args[1].implicit(),
                           &alpha,
                           b_desc.get(),
                           args[2].implicit(),
                           &beta,
                           c_desc.get(),
                           args[3].implicit());
            return args[3];
        }
    }
};
struct miopen_relu
{
shared<activation_descriptor> ad;
......@@ -143,6 +186,10 @@ struct miopen_apply
{
apply_pooling(it);
}
else if(it->op.name() == "add")
{
apply_add(it);
}
}
}
......@@ -195,6 +242,13 @@ struct miopen_apply
ins, miopen_relu{std::move(ad)}, handle, ins->arguments.at(0), output);
}
}
// Rewrite a generic add instruction into the miopen_add op, threading
// through the handle and a freshly inserted output allocation.
void apply_add(instruction_ref ins)
{
    auto alloc = insert_allocation(ins, ins->result);
    prog->replace_instruction(ins,
                              miopen_add{},
                              handle,
                              ins->arguments.at(0),
                              ins->arguments.at(1),
                              alloc);
}
};
std::string miopen_target::name() const { return "miopen"; }
......
......@@ -114,10 +114,11 @@ struct capture
};
template <class T, class F>
void failed(T x, const char* msg, const char* file, int line, F f)
void failed(T x, const char* msg, const char * func, const char* file, int line, F f)
{
if(!x.value())
{
std::cout << func << std::endl;
std::cout << file << ":" << line << ":" << std::endl;
std::cout << " FAILED: " << msg << " " << x << std::endl;
f();
......@@ -163,10 +164,10 @@ void run_test()
// NOLINTNEXTLINE
#define CHECK(...) \
test::failed(test::capture{}->*__VA_ARGS__, #__VA_ARGS__, __FILE__, __LINE__, [] {})
test::failed(test::capture{}->*__VA_ARGS__, #__VA_ARGS__, __PRETTY_FUNCTION__, __FILE__, __LINE__, [] {})
// NOLINTNEXTLINE
#define EXPECT(...) \
test::failed(test::capture{}->*__VA_ARGS__, #__VA_ARGS__, __FILE__, __LINE__, &std::abort)
test::failed(test::capture{}->*__VA_ARGS__, #__VA_ARGS__, __PRETTY_FUNCTION__, __FILE__, __LINE__, &std::abort)
// NOLINTNEXTLINE
#define STATUS(...) EXPECT((__VA_ARGS__) == 0)
......
......@@ -50,6 +50,49 @@ void verify_program()
visit_all(cpu_arg, gpu_arg)([](auto cpu, auto gpu) { EXPECT(test::verify_range(cpu, gpu)); });
}
// Elementwise add of two 1-D float tensors of length 3; verify_program
// runs it on both cpu and gpu and compares the results.
struct test_add
{
    rtg::program create_program() const
    {
        rtg::program p;
        rtg::shape vec3{rtg::shape::float_type, {3}};
        auto lhs = p.add_parameter("x", vec3);
        auto rhs = p.add_parameter("y", vec3);
        p.add_instruction(rtg::add{}, lhs, rhs);
        return p;
    }
    rtg::program::parameter_map create_params() const
    {
        const rtg::shape vec3{rtg::shape::float_type, {3}};
        rtg::program::parameter_map params;
        params["x"] = rtg::generate_argument(vec3);
        params["y"] = rtg::generate_argument(vec3);
        return params;
    }
};
// Add where the second operand must be broadcast: y (2x2) is expanded
// against x (2x2x3), which exercises the cpu fallback path added to
// miopen_add for broadcasted inputs.
struct test_add_broadcast
{
    rtg::program create_program() const
    {
        // The original declared an unused local
        //   rtg::shape s{rtg::shape::float_type, {3}};
        // here; it referenced nothing and has been removed.
        rtg::program p;
        auto x = p.add_parameter("x", {rtg::shape::float_type, {2, 2, 3}});
        auto y = p.add_parameter("y", {rtg::shape::float_type, {2, 2}});
        // NOTE(review): assumes broadcast{0} aligns y's dims with x's
        // leading dims, giving a zero-stride view shaped like x —
        // confirm against rtg::broadcast's semantics.
        auto by = p.add_instruction(rtg::broadcast{0}, x, y);
        p.add_instruction(rtg::add{}, x, by);
        return p;
    }
    rtg::program::parameter_map create_params() const
    {
        rtg::program::parameter_map m;
        m["x"] = rtg::generate_argument({rtg::shape::float_type, {2, 2, 3}});
        m["y"] = rtg::generate_argument({rtg::shape::float_type, {2, 2}});
        return m;
    }
};
struct test_conv_relu
{
rtg::program create_program() const
......@@ -95,6 +138,8 @@ struct test_conv_pooling
int main()
{
    // Each verify_program<T> builds T's program, runs it on both the
    // cpu and gpu targets with T's generated parameters, and asserts
    // the results match (see verify_program above).
    verify_program<test_conv_relu>();
    verify_program<test_conv_pooling>();
    // verify_program<test_add>();
    verify_program<test_add_broadcast>();
    // NOTE(review): the conv tests appear both enabled above and
    // commented out below — this looks like leftover diff residue from
    // the scrape; confirm which set of tests is intended to run.
    // verify_program<test_conv_relu>();
    // verify_program<test_conv_pooling>();
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment