Commit fd26582a authored by Paul's avatar Paul
Browse files

Add cpu backend

parent 6f0e001e
......@@ -49,6 +49,7 @@ rocm_enable_clang_tidy(
-llvm-header-guard
-llvm-include-order
-misc-macro-parentheses
-modernize-use-auto
-modernize-pass-by-value
-modernize-use-default-member-init
-modernize-use-transparent-functors
......
#ifndef RTG_GUARD_RTGLIB_DFOR_HPP
#define RTG_GUARD_RTGLIB_DFOR_HPP
namespace rtg {

// Base case of the multidimensional loop: zero dimensions means the
// callback runs exactly once with no indices.
inline auto dfor()
{
    return [](auto func) { func(); };
}

// Multidimensional for loop.
// dfor(a, b, c)(f) invokes f(i, j, k) for every index tuple with
// 0 <= i < a, 0 <= j < b, 0 <= k < c, iterating the last index fastest.
template <class T, class... Ts>
auto dfor(T limit, Ts... limits)
{
    return [=](auto func) {
        for(T idx = 0; idx < limit; idx++)
        {
            // Recurse on the remaining dimensions, prepending this index.
            dfor(limits...)([&](Ts... rest) { func(idx, rest...); });
        }
    };
}

} // namespace rtg
#endif
#ifndef RTG_GUARD_RTG_MANAGE_PTR_HPP
#define RTG_GUARD_RTG_MANAGE_PTR_HPP
#include <memory>
#include <type_traits>
namespace rtg {
template <class F, F f>
struct manage_deleter
{
template <class T>
void operator()(T* x) const
{
if(x != nullptr)
{
f(x);
}
}
};
// Deleter that intentionally frees nothing: lets a smart pointer refer to
// storage whose lifetime is managed elsewhere.
struct null_deleter
{
    template <class T>
    void operator()(T* /*unmanaged*/) const
    {
        // Deliberately empty.
    }
};
// unique_ptr whose deleter calls the compile-time function f on destruction
// (see manage_deleter); used to wrap C-style handles in RAII.
template <class T, class F, F f>
using manage_ptr = std::unique_ptr<T, manage_deleter<F, f>>;
// Metafunction extracting a smart pointer's pointee type.
template <class T>
struct element_type
{
using type = typename T::element_type;
};
// Pointee type of T, whether T is a raw pointer or a smart pointer.
// conditional_t selects the *metafunction* (remove_pointer vs element_type)
// before applying ::type, so only the valid branch is ever instantiated.
template <class T>
using remove_ptr = typename std::conditional_t<std::is_pointer<T>{},
std::remove_pointer<T>,
element_type<T>>::type;
// shared_ptr to the pointee of T (T may be a raw or smart pointer type).
template <class T>
using shared = std::shared_ptr<remove_ptr<T>>;
} // namespace rtg
// Declares a manage_ptr type for a C handle type T released by function F,
// e.g. RTG_MANAGE_PTR(FILE*, fclose). T may be the pointer type itself;
// remove_pointer_t recovers the pointee for unique_ptr.
#define RTG_MANAGE_PTR(T, F) \
rtg::manage_ptr<std::remove_pointer_t<T>, decltype(&F), &F> // NOLINT
#endif
......@@ -33,7 +33,7 @@ struct tensor_view
template <class... Ts>
T& operator()(Ts... xs)
{
return m_data[m_shape.index({xs...})];
return m_data[m_shape.index({static_cast<std::size_t>(xs)...})];
}
T& operator[](std::size_t i)
......
# CPU reference backend library for rtg.
add_library(rtg_cpu
cpu_target.cpp
)
rocm_clang_tidy_check(rtg_cpu)
target_link_libraries(rtg_cpu rtg)
# Export include/ so build-tree dependents can use <rtg/cpu/...> headers.
target_include_directories(rtg_cpu PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
#include <rtg/cpu/cpu_target.hpp>
#include <algorithm>
#include <rtg/dfor.hpp>
#include <rtg/instruction.hpp>
#include <rtg/operators.hpp>
namespace rtg { namespace cpu {
// Naive direct 2D convolution (NCHW): for each output element, accumulate
// the windowed input-by-weights product in double precision.
struct cpu_convolution
{
    convolution op; // carries stride/padding and the shape computation

    std::string name() const
    {
        return "cpu::convolution";
    }

    // Delegate output-shape computation to the generic convolution operator.
    shape compute_shape(std::vector<shape> inputs) const
    {
        return op.compute_shape(inputs);
    }

    // args[0]: input tensor (n, c, h, w); args[1]: weights (k, c, kh, kw).
    // Returns the convolved output tensor.
    argument compute(std::vector<argument> args) const
    {
        // Compute the output shape once and reuse it (previously it was
        // computed, discarded, and recomputed for the result buffer).
        shape output_shape = compute_shape({args[0].get_shape(), args[1].get_shape()});
        argument result{output_shape};
        result.visit([&](auto output) {
            args[0].visit([&](auto input) {
                args[1].visit([&](auto weights) {
                    auto in_h  = input.get_shape().lens()[2];
                    auto in_w  = input.get_shape().lens()[3];
                    auto wei_c = weights.get_shape().lens()[1];
                    auto wei_h = weights.get_shape().lens()[2];
                    auto wei_w = weights.get_shape().lens()[3];
                    // Iterate over the *output* dimensions. The previous code
                    // looped over the input dims, which is wrong whenever
                    // stride/padding change the spatial size or the number of
                    // output channels differs from the input channels.
                    auto out = output_shape.lens();
                    dfor(out[0], out[1], out[2], out[3])(
                        [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                            // Cast before subtracting: unsigned i*stride - padding
                            // would wrap for windows starting in the padding.
                            const int start_x =
                                static_cast<int>(i * op.stride[0]) - static_cast<int>(op.padding[0]);
                            const int start_y =
                                static_cast<int>(j * op.stride[1]) - static_cast<int>(op.padding[1]);
                            double acc = 0;
                            dfor(wei_c, wei_h, wei_w)(
                                [&](std::size_t k, std::size_t x, std::size_t y) {
                                    const int in_x = start_x + static_cast<int>(x);
                                    const int in_y = start_y + static_cast<int>(y);
                                    // Skip taps that fall in the zero padding.
                                    if(in_x >= 0 && in_x < static_cast<int>(in_h) && in_y >= 0 &&
                                       in_y < static_cast<int>(in_w))
                                    {
                                        acc += input(o, k, in_x, in_y) * weights(w, k, x, y);
                                    }
                                });
                            output(o, w, i, j) = acc;
                        });
                });
            });
        });
        return result;
    }
};
// Elementwise rectified linear unit: clamps negative values to zero.
struct relu
{
    std::string name() const
    {
        return "cpu::relu";
    }

    // An elementwise op leaves the shape unchanged.
    shape compute_shape(std::vector<shape> inputs) const
    {
        return inputs.front();
    }

    argument compute(std::vector<argument> args) const
    {
        argument result{args[0].get_shape()};
        result.visit([&](auto output) {
            args[0].visit([&](auto input) {
                auto rectify = [](auto v) { return v > 0 ? v : 0; };
                std::transform(input.begin(), input.end(), output.begin(), rectify);
            });
        });
        return result;
    }
};
// Rewrites generic instructions in a program into their cpu::* versions.
struct cpu_apply
{
    program* prog; // non-owning; the program being lowered

    void apply()
    {
        // Pre-increment avoids copying the iterator each step.
        // NOTE(review): assumes replace_instruction rewrites in place and
        // keeps `it` valid — confirm against program's API.
        for(auto it = prog->begin(); it != prog->end(); ++it)
        {
            if(it->op.name() == "convolution")
            {
                apply_convolution(it);
            }
            else if(it->op.name() == "activation")
            {
                apply_activation(it);
            }
        }
    }

    // Swap a generic convolution for the cpu implementation, keeping its op.
    void apply_convolution(instruction_ref ins)
    {
        auto&& op = any_cast<convolution>(ins->op);
        prog->replace_instruction(ins, cpu_convolution{op}, ins->arguments);
    }

    // Only relu activations have a cpu implementation; others are left as-is.
    void apply_activation(instruction_ref ins)
    {
        auto&& op = any_cast<activation>(ins->op);
        if(op.mode == "relu")
        {
            prog->replace_instruction(ins, relu{}, ins->arguments);
        }
    }
};
// Identifier reported for this backend.
std::string cpu_target::name() const
{
return "cpu";
}
// Lower all supported instructions in p to their cpu implementations
// by running the cpu_apply rewriter over the program.
void cpu_target::apply(program& p) const
{
cpu_apply{&p}.apply();
}
} // namespace cpu
} // namespace rtg
#ifndef RTG_GUARD_RTGLIB_CPU_TARGET_HPP
#define RTG_GUARD_RTGLIB_CPU_TARGET_HPP
#include <rtg/program.hpp>
namespace rtg { namespace cpu {
// Reference CPU backend: rewrites a program's instructions into
// cpu::* implementations (defined in cpu_target.cpp).
struct cpu_target
{
// Returns "cpu".
std::string name() const;
// Lowers supported instructions in p to cpu implementations, in place.
void apply(program& p) const;
};
} // namespace cpu
} // namespace rtg
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.