gaoqiong / MIGraphX / Commits

Commit a5b9ca16, authored Jun 19, 2018 by Paul
Merge branch 'master' into lenet-test-merge
Parents: 46f750ea, d013de49

Showing 10 changed files with 403 additions and 113 deletions (+403 −113).
CMakeLists.txt                      +2   −0
src/include/rtg/operators.hpp       +54  −0
src/include/rtg/raw_data.hpp        +1   −2
src/include/rtg/requires.hpp        +21  −0
src/include/rtg/shape.hpp           +9   −0
src/include/rtg/shape_for_each.hpp  +28  −0
src/include/rtg/tensor_view.hpp     +16  −3
src/shape.cpp                       +10  −7
src/targets/cpu/cpu_target.cpp      +203 −101
test/cpu_ops_test.cpp               +59  −0
CMakeLists.txt  (view file @ a5b9ca16)

```cmake
@@ -106,6 +106,8 @@ rocm_enable_cppcheck(
    ${CMAKE_CURRENT_SOURCE_DIR}/src/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/targets/cpu/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/targets/miopen/include
    DEFINE CPPCHECK=1
)

add_subdirectory(src)
```
src/include/rtg/operators.hpp  (view file @ a5b9ca16)

```cpp
@@ -179,6 +179,10 @@ struct pooling
    const shape& input = inputs.at(0);
    auto t             = input.type();
    assert(lengths[0] < (input.lens()[2] + 2 * padding[0]));
    assert(lengths[1] < (input.lens()[3] + 2 * padding[1]));
    return {t,
            {input.lens()[0],
...
```

```cpp
@@ -227,6 +231,56 @@ struct activation
    }
};

struct transpose
{
    std::vector<int64_t> dims;
    std::string name() const { return "transpose"; }
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs}.has(1);
        auto input         = inputs.at(0);
        auto input_lens    = input.lens();
        auto input_strides = input.strides();
        auto t             = input.type();
        if(dims.size() != input_lens.size())
        {
            RTG_THROW("Permutation has wrong number of axes");
        }
        std::vector<int64_t> axes(dims.size());
        std::iota(axes.begin(), axes.end(), 0);
        if(!std::is_permutation(axes.begin(), axes.end(), dims.begin()))
        {
            RTG_THROW("Invalid permutation");
        }
        std::vector<size_t> output_lens(input_lens.size());
        std::vector<size_t> output_strides(input_lens.size());
        for(int i = 0; i < output_lens.size(); i++)
        {
            output_lens[i]    = input_lens[dims[i]];
            output_strides[i] = input_strides[dims[i]];
        }
        return {t, output_lens, output_strides};
    }
    argument compute(shape, std::vector<argument>) const { RTG_THROW("not computable"); }
};

struct contiguous
{
    std::string name() const { return "contiguous"; }
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs}.has(1);
        auto lens = inputs.at(0).lens();
        auto t    = inputs.at(0).type();
        if(lens.size() < 2)
        {
            RTG_THROW("Number of dimensions should exceed 1");
        }
        return {t, lens};
    }
    argument compute(shape, std::vector<argument>) const { RTG_THROW("not computable"); }
};

struct reshape
{
    std::vector<int64_t> dims;
...
```
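As a standalone illustration of what `transpose::compute_shape` does to the metadata: permuting lens and strides with perm `{0, 3, 1, 2}` turns a packed `{1, 2, 2, 3}` input into exactly the lens/strides the new `transpose_test` checks for. The packed NCHW strides below are an assumption of the sketch, not part of the diff.

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    std::vector<std::size_t> in_lens    = {1, 2, 2, 3};
    std::vector<std::size_t> in_strides = {12, 6, 3, 1}; // assumed packed layout
    std::vector<std::int64_t> perm      = {0, 3, 1, 2};

    // Output dimension k takes the input's length and stride at position perm[k].
    std::vector<std::size_t> out_lens(4), out_strides(4);
    for(std::size_t k = 0; k < 4; k++)
    {
        out_lens[k]    = in_lens[perm[k]];
        out_strides[k] = in_strides[perm[k]];
    }
    for(std::size_t k = 0; k < 4; k++)
        std::cout << out_lens[k] << "/" << out_strides[k] << " ";
    std::cout << "\n"; // 1/12 3/1 2/6 2/3 — the {1,3,2,2} / {12,1,6,3} the test expects
}
```

No data moves; only the view changes, which is why the op throws "not computable" and the CPU target handles it by reusing the input buffer.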
src/include/rtg/raw_data.hpp  (view file @ a5b9ca16)

```diff
@@ -3,11 +3,10 @@
 #define RTG_GUARD_RAW_DATA_HPP

 #include <rtg/tensor_view.hpp>
+#include <rtg/requires.hpp>

 namespace rtg {

-#define RTG_REQUIRES(...) class = typename std::enable_if<(__VA_ARGS__)>::type
 struct raw_data_base
 {
 };
```
src/include/rtg/requires.hpp  (new file, 0 → 100644, view file @ a5b9ca16)

```cpp
#ifndef RTG_GUARD_RTGLIB_REQUIRES_HPP
#define RTG_GUARD_RTGLIB_REQUIRES_HPP

#include <type_traits>

namespace rtg {

template <bool... Bs>
struct and_ : std::is_same<and_<Bs...>, and_<(Bs || true)...>> // NOLINT
{
};

#ifdef CPPCHECK
#define RTG_REQUIRES(...) class = void
#else
#define RTG_REQUIRES(...) class = typename std::enable_if<and_<__VA_ARGS__, true>{}>::type
#endif

} // namespace rtg
#endif
```
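A minimal standalone sketch of how this `and_`/`RTG_REQUIRES` idiom gates an overload set. `REQUIRES` and `sum` here are illustrative stand-ins (not part of the commit), and C++17 is assumed for the fold expression:

```cpp
#include <iostream>
#include <type_traits>

// Same trick as and_ in requires.hpp: true iff every Bs is true.
template <bool... Bs>
struct and_ : std::is_same<and_<Bs...>, and_<(Bs || true)...>>
{
};

#define REQUIRES(...) class = typename std::enable_if<and_<__VA_ARGS__, true>{}>::type

// Participates in overload resolution only when every argument type is integral.
template <class... Ts, REQUIRES(std::is_integral<Ts>{}...)>
int sum(Ts... xs)
{
    return (0 + ... + xs); // C++17 fold expression
}

int main()
{
    std::cout << sum(1, 2, 3) << "\n"; // prints 6
    // sum(1.5); // would not compile: double is not integral
}
```

The `CPPCHECK` branch degrades the macro to `class = void` so static analysis does not choke on the SFINAE expression.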
src/include/rtg/shape.hpp  (view file @ a5b9ca16)

```diff
@@ -4,6 +4,7 @@
 #include <vector>
 #include <cassert>
 #include <ostream>
+#include <numeric>
 #include <rtg/errors.hpp>
```

```cpp
@@ -61,6 +62,14 @@ struct shape
    std::size_t index(std::initializer_list<std::size_t> l) const;
    std::size_t index(const std::vector<std::size_t>& l) const;

    template <class Iterator>
    std::size_t index(Iterator start, Iterator last) const
    {
        assert(std::distance(start, last) <= this->lens().size());
        assert(this->lens().size() == this->strides().size());
        return std::inner_product(start, last, this->strides().begin(), std::size_t{0});
    }

    // Map element index to space index
    std::size_t index(std::size_t i) const;
```
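The new iterator overload is just `std::inner_product` of the multi-index against the strides. A small standalone check of the formula, with made-up strides for an assumed packed `{2, 3, 4}` shape:

```cpp
#include <cstddef>
#include <iostream>
#include <numeric>
#include <vector>

int main()
{
    // offset = sum_k idx[k] * stride[k]
    std::vector<std::size_t> strides = {12, 4, 1}; // assumed packed {2,3,4} layout
    std::vector<std::size_t> idx     = {1, 2, 3};
    std::size_t offset =
        std::inner_product(idx.begin(), idx.end(), strides.begin(), std::size_t{0});
    std::cout << offset << "\n"; // 1*12 + 2*4 + 3*1 = 23
}
```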
src/include/rtg/shape_for_each.hpp  (new file, 0 → 100644, view file @ a5b9ca16)

```cpp
#ifndef RTG_GUARD_RTGLIB_SHAPE_FOR_EACH_HPP
#define RTG_GUARD_RTGLIB_SHAPE_FOR_EACH_HPP

#include <rtg/shape.hpp>
#include <algorithm>

namespace rtg {

template <class F>
void shape_for_each(const rtg::shape& s, F f)
{
    // Ensure calls to f use const ref to vector
    auto call = [&f](const std::vector<std::size_t>& i) { f(i); };
    std::vector<std::size_t> indices(s.lens().size());
    for(std::size_t i = 0; i < s.elements(); i++)
    {
        std::transform(s.strides().begin(),
                       s.strides().end(),
                       s.lens().begin(),
                       indices.begin(),
                       [&](std::size_t stride, std::size_t len) { return (i / stride) % len; });
        call(indices);
    }
}

} // namespace rtg
#endif
```
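A standalone sketch of the `(i / stride) % len` recovery that `shape_for_each` performs for every element, using a hypothetical 2×3 row-major shape rather than a real `rtg::shape`:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    std::vector<std::size_t> lens    = {2, 3};
    std::vector<std::size_t> strides = {3, 1}; // row-major strides for {2,3}
    for(std::size_t i = 0; i < 6; i++)
    {
        // Component k of element i is (i / stride[k]) % len[k].
        std::vector<std::size_t> idx(lens.size());
        std::transform(strides.begin(), strides.end(), lens.begin(), idx.begin(),
                       [&](std::size_t stride, std::size_t len) { return (i / stride) % len; });
        std::cout << i << " -> {" << idx[0] << ", " << idx[1] << "}\n";
    }
    // prints 0 -> {0,0}, 1 -> {0,1}, 2 -> {0,2}, 3 -> {1,0}, ...
}
```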
src/include/rtg/tensor_view.hpp  (view file @ a5b9ca16)

```diff
@@ -3,6 +3,7 @@
 #include <rtg/shape.hpp>
 #include <rtg/float_equal.hpp>
+#include <rtg/requires.hpp>
 #include <iostream>
```

```diff
@@ -25,18 +26,30 @@ struct tensor_view
     const T* data() const { return this->m_data; }

-    template <class... Ts>
+    template <class... Ts, RTG_REQUIRES(std::is_integral<Ts>{}...)>
     const T& operator()(Ts... xs) const
     {
-        return m_data[m_shape.index({xs...})];
+        return m_data[m_shape.index({static_cast<std::size_t>(xs)...})];
     }

-    template <class... Ts>
+    template <class... Ts, RTG_REQUIRES(std::is_integral<Ts>{}...)>
     T& operator()(Ts... xs)
     {
         return m_data[m_shape.index({static_cast<std::size_t>(xs)...})];
     }

+    template <class Iterator, RTG_REQUIRES(not std::is_integral<Iterator>{})>
+    const T& operator()(Iterator start, Iterator last) const
+    {
+        return m_data[m_shape.index(start, last)];
+    }
+
+    template <class Iterator, RTG_REQUIRES(not std::is_integral<Iterator>{})>
+    T& operator()(Iterator start, Iterator last)
+    {
+        return m_data[m_shape.index(start, last)];
+    }

     T& operator[](std::size_t i)
     {
         assert(!this->empty() && i < this->size());
```
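The opposite `is_integral` constraints on the two `operator()` families are what keep an `(iterator, iterator)` call from being swallowed by the variadic overload. A standalone sketch with stand-in free functions (not the real `tensor_view` API; C++17 assumed for the fold expression):

```cpp
#include <cstddef>
#include <iostream>
#include <type_traits>
#include <vector>

// Chosen only when every argument is an integral index.
template <class... Ts,
          class = typename std::enable_if<(std::is_integral<Ts>{} && ...)>::type>
void access(Ts... xs)
{
    std::cout << "integral-index overload, " << sizeof...(xs) << " indices\n";
}

// Chosen for iterator ranges; would be ambiguous without the constraints above.
template <class Iterator,
          class = typename std::enable_if<!std::is_integral<Iterator>{}>::type>
void access(Iterator start, Iterator last)
{
    std::cout << "iterator-range overload, " << (last - start) << " indices\n";
}

int main()
{
    std::vector<std::size_t> idx = {0, 1, 2};
    access(0, 1, 2);                // picks the integral overload
    access(idx.begin(), idx.end()); // picks the iterator overload
}
```

The `static_cast<std::size_t>(xs)...` in the integral overloads also silences narrowing in the braced-init list when callers pass signed literals.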
src/shape.cpp  (view file @ a5b9ca16)

```diff
@@ -63,13 +63,16 @@ std::size_t shape::index(const std::vector<std::size_t>& l) const
 std::size_t shape::index(std::size_t i) const
 {
     assert(this->lens().size() == this->strides().size());
-    return std::inner_product(this->lens().begin(),
-                              this->lens().end(),
-                              this->strides().begin(),
-                              std::size_t{0},
-                              std::plus<std::size_t>{},
-                              [&](std::size_t len, std::size_t stride) {
-                                  return ((i / stride) % len) * stride;
-                              });
+    if(this->packed())
+        return i;
+    else
+        return std::inner_product(this->lens().begin(),
+                                  this->lens().end(),
+                                  this->strides().begin(),
+                                  std::size_t{0},
+                                  std::plus<std::size_t>{},
+                                  [&](std::size_t len, std::size_t stride) {
+                                      return ((i / stride) % len) * stride;
+                                  });
 }
 bool shape::packed() const { return this->m_packed; }
 std::size_t shape::element_space() const
```
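The new `packed()` fast path relies on the old fold being the identity whenever the shape is packed. A quick standalone check of that equivalence, with an assumed packed `{2, 3, 4}` layout:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    std::vector<std::size_t> lens    = {2, 3, 4};
    std::vector<std::size_t> strides = {12, 4, 1}; // packed row-major strides
    for(std::size_t i = 0; i < 24; i++)
    {
        // Same per-dimension term as the inner_product lambda in shape::index.
        std::size_t mapped = 0;
        for(std::size_t k = 0; k < lens.size(); k++)
            mapped += ((i / strides[k]) % lens[k]) * strides[k];
        if(mapped != i)
            std::cout << "mismatch at " << i << "\n";
    }
    std::cout << "packed mapping is the identity\n";
}
```

For non-packed shapes (e.g. the strided view a transpose produces) the fold is still needed to skip the holes in the buffer.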
src/targets/cpu/cpu_target.cpp  (view file @ a5b9ca16)

```cpp
@@ -54,6 +54,159 @@ struct cpu_convolution
    }
};

struct max_pool
{
    static std::string name() { return "max"; }
    static double start() { return std::numeric_limits<double>::lowest(); }
    // fold the window with the running maximum
    static double apply(double x, double y) { return std::max(x, y); }
    // max pooling ignores the pool size
    static double final(double x, double) { return (x); }
};

struct avg_pool
{
    static std::string name() { return "average"; }
    static double start() { return 0.0; }
    // fold the window with a running sum
    static double apply(double x, double y) { return x + y; }
    // divide the accumulated sum by the pool size
    static double final(double x, double y) { return x / y; }
};

template <class Op>
struct cpu_pooling
{
    pooling op;
    std::string name() const { return "cpu::pooling_" + Op::name(); }
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
            using type = typename decltype(output)::value_type;
            auto in_h  = input.get_shape().lens()[2];
            auto in_w  = input.get_shape().lens()[3];
            dfor(output_shape.lens()[0],
                 output_shape.lens()[1],
                 output_shape.lens()[2],
                 output_shape.lens()[3])(
                [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
                    const int start_x0  = i * op.stride[0] - op.padding[0];
                    const int start_y0  = j * op.stride[1] - op.padding[1];
                    const int hend      = std::min(start_x0 + op.lengths[0], in_h);
                    const int wend      = std::min(start_y0 + op.lengths[1], in_w);
                    const int start_x   = std::max(start_x0, 0);
                    const int start_y   = std::max(start_y0, 0);
                    const int w_h       = (hend - start_x);
                    const int w_w       = (wend - start_y);
                    const int pool_size = std::max(w_h * w_w, 1);
                    double acc          = Op::start();
                    dfor(w_h, w_w)([&](int x, int y) {
                        const int in_x = start_x + x;
                        const int in_y = start_y + y;
                        if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                        {
                            acc = Op::apply(acc, input(o, w, in_x, in_y));
                        }
                    });
                    output(o, w, i, j) = type(Op::final(acc, pool_size));
                });
        });
        return result;
    }
};

struct cpu_transpose
{
    transpose op;
    std::string name() const { return "cpu::transpose"; }
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        // metadata-only: reuse the input buffer under the permuted strides
        return {output_shape, std::move(args.front().data)};
    }
};

struct cpu_contiguous
{
    contiguous op;
    std::string name() const { return "cpu::contiguous"; }
    shape compute_shape(std::vector<shape> inputs) const { return op.compute_shape(inputs); }
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
            auto input_shape = args[0].get_shape();
            auto ndim        = output_shape.lens().size();
            using value_type = typename decltype(input)::value_type;
            value_type* ptr  = static_cast<value_type*>(output.data());
            if(ndim == 2)
            {
                dfor(input_shape.lens()[0], input_shape.lens()[1])(
                    [&](std::size_t i0, std::size_t i1) { *ptr++ = input(i0, i1); });
            }
            else if(ndim == 3)
            {
                dfor(input_shape.lens()[0], input_shape.lens()[1], input_shape.lens()[2])(
                    [&](std::size_t i0, std::size_t i1, std::size_t i2) {
                        *ptr++ = input(i0, i1, i2);
                    });
            }
            else if(ndim == 4)
            {
                dfor(input_shape.lens()[0],
                     input_shape.lens()[1],
                     input_shape.lens()[2],
                     input_shape.lens()[3])(
                    [&](std::size_t i0, std::size_t i1, std::size_t i2, std::size_t i3) {
                        *ptr++ = input(i0, i1, i2, i3);
                    });
            }
            else if(ndim == 5)
            {
                dfor(input_shape.lens()[0],
                     input_shape.lens()[1],
                     input_shape.lens()[2],
                     input_shape.lens()[3],
                     input_shape.lens()[4])(
                    [&](std::size_t i0, std::size_t i1, std::size_t i2, std::size_t i3, std::size_t i4) {
                        *ptr++ = input(i0, i1, i2, i3, i4);
                    });
            }
            else if(ndim == 6)
            {
                dfor(input_shape.lens()[0],
                     input_shape.lens()[1],
                     input_shape.lens()[2],
                     input_shape.lens()[3],
                     input_shape.lens()[4],
                     input_shape.lens()[5])(
                    [&](std::size_t i0, std::size_t i1, std::size_t i2, std::size_t i3,
                        std::size_t i4, std::size_t i5) {
                        *ptr++ = input(i0, i1, i2, i3, i4, i5);
                    });
            }
        });
        return result;
    }
};

struct cpu_reshape
{
    reshape op;
...
```
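The window arithmetic in `cpu_pooling::compute` clamps each pooling window to the input extent before folding it with `Op::apply`. A standalone 1-D sketch of the same clamping, with made-up sizes:

```cpp
#include <algorithm>
#include <iostream>

int main()
{
    const int in_len = 4, stride = 2, padding = 1, length = 3;
    for(int i = 0; i < 3; i++) // three output positions
    {
        // Unclamped window start, then clip [start0, start0 + length) to [0, in_len).
        const int start0 = i * stride - padding;
        const int end    = std::min(start0 + length, in_len);
        const int start  = std::max(start0, 0);
        std::cout << "output " << i << " pools input [" << start << ", " << end << ")\n";
    }
    // output 0 pools [0, 2), output 1 pools [1, 4), output 2 pools [3, 4)
}
```

`pool_size` is floored at 1 for the same reason: a fully padded window must not divide by zero in `avg_pool::final`.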
```diff
@@ -390,62 +543,52 @@ struct cpu_binary
 struct cpu_apply
 {
     program* prog;
+    std::unordered_map<std::string, std::function<void(instruction_ref)>> apply_map{};
+
+    template <class T>
+    auto simple_op()
+    {
+        return [this](instruction_ref ins) { apply_simple_op<T>(ins); };
+    }
+
+    template <class T, class Op>
+    auto extend_op()
+    {
+        return [this](instruction_ref ins) { apply_extend_op<T, Op>(ins); };
+    }
+
+    void init()
+    {
+        apply_map["convolution"] = extend_op<cpu_convolution, convolution>();
+        apply_map["gemm"]        = extend_op<cpu_gemm, gemm>();
+        apply_map["reshape"]     = extend_op<cpu_reshape, reshape>();
+        apply_map["contiguous"]  = extend_op<cpu_contiguous, contiguous>();
+        apply_map["transpose"]   = extend_op<cpu_transpose, transpose>();
+        apply_map["identity"]    = simple_op<cpu_unary<identity_op>>();
+        apply_map["tanh"]        = simple_op<cpu_unary<tanh_op>>();
+        apply_map["sigmoid"]     = simple_op<cpu_unary<sigmoid_op>>();
+        apply_map["exp"]         = simple_op<cpu_unary<exp_op>>();
+        apply_map["neg"]         = simple_op<cpu_unary<neg_op>>();
+        apply_map["sin"]         = simple_op<cpu_unary<sin_op>>();
+        apply_map["cos"]         = simple_op<cpu_unary<cos_op>>();
+        apply_map["tan"]         = simple_op<cpu_unary<tan_op>>();
+        apply_map["softmax"]     = simple_op<softmax2d>();
+    }

     void apply()
     {
+        init();
         for(auto it = prog->begin(); it != prog->end(); it++)
         {
-            if(it->op.name() == "convolution")
-            {
-                apply_convolution(it);
-            }
-            else if(it->op.name() == "gemm")
-            {
-                apply_gemm(it);
-            }
-            else if(it->op.name() == "reshape")
-            {
-                apply_reshape(it);
-            }
-            else if(it->op.name() == "activation")
+            if(it->op.name() == "activation")
             {
                 apply_activation(it);
             }
-            else if(it->op.name() == "identity")
-            {
-                apply_identity(it);
-            }
-            else if(it->op.name() == "softmax")
-            {
-                apply_softmax(it);
-            }
-            else if(it->op.name() == "tanh")
-            {
-                apply_tanh(it);
-            }
-            else if(it->op.name() == "sigmoid")
-            {
-                apply_sigmoid(it);
-            }
-            else if(it->op.name() == "exp")
-            {
-                apply_exp(it);
-            }
-            else if(it->op.name() == "neg")
-            {
-                apply_neg(it);
-            }
-            else if(it->op.name() == "sin")
-            {
-                apply_sin(it);
-            }
-            else if(it->op.name() == "cos")
-            {
-                apply_cos(it);
-            }
-            else if(it->op.name() == "tan")
-            {
-                apply_tan(it);
-            }
+            else if(apply_map.count(it->op.name()) > 0)
+            {
+                apply_map.at(it->op.name())(it);
+            }
             else if(it->op.name() == "add")
             {
```
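The refactor replaces a growing if/else chain with a name-to-callback table, so registering a new op is one line in `init()`. A minimal standalone sketch of the same dispatch pattern, with stand-in lambdas rather than the real lowering functions:

```cpp
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

int main()
{
    std::unordered_map<std::string, std::function<void()>> apply_map;
    apply_map["transpose"]  = [] { std::cout << "lower to cpu::transpose\n"; };
    apply_map["contiguous"] = [] { std::cout << "lower to cpu::contiguous\n"; };

    // Same lookup pattern as cpu_apply::apply: probe, then dispatch.
    const std::string name = "transpose";
    if(apply_map.count(name) > 0)
        apply_map.at(name)();
}
```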
```diff
@@ -466,22 +609,17 @@ struct cpu_apply
         }
     }

-    void apply_convolution(instruction_ref ins)
+    template <class T>
+    void apply_simple_op(instruction_ref ins)
     {
-        auto&& op = any_cast<convolution>(ins->op);
-        prog->replace_instruction(ins, cpu_convolution{op}, ins->arguments);
+        prog->replace_instruction(ins, T{}, ins->arguments);
     }

-    void apply_gemm(instruction_ref ins)
+    template <class T, class Op>
+    void apply_extend_op(instruction_ref ins)
     {
-        auto&& op = any_cast<gemm>(ins->op);
-        prog->replace_instruction(ins, cpu_gemm{op}, ins->arguments);
-    }
-
-    void apply_reshape(instruction_ref ins)
-    {
-        auto&& op = any_cast<reshape>(ins->op);
-        prog->replace_instruction(ins, cpu_reshape{op}, ins->arguments);
+        auto&& op = any_cast<Op>(ins->op);
+        prog->replace_instruction(ins, T{op}, ins->arguments);
     }

     void apply_activation(instruction_ref ins)
```
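`apply_extend_op` collapses the per-op boilerplate (cast out the framework op, rewrap it in its CPU counterpart) into one template. A standalone sketch of that shape, with placeholder types rather than the real instruction machinery:

```cpp
#include <iostream>

// Placeholder op and its CPU wrapper; the real code uses any_cast on ins->op
// and prog->replace_instruction instead of a plain function call.
struct convolution { };
struct cpu_convolution { convolution op; };

template <class T, class Op>
T wrap(const Op& op)
{
    return T{op}; // mirrors prog->replace_instruction(ins, T{op}, ins->arguments)
}

int main()
{
    convolution conv;
    cpu_convolution lowered = wrap<cpu_convolution, convolution>(conv);
    (void)lowered;
    std::cout << "wrapped convolution into cpu_convolution\n";
}
```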
```diff
@@ -491,49 +629,13 @@ struct cpu_apply
         prog->replace_instruction(ins, cpu_unary<relu_op>{}, ins->arguments);
     }

-    void apply_identity(instruction_ref ins)
-    {
-        prog->replace_instruction(ins, cpu_unary<identity_op>{}, ins->arguments);
-    }
-
-    void apply_softmax(instruction_ref ins)
-    {
-        prog->replace_instruction(ins, softmax2d{}, ins->arguments);
-    }
-
-    void apply_tanh(instruction_ref ins)
-    {
-        prog->replace_instruction(ins, cpu_unary<tanh_op>{}, ins->arguments);
-    }
-
-    void apply_sigmoid(instruction_ref ins)
-    {
-        prog->replace_instruction(ins, cpu_unary<sigmoid_op>{}, ins->arguments);
-    }
-
-    void apply_exp(instruction_ref ins)
-    {
-        prog->replace_instruction(ins, cpu_unary<exp_op>{}, ins->arguments);
-    }
-
-    void apply_neg(instruction_ref ins)
-    {
-        prog->replace_instruction(ins, cpu_unary<neg_op>{}, ins->arguments);
-    }
-
-    void apply_sin(instruction_ref ins)
-    {
-        prog->replace_instruction(ins, cpu_unary<sin_op>{}, ins->arguments);
-    }
-
-    void apply_cos(instruction_ref ins)
-    {
-        prog->replace_instruction(ins, cpu_unary<cos_op>{}, ins->arguments);
-    }
-
-    void apply_tan(instruction_ref ins)
+    void apply_pooling(instruction_ref ins)
     {
-        prog->replace_instruction(ins, cpu_unary<tan_op>{}, ins->arguments);
+        auto&& op = any_cast<pooling>(ins->op);
+        if(op.mode == "max")
+            prog->replace_instruction(ins, cpu_pooling<max_pool>{op}, ins->arguments);
+        else if(op.mode == "average")
+            prog->replace_instruction(ins, cpu_pooling<avg_pool>{op}, ins->arguments);
     }

     void apply_add(instruction_ref ins)
```
test/cpu_ops_test.cpp  (view file @ a5b9ca16)

```cpp
@@ -479,6 +479,63 @@ void conv2d_padding_stride_test()
    EXPECT(test::verify_range(results_vector, s));
}

void transpose_test()
{
    rtg::shape a_shape{rtg::shape::float_type, {1, 2, 2, 3}};
    std::vector<float> data(12);
    std::iota(data.begin(), data.end(), 0);
    {
        rtg::program p;
        auto l                    = p.add_literal(rtg::literal{a_shape, data});
        std::vector<int64_t> perm = {0, 3, 1, 2};
        p.add_instruction(rtg::transpose{perm}, l);
        p.compile(rtg::cpu::cpu_target{});
        auto result = p.eval({});

        result.visit([&](auto output) {
            std::vector<size_t> new_lens    = {1, 3, 2, 2};
            std::vector<size_t> new_strides = {12, 1, 6, 3};
            EXPECT(bool{output.get_shape().lens() == new_lens});
            EXPECT(bool{output.get_shape().strides() == new_strides});
        });
    }
    {
        rtg::program p;
        auto l                    = p.add_literal(rtg::literal{a_shape, data});
        std::vector<int64_t> perm = {0, 3, 1, 2};
        auto result               = p.add_instruction(rtg::transpose{perm}, l);
        p.add_instruction(rtg::contiguous{}, result);
        p.compile(rtg::cpu::cpu_target{});
        auto result2 = p.eval({});

        std::vector<float> results_vector(12);
        result2.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
        std::vector<float> gold = {0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11};
        EXPECT(test::verify_range(results_vector, gold));
    }
}

void contiguous_test()
{
    rtg::shape a_shape{rtg::shape::float_type, {1, 3, 2, 2}, {12, 1, 6, 3}};
    std::vector<float> data(12);
    std::iota(data.begin(), data.end(), 0);

    rtg::program p;
    auto l = p.add_literal(rtg::literal{a_shape, data});
    p.add_instruction(rtg::contiguous{}, l);
    p.compile(rtg::cpu::cpu_target{});
    auto result = p.eval({});

    std::vector<float> results_vector(12);
    result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
    std::vector<size_t> new_lens    = {1, 3, 2, 2};
    std::vector<size_t> new_strides = {12, 1, 6, 3};
    std::vector<float> gold         = {0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11};
    EXPECT(test::verify_range(results_vector, gold));
}

int main()
{
    fred();
...
```
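The gold vector in both tests is just the `{1, 2, 2, 3}` data `0..11` read back through the permuted strides `{12, 1, 6, 3}`, i.e. channels-last reordered to channels-first. A standalone check of that expectation:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    std::vector<float> data(12);
    for(int i = 0; i < 12; i++)
        data[i] = i;

    // lens/strides of the transposed view, as asserted by transpose_test
    std::vector<std::size_t> lens    = {1, 3, 2, 2};
    std::vector<std::size_t> strides = {12, 1, 6, 3};
    for(std::size_t a = 0; a < lens[0]; a++)
        for(std::size_t b = 0; b < lens[1]; b++)
            for(std::size_t c = 0; c < lens[2]; c++)
                for(std::size_t d = 0; d < lens[3]; d++)
                    std::cout << data[a * strides[0] + b * strides[1] +
                                      c * strides[2] + d * strides[3]] << " ";
    std::cout << "\n"; // 0 3 6 9 1 4 7 10 2 5 8 11 — the gold vector
}
```

This is also why `contiguous` matters: the transposed view alone shares the original buffer, and only the copy linearizes the data into the new order.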
```diff
@@ -491,6 +548,8 @@ int main()
     mul_test();
     gemm_test();
     reshape_test();
+    transpose_test();
+    contiguous_test();
     softmax_test();
     conv2d_test();
     conv2d_padding_test();
```