Unverified Commit 5cc6e160 authored by Paul Fultz II, committed by GitHub

Add conv ND for cpu (#561)



* Initial cpu conv-nd

* Formatting

* Make index signed

* Formatting

* Assert the indices are greater than 0

* Use equal instead of lexicographical_compare

* Formatting

* Fix tidy errors

* Formatting

* Handle different types

* Formatting

* Fix nested visits

* Formatting

* Add 3d conv test

* Formatting

* revert unnecessary changes

* remove a print line

* Fix ICE

* Formatting

* Use absolute path
Co-authored-by: Shucai Xiao <shucai.xiao@amd.com>
Co-authored-by: mvermeulen <5479696+mvermeulen@users.noreply.github.com>
parent d1258e80
@@ -144,9 +144,9 @@ rocmtest tidy: rocmnode('rocmtest') { cmake_build ->
env.CODECOV_TOKEN="8545af1c-f90b-4345-92a5-0d075503ca56"
sh '''
cd build
lcov --directory . --capture --output-file coverage.info
lcov --remove coverage.info '/usr/*' --output-file coverage.info
lcov --list coverage.info
lcov --directory . --capture --output-file $(pwd)/coverage.info
lcov --remove $(pwd)/coverage.info '/usr/*' --output-file $(pwd)/coverage.info
lcov --list $(pwd)/coverage.info
curl -s https://codecov.io/bash | bash
echo "Uploaded"
'''
@@ -93,7 +93,7 @@ struct shape
{
assert(std::distance(start, last) <= this->lens().size());
assert(this->lens().size() == this->strides().size());
return std::inner_product(start, last, this->strides().begin(), std::size_t{0});
return std::inner_product(start, last, this->strides().begin(), std::size_t{0}); // NOLINT
}
/// Map element index to space index
@@ -56,12 +56,16 @@ struct tensor_view
template <class Iterator, MIGRAPHX_REQUIRES(not std::is_integral<Iterator>{})>
const T& operator()(Iterator start, Iterator last) const
{
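// The multi-index must be non-empty and every coordinate non-negative
// before it is mapped to a flat offset.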
assert(std::distance(start, last) > 0);
assert(std::all_of(start, last, [](auto x) { return x >= 0; }));
return m_data[m_shape.index(start, last)];
}
template <class Iterator, MIGRAPHX_REQUIRES(not std::is_integral<Iterator>{})>
T& operator()(Iterator start, Iterator last)
{
assert(std::distance(start, last) > 0);
assert(std::all_of(start, last, [](auto x) { return x >= 0; }));
return m_data[m_shape.index(start, last)];
}
@@ -166,6 +166,21 @@ struct cpu_lrn
}
};
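// Visit the first tensor (the output) with its own element type, then visit the
// remaining tensors together, so the output type may differ from the input types.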
template <class V, class T, class... Ts>
void visit_quantize_impl(V&& v, T&& x, Ts&&... xs)
{
x.visit([&](auto y) { visit_all(xs...)([&](auto... ys) { v(y, ys...); }); });
}
template <class T, class... Ts>
auto visit_quantize(T&& x, Ts&&... xs)
{
return [&](auto v) {
// Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70100
visit_quantize_impl(v, x, xs...);
};
}
template <class Op>
struct cpu_convolution
{
@@ -182,38 +197,57 @@ struct cpu_convolution
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
argument result{output_shape};
result.visit([&](auto output) {
using type = typename decltype(output)::value_type;
visit_all(args[0], args[1])([&](auto input, auto weights) {
auto in = input.get_shape().lens();
auto in_h = in[2];
auto in_w = in[3];
visit_quantize(result, args[0], args[1])([&](auto output, auto input, auto weights) {
auto in_lens = input.get_shape().lens();
auto wei = weights.get_shape().lens();
auto wei_n = wei[0];
auto wei_c = wei[1];
auto wei_h = wei[2];
auto wei_w = wei[3];
auto wei_lens = weights.get_shape().lens();
auto wei_n = wei_lens[0];
auto wei_c = wei_lens[1];
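// The filter window spans the per-group input channels plus every spatial kernel dimension.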
std::vector<std::size_t> win_size(wei_lens.begin() + 1, wei_lens.end());
par_dfor(output_shape.lens()[0],
output_shape.lens()[1],
output_shape.lens()[2],
output_shape.lens()[3])(
[&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
const auto start_x = i * op.stride[0] - op.padding[0];
const auto start_y = j * op.stride[1] - op.padding[1];
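// One parallel task per output element; recover its multi-dimensional index from the linear offset.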
par_for(output_shape.elements(), [&](auto i) {
auto idx_o = output_shape.multi(i);
auto w = idx_o[1];
auto n_dim = idx_o.size();
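// Signed top-left corner of the filter window for each spatial dimension (negative inside the padding).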
std::vector<std::ptrdiff_t> win_start;
for(std::size_t dim = 2; dim < n_dim; ++dim)
{
auto d_2 = dim - 2;
win_start.push_back(std::ptrdiff_t(idx_o[dim] * op.stride[d_2]) -
std::ptrdiff_t(op.padding[d_2]));
}
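// Select which filter group this output channel belongs to (grouped convolution).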
const auto group_id = w / (wei_n / op.group);
type acc = type{0};
dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
const auto in_x = start_x + x;
const auto in_y = start_y + y;
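// Enumerate every position in the filter window through a shape, which generalizes to any
// number of spatial dims; accumulate in double to limit rounding error for low-precision types.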
shape win_shape{output_shape.type(), win_size};
double acc = 0.0;
shape_for_each(win_shape, [&](auto idx_win) {
auto k = idx_win[0];
const auto in_ch = group_id * wei_c + k;
if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
});
output(o, w, i, j) = acc;
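// Input coordinate: copy the output multi-index, substitute the input channel,
// and offset the spatial dims by the window start.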
std::vector<std::ptrdiff_t> idx(idx_o.begin(), idx_o.end());
idx[1] = in_ch;
std::transform(idx_win.begin() + 1,
idx_win.end(),
win_start.begin(),
idx.begin() + 2,
[](std::ptrdiff_t ii, std::ptrdiff_t jj) { return ii + jj; });
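// Weight coordinate: the output channel followed by the position inside the window.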
std::vector<std::ptrdiff_t> idx_wei(idx_o.size());
idx_wei[0] = w;
std::copy(idx_win.begin(), idx_win.end(), idx_wei.begin() + 1);
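// Accumulate only when the window position falls inside the input; positions over the padding contribute nothing.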
if(std::all_of(idx.begin() + 2, idx.end(), [&](auto ii) { return ii >= 0; }) and
std::equal(idx.begin(),
idx.end(),
in_lens.begin(),
in_lens.end(),
std::less<std::ptrdiff_t>{}))
{
acc +=
input(idx.begin(), idx.end()) * weights(idx_wei.begin(), idx_wei.end());
}
});
output[i] = acc;
});
});
return result;
@@ -1890,6 +1890,69 @@ TEST_CASE(conv2d_test)
EXPECT(migraphx::verify_range(results_vector, s));
}
TEST_CASE(conv3d_test)
{
migraphx::program p;
std::vector<float> a = {
2.71567607, -0.9960829, 0.91671127, 0.28140706, 0.63235772, 0.08077253, 0.80927712,
-0.59108931, -1.05421555, -2.76622486, -0.85044265, -0.52049929, 0.67726439, -0.65290606,
0.02345525, -0.33579525, 0.38901961, 1.05473483, -1.31188095, 1.8963089, -0.07265259,
0.947339, 0.41949373, -0.70814759, 0.25892952, 1.07311416, 1.2571274, -0.62318051,
-0.19951548, -0.94232577, -0.29393643, 0.42292568, -0.80230367, 1.40909171, 0.63617158,
0.13900366, 1.09253144, -0.15265895, 1.54781747, 0.72780299, 1.09189606, -0.38068101,
0.97057933, -0.58958799, 1.56188643, 0.21474874, 0.58725154, -1.27097559, -0.03024297,
1.09437096, -0.4897908, 0.34838957, -1.31042492, -1.69069934, 0.86956722, -0.40457946,
0.46691212, 1.29273605, 0.26464137, 0.22073045, -1.02178168, 0.22163901, -1.84387338,
0.75522131, -0.45775682, -0.42241111, -1.50944722, 1.07256448, -1.95876884, -0.28106022,
0.3341668, 2.13129425, -1.14728117, -1.06555498, -0.298444, -0.88322699, -0.65866792,
-2.06007552, 0.01374334, 0.45612028, 0.52715492, 1.01914406, -1.72659791, 0.80650896,
0.16860051, 2.24112225, -0.78620857, 0.36566174, -0.07020134, -0.47976932, -0.68230027,
-0.94711417, -0.54506505, 1.66504931, -0.71860826, 0.61132306};
std::vector<float> c = {
2.82721668e-02, 6.44195229e-02, 1.53499246e-02, 1.72468081e-01, -6.33238107e-02,
9.49496776e-02, 1.40258059e-01, -7.92879611e-02, -1.29301161e-01, 3.11307609e-03,
-1.90624535e-01, 1.13238767e-01, -2.80647576e-02, 3.12882811e-02, -3.52091640e-02,
3.33581865e-02, 6.43158704e-02, 7.40238279e-02, -1.00106120e-01, -9.56912562e-02,
1.44342467e-01, 9.40258950e-02, 6.36333972e-02, 1.66158378e-03, -8.91554281e-02,
2.58734226e-02, 1.70919895e-02, 1.78214177e-01, 8.84564668e-02, 8.98126513e-02,
-1.63809001e-01, 1.37802169e-01, 1.66439757e-01, -1.45631135e-02, 1.88469887e-04,
4.76950556e-02, -1.91969007e-01, -1.76233292e-01, -7.70473927e-02, 1.14828631e-01,
1.76608220e-01, -1.50728196e-01, 1.99946314e-02, -5.88052124e-02, 1.31612435e-01,
1.61106288e-02, -1.35080189e-01, 1.49512306e-01, 3.86456847e-02, 1.29330024e-01,
-3.22975963e-02, -5.60784787e-02, -5.41997552e-02, 4.78562862e-02};
std::vector<float> s = {0.27039781,
0.19105849,
-0.06339942,
-0.65087199,
0.40867025,
0.05063812,
-0.14907975,
0.49018705,
-0.49197209,
0.33236548,
-0.39374301,
0.16012701,
0.06574871,
0.71606487,
-0.55201721,
-0.46427044};
migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 4, 4, 1}};
auto al = p.add_literal(migraphx::literal{a_shape, a});
migraphx::shape c_shape{migraphx::shape::float_type, {2, 3, 3, 3, 1}};
auto cl = p.add_literal(migraphx::literal{c_shape, c});
p.add_instruction(migraphx::op::convolution{{0, 0, 0}, {1, 1, 1}, {1, 1, 1}}, al, cl);
p.compile(migraphx::cpu::target{});
auto result = p.eval({}).back();
std::vector<float> results_vector(16);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, s));
}
TEST_CASE(conv2d_padding_test)
{
migraphx::program p;
@@ -724,6 +724,21 @@ struct test_conv2 : verify_program<test_conv2>
}
};
struct test_conv3d : verify_program<test_conv3d>
{
migraphx::program create_program() const
{
migraphx::program p;
auto input =
p.add_parameter("x", migraphx::shape{migraphx::shape::float_type, {4, 3, 3, 3, 3}});
auto weights =
p.add_parameter("w", migraphx::shape{migraphx::shape::float_type, {4, 3, 3, 3, 3}});
p.add_instruction(
migraphx::op::convolution{{0, 0, 0}, {1, 1, 1}, {1, 1, 1}}, input, weights);
return p;
}
};
struct test_group_conv : verify_program<test_group_conv>
{
migraphx::program create_program() const