Merge pull request #50 from ROCmSoftwarePlatform/im2col_cpu

Im2col cpu

Merge pull request #50 from ROCmSoftwarePlatform/im2col_cpu
Im2col cpu
f550da30 · Paul Fultz II · GitHub · 9fee0fe4 · d9170e2d · f550da30
Unverified Commit f550da30 authored Sep 12, 2018 by Paul Fultz II Committed by GitHub Sep 12, 2018
Showing 3 changed files with 263 additions and 0 deletions

src/include/migraph/operators.hpp +45 -0

src/targets/cpu/cpu_lowering.cpp +58 -0

test/cpu_ops_test.cpp +160 -0

No files found.
--- a/src/include/migraph/operators.hpp
+++ b/src/include/migraph/operators.hpp
@@ -131,6 +131,51 @@ struct convolution
    }
 };
+/// Describes the output shape of an im2col operation.
+///
+/// Expects two inputs, {image, weights}; their lens() are read at indices 0..3. The resulting
+/// shape is {output_height * output_width, kernel_height * kernel_width * input_channels},
+/// where input_channels is taken from weights.lens()[1]. This struct never computes data:
+/// compute() always throws.
+struct im2col
+{
+    std::array<std::size_t, 2> padding  = {{0, 0}};
+    std::array<std::size_t, 2> stride   = {{1, 1}};
+    std::array<std::size_t, 2> dilation = {{1, 1}};
+    enum padding_mode_t
+    {
+        default_, // NOLINT
+        same,
+        valid
+    };
+    std::string name() const { return "im2col"; }
+    /// Returns the shape of the im2col matrix for inputs {image, weights}.
+    /// Throws through MIGRAPH_THROW when the image's first dimension (batch size) is not 1.
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        // Validate the number of inputs before indexing into `inputs`.
+        check_shapes{inputs, *this}.has(2);
+        const auto& input   = inputs[0];
+        const auto& weights = inputs[1];
+        auto batch_size     = input.lens()[0];
+        auto input_channels = weights.lens()[1];
+        auto kernel_height  = weights.lens()[2];
+        auto kernel_width   = weights.lens()[3];
+        if(batch_size != 1)
+            MIGRAPH_THROW("im2col only support batch_size 1");
+        // Signed arithmetic on purpose: in std::size_t a dilated kernel larger than the padded
+        // input wraps around to a huge positive value, so the clamp to 1 would never apply.
+        auto output_len = [&](std::size_t axis, std::size_t kernel_len) {
+            auto as_signed = [](std::size_t x) { return static_cast<std::ptrdiff_t>(x); };
+            const std::ptrdiff_t kernel_extent =
+                1 + as_signed(dilation[axis]) * (as_signed(kernel_len) - 1);
+            const std::ptrdiff_t padded_len =
+                as_signed(input.lens()[axis + 2]) + 2 * as_signed(padding[axis]);
+            return static_cast<std::size_t>(std::max<std::ptrdiff_t>(
+                1, (padded_len - kernel_extent) / as_signed(stride[axis]) + 1));
+        };
+        auto output_height = output_len(0, kernel_height);
+        auto output_width  = output_len(1, kernel_width);
+        auto channels_col  = kernel_height * kernel_width * input_channels;
+        return {input.type(), {output_height * output_width, channels_col}};
+    }
+    /// Not computable on its own; always throws.
+    argument compute(context&, const shape&, const std::vector<argument>&) const
+    {
+        MIGRAPH_THROW("not computable");
+    }
+};
 struct pooling
 {
    std::string mode                   = "average";

--- a/src/targets/cpu/cpu_lowering.cpp
+++ b/src/targets/cpu/cpu_lowering.cpp
@@ -134,6 +134,63 @@ struct cpu_convolution
    }
 };
+/// CPU implementation of im2col.
+///
+/// Wraps an `im2col` op for its padding/stride/dilation settings and shape computation.
+/// compute() reads the image from args[0] and only the shape of args[1] (the weights), and
+/// writes one output row per window position and one column per (channel, kernel row,
+/// kernel column) triple. Positions that fall outside the image are written as 0.
+struct cpu_im2col
+{
+    im2col op;
+    static std::string name() { return "cpu::im2col"; }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    {
+        argument result{output_shape};
+        auto input_shape   = args[0].get_shape();
+        auto weights_shape = args[1].get_shape();
+        visit_all(result, args[0])([&](auto col, auto input) {
+            // All coordinate math is signed: a window may start at a negative position when
+            // padding is used, which unsigned arithmetic can only represent by wrapping.
+            using index = std::ptrdiff_t;
+            const std::size_t channels = weights_shape.lens()[1];
+            const std::size_t kernel_h = weights_shape.lens()[2];
+            const std::size_t kernel_w = weights_shape.lens()[3];
+            const auto height          = static_cast<index>(input_shape.lens()[2]);
+            const auto width           = static_cast<index>(input_shape.lens()[3]);
+            const auto pad_h           = static_cast<index>(op.padding[0]);
+            const auto pad_w           = static_cast<index>(op.padding[1]);
+            const auto stride_h        = static_cast<index>(op.stride[0]);
+            const auto stride_w        = static_cast<index>(op.stride[1]);
+            const auto dilation_h      = static_cast<index>(op.dilation[0]);
+            const auto dilation_w      = static_cast<index>(op.dilation[1]);
+            // Number of window positions along one axis. This mirrors im2col::compute_shape
+            // (dilation included, clamped to at least 1) so the loops below never write more
+            // rows than the output shape provides.
+            const auto output_len = [](index in_len,
+                                       std::size_t kernel_len,
+                                       index pad,
+                                       index stride,
+                                       index dilation) -> index {
+                const index kernel_extent = 1 + dilation * (static_cast<index>(kernel_len) - 1);
+                const index len           = (in_len - kernel_extent + 2 * pad) / stride + 1;
+                return len < 1 ? 1 : len;
+            };
+            const index col_height = output_len(height, kernel_h, pad_h, stride_h, dilation_h);
+            const index col_width  = output_len(width, kernel_w, pad_w, stride_w, dilation_w);
+            // loop over output pixels (ioutput, joutput)
+            for(index ioutput = 0; ioutput < col_height; ioutput++)
+            {
+                // input row of the window's first kernel tap (negative inside the padding)
+                const index row_origin = ioutput * stride_h - pad_h;
+                for(index joutput = 0; joutput < col_width; joutput++)
+                {
+                    const index col_origin = joutput * stride_w - pad_w;
+                    // compute linear index for output
+                    const auto ldx = static_cast<std::size_t>(ioutput * col_width + joutput);
+                    std::size_t p  = 0;
+                    dfor(channels,
+                         kernel_h,
+                         kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
+                        const index idx   = row_origin + static_cast<index>(koffset) * dilation_h;
+                        const index jdx   = col_origin + static_cast<index>(loffset) * dilation_w;
+                        const bool inside = idx >= 0 && idx < height && jdx >= 0 && jdx < width;
+                        col(ldx, p)       = inside ? input(0, c, idx, jdx) : 0;
+                        p++;
+                    });
+                }
+            }
+        });
+        return result;
+    }
+};
 struct max_pool
 {
    static std::string name() { return "max"; }
@@ -494,6 +551,7 @@ struct cpu_apply
    void init()
    {
+        apply_map["im2col"]      = extend_op<cpu_im2col, im2col>();
        apply_map["convolution"] = extend_op<cpu_convolution, convolution>();
        apply_map["gemm"]        = extend_op<cpu_gemm, gemm>();
        apply_map["batch_norm_inference"] =

--- a/test/cpu_ops_test.cpp
+++ b/test/cpu_ops_test.cpp
@@ -6,6 +6,132 @@
 #include <migraph/verify.hpp>
 #include "test.hpp"
+// 3x3 single-channel image, 3x3 kernel, no padding, unit stride: the im2col output is
+// expected to be element-for-element equal to the input image.
+void im2col_3x3_no_pad_identity_test()
+{
+    const std::size_t num_channels = 1;
+    const std::array<std::size_t, 2> kernel_dims{{3, 3}};
+    const std::array<std::size_t, 2> image_dims{{3, 3}};
+    const std::array<std::size_t, 2> pads{{0, 0}};
+    const std::array<std::size_t, 2> strides{{1, 1}};
+    const std::array<std::size_t, 2> dilations{{1, 1}};
+    // image pixels are 0, 1, 2, ...; the weights stay zero-initialised
+    std::vector<int32_t> image_data(num_channels * image_dims[0] * image_dims[1]);
+    std::iota(image_data.begin(), image_data.end(), 0);
+    std::vector<int32_t> kernel_data(num_channels * kernel_dims[0] * kernel_dims[1]);
+    migraph::shape image_shape{migraph::shape::int32_type,
+                               {1, num_channels, image_dims[0], image_dims[1]}};
+    migraph::shape kernel_shape{migraph::shape::int32_type,
+                                {1, num_channels, kernel_dims[0], kernel_dims[1]}};
+    migraph::program prog;
+    auto image_lit  = prog.add_literal(migraph::literal{image_shape, image_data});
+    auto kernel_lit = prog.add_literal(migraph::literal{kernel_shape, kernel_data});
+    prog.add_instruction(migraph::im2col{pads, strides, dilations}, image_lit, kernel_lit);
+    prog.compile(migraph::cpu::cpu_target{});
+    auto evaluated = prog.eval({});
+    // number of window positions along the given spatial axis
+    const auto windows_along = [&](std::size_t axis) {
+        return (image_dims[axis] - kernel_dims[axis] + 2 * pads[axis]) / strides[axis] + 1;
+    };
+    std::vector<float> flattened(num_channels * kernel_dims[0] * kernel_dims[1] *
+                                 windows_along(0) * windows_along(1));
+    evaluated.visit([&](auto view) { flattened.assign(view.begin(), view.end()); });
+    EXPECT(migraph::verify_range(flattened, image_data));
+}
+// 4x4 single-channel image, 3x3 kernel, no padding, unit stride: four windows, each
+// flattened into one row of nine values.
+void im2col_3x3_no_pad_test()
+{
+    const std::size_t num_channels = 1;
+    const std::array<std::size_t, 2> kernel_dims{{3, 3}};
+    const std::array<std::size_t, 2> image_dims{{4, 4}};
+    const std::array<std::size_t, 2> pads{{0, 0}};
+    const std::array<std::size_t, 2> strides{{1, 1}};
+    const std::array<std::size_t, 2> dilations{{1, 1}};
+    // image pixels are 0, 1, 2, ...; the weights stay zero-initialised
+    std::vector<int32_t> image_data(num_channels * image_dims[0] * image_dims[1]);
+    std::iota(image_data.begin(), image_data.end(), 0);
+    std::vector<int32_t> kernel_data(num_channels * kernel_dims[0] * kernel_dims[1]);
+    migraph::shape image_shape{migraph::shape::int32_type,
+                               {1, num_channels, image_dims[0], image_dims[1]}};
+    migraph::shape kernel_shape{migraph::shape::int32_type,
+                                {1, num_channels, kernel_dims[0], kernel_dims[1]}};
+    migraph::program prog;
+    auto image_lit  = prog.add_literal(migraph::literal{image_shape, image_data});
+    auto kernel_lit = prog.add_literal(migraph::literal{kernel_shape, kernel_data});
+    prog.add_instruction(migraph::im2col{pads, strides, dilations}, image_lit, kernel_lit);
+    prog.compile(migraph::cpu::cpu_target{});
+    auto evaluated = prog.eval({});
+    // one line per window position
+    std::vector<int> expected = {0, 1, 2, 4, 5, 6, 8,  9,  10,
+                                 1, 2, 3, 5, 6, 7, 9,  10, 11,
+                                 4, 5, 6, 8, 9, 10, 12, 13, 14,
+                                 5, 6, 7, 9, 10, 11, 13, 14, 15};
+    // number of window positions along the given spatial axis
+    const auto windows_along = [&](std::size_t axis) {
+        return (image_dims[axis] - kernel_dims[axis] + 2 * pads[axis]) / strides[axis] + 1;
+    };
+    std::vector<float> flattened(num_channels * kernel_dims[0] * kernel_dims[1] *
+                                 windows_along(0) * windows_along(1));
+    evaluated.visit([&](auto view) { flattened.assign(view.begin(), view.end()); });
+    EXPECT(migraph::verify_range(flattened, expected));
+}
+// 6x6 single-channel image, 3x3 kernel, no padding, stride 2 in both directions: four
+// windows, each flattened into one row of nine values.
+void im2col_3x3_stride_2_no_pad_test()
+{
+    const std::size_t num_channels = 1;
+    const std::array<std::size_t, 2> kernel_dims{{3, 3}};
+    const std::array<std::size_t, 2> image_dims{{6, 6}};
+    const std::array<std::size_t, 2> pads{{0, 0}};
+    const std::array<std::size_t, 2> strides{{2, 2}};
+    const std::array<std::size_t, 2> dilations{{1, 1}};
+    // image pixels are 0, 1, 2, ...; the weights stay zero-initialised
+    std::vector<int32_t> image_data(num_channels * image_dims[0] * image_dims[1]);
+    std::iota(image_data.begin(), image_data.end(), 0);
+    std::vector<int32_t> kernel_data(num_channels * kernel_dims[0] * kernel_dims[1]);
+    migraph::shape image_shape{migraph::shape::int32_type,
+                               {1, num_channels, image_dims[0], image_dims[1]}};
+    migraph::shape kernel_shape{migraph::shape::int32_type,
+                                {1, num_channels, kernel_dims[0], kernel_dims[1]}};
+    migraph::program prog;
+    auto image_lit  = prog.add_literal(migraph::literal{image_shape, image_data});
+    auto kernel_lit = prog.add_literal(migraph::literal{kernel_shape, kernel_data});
+    prog.add_instruction(migraph::im2col{pads, strides, dilations}, image_lit, kernel_lit);
+    prog.compile(migraph::cpu::cpu_target{});
+    auto evaluated = prog.eval({});
+    // one line per window position
+    std::vector<int> expected = {0,  1,  2,  6,  7,  8,  12, 13, 14,
+                                 2,  3,  4,  8,  9,  10, 14, 15, 16,
+                                 12, 13, 14, 18, 19, 20, 24, 25, 26,
+                                 14, 15, 16, 20, 21, 22, 26, 27, 28};
+    // number of window positions along the given spatial axis
+    const auto windows_along = [&](std::size_t axis) {
+        return (image_dims[axis] - kernel_dims[axis] + 2 * pads[axis]) / strides[axis] + 1;
+    };
+    std::vector<float> flattened(num_channels * kernel_dims[0] * kernel_dims[1] *
+                                 windows_along(0) * windows_along(1));
+    evaluated.visit([&](auto view) { flattened.assign(view.begin(), view.end()); });
+    EXPECT(migraph::verify_range(flattened, expected));
+}
+// 2x2 single-channel image, 3x3 kernel, padding 1, unit stride: four windows whose
+// out-of-image positions are expected to be 0.
+void im2col_3x3_with_padding_test()
+{
+    const std::size_t num_channels = 1;
+    const std::array<std::size_t, 2> kernel_dims{{3, 3}};
+    const std::array<std::size_t, 2> image_dims{{2, 2}};
+    const std::array<std::size_t, 2> pads{{1, 1}};
+    const std::array<std::size_t, 2> strides{{1, 1}};
+    const std::array<std::size_t, 2> dilations{{1, 1}};
+    // image pixels are 0, 1, 2, 3; the weights stay zero-initialised
+    std::vector<int32_t> image_data(num_channels * image_dims[0] * image_dims[1]);
+    std::iota(image_data.begin(), image_data.end(), 0);
+    std::vector<int32_t> kernel_data(num_channels * kernel_dims[0] * kernel_dims[1]);
+    migraph::shape image_shape{migraph::shape::int32_type,
+                               {1, num_channels, image_dims[0], image_dims[1]}};
+    migraph::shape kernel_shape{migraph::shape::int32_type,
+                                {1, num_channels, kernel_dims[0], kernel_dims[1]}};
+    migraph::program prog;
+    auto image_lit  = prog.add_literal(migraph::literal{image_shape, image_data});
+    auto kernel_lit = prog.add_literal(migraph::literal{kernel_shape, kernel_data});
+    prog.add_instruction(migraph::im2col{pads, strides, dilations}, image_lit, kernel_lit);
+    prog.compile(migraph::cpu::cpu_target{});
+    auto evaluated = prog.eval({});
+    // one line per window position
+    std::vector<int> expected = {0, 0, 0, 0, 0, 1, 0, 2, 3,
+                                 0, 0, 0, 0, 1, 0, 2, 3, 0,
+                                 0, 0, 1, 0, 2, 3, 0, 0, 0,
+                                 0, 1, 0, 2, 3, 0, 0, 0, 0};
+    // Number of window positions along the given spatial axis. The subtraction wraps in
+    // std::size_t here (2 - 3) and is brought back into range by adding the padding.
+    const auto windows_along = [&](std::size_t axis) {
+        return (image_dims[axis] - kernel_dims[axis] + 2 * pads[axis]) / strides[axis] + 1;
+    };
+    std::vector<float> flattened(num_channels * kernel_dims[0] * kernel_dims[1] *
+                                 windows_along(0) * windows_along(1));
+    evaluated.visit([&](auto view) { flattened.assign(view.begin(), view.end()); });
+    EXPECT(migraph::verify_range(flattened, expected));
+}
 void batch_norm_inference_test()
 {
    migraph::program p;
@@ -46,6 +172,35 @@ void batch_norm_inference_test()
    EXPECT(migraph::verify_range(result_vector, gold));
 }
+// 3x3 image with two channels, 3x3 kernel, no padding, unit stride: the im2col output is
+// expected to be element-for-element equal to the input.
+void im2col_3x3_with_channels_identity_test()
+{
+    const std::size_t num_channels = 2;
+    const std::array<std::size_t, 2> kernel_dims{{3, 3}};
+    const std::array<std::size_t, 2> image_dims{{3, 3}};
+    const std::array<std::size_t, 2> pads{{0, 0}};
+    const std::array<std::size_t, 2> strides{{1, 1}};
+    const std::array<std::size_t, 2> dilations{{1, 1}};
+    // image values are 0, 1, 2, ... across both channels; the weights stay zero-initialised
+    std::vector<int32_t> image_data(num_channels * image_dims[0] * image_dims[1]);
+    std::iota(image_data.begin(), image_data.end(), 0);
+    std::vector<int32_t> kernel_data(num_channels * kernel_dims[0] * kernel_dims[1]);
+    migraph::shape image_shape{migraph::shape::int32_type,
+                               {1, num_channels, image_dims[0], image_dims[1]}};
+    migraph::shape kernel_shape{migraph::shape::int32_type,
+                                {1, num_channels, kernel_dims[0], kernel_dims[1]}};
+    migraph::program prog;
+    auto image_lit  = prog.add_literal(migraph::literal{image_shape, image_data});
+    auto kernel_lit = prog.add_literal(migraph::literal{kernel_shape, kernel_data});
+    prog.add_instruction(migraph::im2col{pads, strides, dilations}, image_lit, kernel_lit);
+    prog.compile(migraph::cpu::cpu_target{});
+    auto evaluated = prog.eval({});
+    // number of window positions along the given spatial axis
+    const auto windows_along = [&](std::size_t axis) {
+        return (image_dims[axis] - kernel_dims[axis] + 2 * pads[axis]) / strides[axis] + 1;
+    };
+    std::vector<float> flattened(num_channels * kernel_dims[0] * kernel_dims[1] *
+                                 windows_along(0) * windows_along(1));
+    evaluated.visit([&](auto view) { flattened.assign(view.begin(), view.end()); });
+    EXPECT(migraph::verify_range(flattened, image_data));
+}
 void exp_test()
 {
    migraph::program p;
@@ -666,4 +821,9 @@ int main()
    conv2d_padding_test();
    conv2d_padding_stride_test();
    batch_norm_inference_test();
+    im2col_3x3_no_pad_identity_test();
+    im2col_3x3_no_pad_test();
+    im2col_3x3_stride_2_no_pad_test();
+    im2col_3x3_with_channels_identity_test();
+    im2col_3x3_with_padding_test();
 }