Commit 0c68e1cd authored by Scott Thornton

Added concat operator and CPU implementation; GPU implementation still needed

parent 20746f2c
@@ -283,6 +283,41 @@ struct contiguous
}
};
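// Concatenates its inputs along `axis`. All dimensions other than the axis must
// match across the inputs, and the output's axis dimension is the sum of the
// inputs' axis dimensions. For example (the shapes used by concat_test below),
// concatenating {2, 2} and {2, 3} along axis 1 yields {2, 5}.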
struct concat
{
std::size_t axis = 0;
std::string name() const { return "concat"; }
shape compute_shape(std::vector<shape> inputs) const
{
if (inputs.empty())
{
MIGRAPH_THROW("Number of input tensors should exceed 0");
}
const auto& first_shape_lens = inputs.front().lens();
const auto& type = inputs.front().type();
// All dimensions other than the concat axis must match across the inputs.
for(std::size_t l = 0; l < first_shape_lens.size(); l++)
{
if(l == axis)
continue;
if(!std::all_of(inputs.begin(), inputs.end(), [&](const auto& s) {
return s.lens()[l] == first_shape_lens[l];
}))
{
MIGRAPH_THROW("Non-axis dimensions should match");
}
}
// The output's concat axis is the sum of the inputs' sizes along that axis.
std::size_t new_dim_axis = 0;
for(const auto& input : inputs)
{
new_dim_axis += input.lens()[axis];
}
std::vector<std::size_t> new_lens = first_shape_lens;
new_lens[axis] = new_dim_axis;
return {type, new_lens};
}
};
struct slice
{
std::vector<int64_t> axes;
@@ -282,6 +282,79 @@ struct cpu_contiguous
}
};
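// CPU implementation of concat: each input is copied element by element into the
// output buffer, starting at a per-input offset computed along the concat axis.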
struct cpu_concat
{
struct tensor_descriptor
{
tensor_descriptor() = default;
tensor_descriptor(const shape& s)
: lens(s.lens()), strides(s.strides()) {}
// Convert a linear element index into a multi-dimensional index, assuming the
// strides describe a packed (standard) shape.
std::vector<std::size_t> multi(std::size_t idx) const
{
std::size_t sz = strides.size();
std::vector<std::size_t> result(sz);
std::size_t tidx = idx;
for(std::size_t is = 0; is < sz; is++)
{
result[is] = tidx / strides[is];
tidx = tidx % strides[is];
}
return result;
}
// Convert a multi-dimensional index back to a linear offset (dot product with the strides).
std::size_t linear(const std::vector<std::size_t>& s) const
{
std::size_t idx = 0;
for(std::size_t i = 0; i < s.size(); i++)
idx += s[i] * strides[i];
return idx;
}
std::vector<std::size_t> lens;
std::vector<std::size_t> strides;
};
op::concat op;
std::string name() const { return "cpu::concat"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
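// Computes, for each input, the linear offset into the output where that input's
// data begins. Worked example with the shapes from concat_test below: concatenating
// {2, 2} and {2, 3} along axis 1 gives an output shape {2, 5} with strides {5, 1},
// so the offsets are output_shape.index({0, 0}) = 0 and output_shape.index({0, 2}) = 2.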
std::vector<std::size_t> compute_offsets(const shape& output_shape,
const std::vector<argument>& args) const
{
std::vector<std::size_t> offsets;
std::vector<std::size_t> offset(args[0].get_shape().lens().size(), 0);
for(const auto& arg : args)
{
// Each input starts where the previous one ended along the concat axis.
offsets.push_back(output_shape.index(offset));
offset[op.axis] += arg.get_shape().lens()[op.axis];
}
return offsets;
}
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
std::vector<std::size_t> coffsets = compute_offsets(output_shape, args);
for(std::size_t l = 0; l < args.size(); l++)
{
const auto& argl = args[l];
std::size_t nelements = argl.get_shape().elements();
visit_all(result, argl)([&](auto output, auto input) {
// Copy every element of this input into the output: map the element's
// multi-index in the input to an offset using the output's strides, then
// shift by this input's starting offset along the concat axis.
auto* outptr = output.data() + coffsets[l];
const auto* inptr = input.data();
tensor_descriptor desc_input(input.get_shape());
tensor_descriptor desc_output(output.get_shape());
for(std::size_t i = 0; i < nelements; i++)
{
outptr[desc_output.linear(desc_input.multi(i))] = inptr[i];
}
});
}
return result;
}
};
struct cpu_gemm
{
op::gemm op;
@@ -557,6 +630,7 @@ struct cpu_apply
apply_map["batch_norm_inference"] =
extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
apply_map["contiguous"] = extend_op<cpu_contiguous, op::contiguous>();
apply_map["concat"] = extend_op<cpu_concat, op::concat>();
apply_map["identity"] = simple_op<cpu_unary<identity_op>>();
apply_map["tanh"] = simple_op<cpu_unary<tanh_op>>();
@@ -47,6 +47,22 @@ void slice_test()
}
}
void concat_test()
{
migraph::program p;
std::size_t axis = 1;
std::vector<int> data0 = {0, 1, 5, 6};
std::vector<int> data1 = {2, 3, 4, 5, 6, 7};
migraph::shape s0{migraph::shape::int32_type, {2, 2}};
migraph::shape s1{migraph::shape::int32_type, {2, 3}};
auto l0 = p.add_literal(migraph::literal{s0, data0});
auto l1 = p.add_literal(migraph::literal{s1, data1});
p.add_instruction(migraph::op::concat{axis}, l0, l1);
p.compile(migraph::cpu::cpu_target{});
auto result = p.eval({});
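// For the literals above, the result should be a {2, 5} tensor:
// row 0 = {0, 1, 2, 3, 4}, row 1 = {5, 6, 5, 6, 7}.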
std::cout << result << std::endl;
}
void squeeze_test()
{
{
@@ -905,6 +921,7 @@ void contiguous_test()
int main()
{
concat_test();
slice_test();
squeeze_test();
unsqueeze_test();