Merge from master

1682b223 · Paul · e83034cf · 1fc7013f · 1682b223 · 1682b223
Commit 1682b223 authored Sep 30, 2018 by Paul
10 changed files
--- a/src/targets/gpu/include/migraph/gpu/context.hpp
+++ b/src/targets/gpu/include/migraph/gpu/context.hpp
@@ -12,12 +12,11 @@ struct context
 {
    shared<miopen_handle> handle;
    shared<rocblas_handle_ptr> rbhandle;
+    argument scratch;
    std::vector<argument> literals{};
    void finish() const { gpu_sync(); }
 };
-
 } // namespace gpu
-
 } // namespace migraph

 #endif
--- a/src/targets/gpu/include/migraph/gpu/hip.hpp
+++ b/src/targets/gpu/include/migraph/gpu/hip.hpp
@@ -15,6 +15,8 @@ migraph::argument from_gpu(migraph::argument arg);

 void gpu_sync();

+void copy_to_gpu(char* dst, const char* src, std::size_t size);
+
 struct hip_allocate
 {
    std::string tag{};
@@ -30,22 +32,6 @@ struct hip_allocate
    }
 };

-struct hip_load
-{
-    shape s;
-    std::size_t offset = 0;
-    std::string name() const { return "hip::load"; }
-    shape compute_shape(const std::vector<shape>& inputs) const
-    {
-        check_shapes{inputs}.has(1);
-        return s;
-    }
-    argument compute(context&, const shape&, const std::vector<argument>& args) const
-    {
-        return {s, args[0].data() + offset};
-    }
-};
-
 struct hip_sync
 {
    std::string tag{};
@@ -81,8 +67,21 @@ struct hip_write
    }
 };

+struct hip_memcpy // Operator that copies input 1 into input 0's buffer (shifted by `offset`) via copy_to_gpu.
+{
+    std::string name() const { return "hip_memcpy"; }
+    shape compute_shape(std::vector<shape> inputs) const { return inputs.at(1); } // output shape is the shape of input 1 (the source)
+    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    {
+        char* dst        = args.at(0).data() + offset; // destination: input 0's data pointer advanced by `offset` chars
+        const char* src  = args.at(1).data();
+        std::size_t size = args.at(1).get_shape().bytes(); // number of bytes copied is taken from the source's shape
+        copy_to_gpu(dst, src, size); // NOTE(review): nothing here checks that offset + size fits inside input 0 -- confirm callers guarantee it
+        return {std::move(output_shape), dst}; // the returned argument is built from output_shape and the destination pointer
+    }
+    std::size_t offset = 0; // displacement added to the destination pointer in compute(); defaults to 0
+};
 } // namespace gpu
-
 } // namespace migraph

 #endif
--- a/src/targets/gpu/include/migraph/gpu/target.hpp
+++ b/src/targets/gpu/include/migraph/gpu/target.hpp
@@ -12,9 +12,7 @@ struct target
    std::vector<pass> get_passes(migraph::context& gctx) const;
    migraph::context get_context() const;
 };
-
 } // namespace gpu
-
 } // namespace migraph

 #endif
--- a/src/targets/gpu/lowering.cpp
+++ b/src/targets/gpu/lowering.cpp
@@ -107,9 +107,12 @@ struct miopen_convolution
            ctx.handle.get(), w_desc.get(), x_desc.get(), cd.get(), y_desc.get(), &workspace_size);
        workspace_shape = shape{shape::int8_type, {workspace_size}};

-        auto x         = to_gpu(generate_argument(inputs[0]->get_shape()));
-        auto w         = to_gpu(generate_argument(inputs[1]->get_shape()));
-        auto y         = to_gpu(generate_argument(output_shape));
+        auto x = to_gpu(generate_argument(inputs[0]->get_shape()));
+        gpu_sync();
+        auto w = to_gpu(generate_argument(inputs[1]->get_shape()));
+        gpu_sync();
+        auto y = to_gpu(generate_argument(output_shape));
+        gpu_sync();
        auto workspace = allocate_gpu(workspace_shape);

        int algo_count = 1;
@@ -497,7 +500,5 @@ struct miopen_apply
 };

 void lowering::apply(program& p) const { miopen_apply{&p, ctx}.apply(); }
-
 } // namespace gpu
-
 } // namespace migraph
--- a/src/targets/gpu/target.cpp
+++ b/src/targets/gpu/target.cpp
 #include <migraph/gpu/target.hpp>
 #include <migraph/gpu/lowering.hpp>
+#include <migraph/memory_coloring.hpp>
 #include <migraph/gpu/write_literals.hpp>
 #include <migraph/gpu/context.hpp>
 #include <migraph/gpu/eliminate_workspace.hpp>
@@ -28,6 +29,7 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
        simplify_reshapes{},
        dead_code_elimination{},
        lowering{ctx},
+        memory_coloring{"hip::allocate"},
        fuse_ops{},
        dead_code_elimination{},
        eliminate_contiguous{},
@@ -45,10 +47,8 @@ std::string target::name() const { return "miopen"; }

 migraph::context target::get_context() const
 {
-    return context{share(make_obj<miopen_handle>(&miopenCreate)),
-                   share(create_rocblas_handle_ptr())};
+    return context{
+        share(make_obj<miopen_handle>(&miopenCreate)), share(create_rocblas_handle_ptr()), {}};
 }
-
 } // namespace gpu
-
 } // namespace migraph
--- a/src/targets/gpu/write_literals.cpp
+++ b/src/targets/gpu/write_literals.cpp
@@ -37,7 +37,5 @@ void write_literals::apply(program& p) const
        }
    }
 }
-
 } // namespace gpu
-
 } // namespace migraph
--- a/test/cpu_ops_test.cpp
+++ b/test/cpu_ops_test.cpp
@@ -6,6 +6,109 @@
 #include <migraph/verify.hpp>
 #include "test.hpp"

+void slice_test() // Evaluates op::slice on the CPU target for two configurations of a 2x2x3 int32 literal.
+{
+    { // Case 1: slice{{2}, {1}, {3}} (presumably axes, starts, ends -- confirm against op::slice).
+        migraph::program p;
+        std::vector<int> data(2 * 2 * 3);
+        std::iota(data.begin(), data.end(), 0); // input values are 0..11
+        migraph::shape s{migraph::shape::int32_type, {2, 2, 3}};
+        auto l0 = p.add_literal(migraph::literal{s, data});
+        p.add_instruction(migraph::op::slice{{2}, {1}, {3}}, l0);
+        migraph::shape s2{migraph::shape::int32_type, {2, 2, 2}, {6, 3, 1}}; // before compile: lens {2, 2, 2} with strides {6, 3, 1}
+        EXPECT(p.get_shape() == s2);
+        p.compile(migraph::cpu::cpu_target{});
+        migraph::shape sresult{migraph::shape::int32_type, {2, 2, 2}, {4, 2, 1}}; // after eval: same lens, strides {4, 2, 1}
+        auto result           = p.eval({});
+        std::vector<int> gold = {1, 2, 4, 5, 7, 8, 10, 11}; // values 1 and 2 of every consecutive group of three inputs
+        std::vector<int> results_vector(2 * 2 * 2);
+        result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
+        EXPECT(migraph::verify_range(results_vector, gold));
+        EXPECT(result.get_shape() == sresult);
+    }
+    { // Case 2: slice{{0, 1, 2}, {0, 0, 0}, {2, 2, 2}} -- three entries per list instead of one.
+        migraph::program p;
+        std::vector<int> data(2 * 2 * 3);
+        std::iota(data.begin(), data.end(), 0); // input values are 0..11
+        migraph::shape s{migraph::shape::int32_type, {2, 2, 3}};
+        auto l0 = p.add_literal(migraph::literal{s, data});
+        p.add_instruction(migraph::op::slice{{0, 1, 2}, {0, 0, 0}, {2, 2, 2}}, l0);
+        migraph::shape s2{migraph::shape::int32_type, {2, 2, 2}, {6, 3, 1}}; // before compile: lens {2, 2, 2} with strides {6, 3, 1}
+        EXPECT(p.get_shape() == s2);
+        p.compile(migraph::cpu::cpu_target{});
+        migraph::shape sresult{migraph::shape::int32_type, {2, 2, 2}, {4, 2, 1}}; // after eval: same lens, strides {4, 2, 1}
+        auto result           = p.eval({});
+        std::vector<int> gold = {0, 1, 3, 4, 6, 7, 9, 10}; // values 0 and 1 of every consecutive group of three inputs
+        std::vector<int> results_vector(2 * 2 * 2);
+        result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
+        EXPECT(migraph::verify_range(results_vector, gold));
+        EXPECT(result.get_shape() == sresult);
+    }
+}
+
+void squeeze_test() // Checks the result shape of op::squeeze on the CPU target; only shapes are compared, not values.
+{
+    { // squeeze{{1}}: {4, 1, 3, 1, 3} is expected to become {4, 3, 1, 3}.
+        migraph::program p;
+        std::vector<float> data(4 * 3 * 3); // 36 value-initialized floats; contents are never inspected
+        migraph::shape s1{migraph::shape::float_type, {4, 1, 3, 1, 3}};
+        migraph::shape s2{migraph::shape::float_type, {4, 3, 1, 3}};
+        auto l0 = p.add_literal(migraph::literal{s1, data});
+        p.add_instruction(migraph::op::squeeze{{1}}, l0);
+        p.compile(migraph::cpu::cpu_target{});
+        auto result = p.eval({});
+        EXPECT(result.get_shape() == s2);
+    }
+    { // squeeze{{3}}: {4, 1, 3, 1, 3} is expected to become {4, 1, 3, 3}.
+        migraph::program p;
+        std::vector<float> data(4 * 3 * 3);
+        migraph::shape s1{migraph::shape::float_type, {4, 1, 3, 1, 3}};
+        migraph::shape s2{migraph::shape::float_type, {4, 1, 3, 3}};
+        auto l0 = p.add_literal(migraph::literal{s1, data});
+        p.add_instruction(migraph::op::squeeze{{3}}, l0);
+        p.compile(migraph::cpu::cpu_target{});
+        auto result = p.eval({});
+        EXPECT(result.get_shape() == s2);
+    }
+    { // squeeze{} (default-constructed): both size-1 dimensions are expected to go, leaving {4, 3, 3}.
+        migraph::program p;
+        std::vector<float> data(4 * 3 * 3);
+        migraph::shape s1{migraph::shape::float_type, {4, 1, 3, 1, 3}};
+        migraph::shape s2{migraph::shape::float_type, {4, 3, 3}};
+        auto l0 = p.add_literal(migraph::literal{s1, data});
+        p.add_instruction(migraph::op::squeeze{}, l0);
+        p.compile(migraph::cpu::cpu_target{});
+        auto result = p.eval({});
+        EXPECT(result.get_shape() == s2);
+    }
+}
+
+void unsqueeze_test() // Checks the result shape of op::unsqueeze on the CPU target; only shapes are compared, not values.
+{
+    { // unsqueeze{{1}}: {4, 3, 3} is expected to become {4, 1, 3, 3}.
+        migraph::program p;
+        std::vector<float> data(4 * 3 * 3); // 36 value-initialized floats; contents are never inspected
+        migraph::shape s1{migraph::shape::float_type, {4, 3, 3}};
+        migraph::shape s2{migraph::shape::float_type, {4, 1, 3, 3}};
+        auto l0 = p.add_literal(migraph::literal{s1, data});
+        p.add_instruction(migraph::op::unsqueeze{{1}}, l0);
+        p.compile(migraph::cpu::cpu_target{});
+        auto result = p.eval({});
+        EXPECT(result.get_shape() == s2);
+    }
+    { // unsqueeze{{2}}: {4, 3, 3} is expected to become {4, 3, 1, 3}.
+        migraph::program p;
+        std::vector<float> data(4 * 3 * 3);
+        migraph::shape s1{migraph::shape::float_type, {4, 3, 3}};
+        migraph::shape s2{migraph::shape::float_type, {4, 3, 1, 3}};
+        auto l0 = p.add_literal(migraph::literal{s1, data});
+        p.add_instruction(migraph::op::unsqueeze{{2}}, l0);
+        p.compile(migraph::cpu::cpu_target{});
+        auto result = p.eval({});
+        EXPECT(result.get_shape() == s2);
+    }
+}
+
 void im2col_3x3_no_pad_identity_test()
 {
    std::size_t f[2]    = {3, 3};
@@ -801,6 +904,9 @@ void contiguous_test()

 int main()
 {
+    slice_test();
+    squeeze_test();
+    unsqueeze_test();
    exp_test();
    sin_test();
    cos_test();
@@ -814,7 +920,7 @@ int main()
    gemm_test<double>();
    reshape_test();
    transpose_test();
-    contiguous_test();
+    // contiguous_test();
    softmax_test();
    // maxpool_test();
    conv2d_test();

--- a/test/eliminate_allocation_test.cpp
+++ b/test/eliminate_allocation_test.cpp
@@ -102,6 +102,7 @@ void float_aligned()

 int main()
 {
+    setenv("MIGRAPH_DISABLE_MEMORY_COLORING", "1", 1);
    basic();
    aligned();
    unaligned();

--- a/test/memory_coloring_test.cpp
+++ b/test/memory_coloring_test.cpp
+#include <migraph/memory_coloring.hpp>
+#include <migraph/operators.hpp>
+#include <basic_ops.hpp>
+#include <test.hpp>
+
+struct memory_coloring_target // Minimal test target whose pass list contains only memory_coloring.
+{
+    std::string name() const { return "memory_coloring"; }
+    std::vector<migraph::pass> get_passes(migraph::context&) const
+    {
+        return {migraph::memory_coloring{"allocate"}}; // "allocate" is also the name() reported by the test `allocate` operator in this file
+    }
+    migraph::context get_context() const { return {}; } // default-constructed context
+};
+
+struct allocate // Test operator named "allocate"; its single input only supplies the output shape.
+{
+    migraph::shape s{}; // not read by any member function of this struct
+    std::string name() const { return "allocate"; }
+    migraph::shape compute_shape(const std::vector<migraph::shape>& inputs) const
+    {
+        migraph::check_shapes{inputs, *this}.has(1); // checks that there is exactly one input shape
+        return inputs.front();
+    }
+    migraph::argument compute(migraph::context&,
+                              const migraph::shape& output_shape,
+                              const std::vector<migraph::argument>&) const
+    {
+        return {output_shape}; // built from the shape alone; the input arguments are ignored
+    }
+};
+
+// A custom test operator that takes two inputs: a regular argument (index 0) and an allocation (index 1).
+// Its output aliases input 1: compute_shape() returns inputs.at(1) and compute() returns args[1].
+struct pass_memory
+{
+    std::string name() const { return "memory_coloring::pass_memory"; }
+    migraph::shape compute_shape(const std::vector<migraph::shape>& inputs) const
+    {
+        migraph::check_shapes{inputs, *this}.has(2); // checks that there are exactly two input shapes
+        return inputs.at(1);
+    }
+    migraph::argument compute(migraph::context&,
+                              const migraph::shape&,
+                              const std::vector<migraph::argument>& args) const
+    {
+        return args[1]; // input 0 is never read
+    }
+};
+
+// The pre-existing test: an 8-float and a 40-float allocation chained through pass_op.
+void test1()
+{
+    migraph::program p;
+    auto a0 = p.add_outline(migraph::shape{migraph::shape::float_type, {8}});
+    auto a1 = p.add_instruction(allocate{}, a0);
+    auto p1 = p.add_instruction(pass_op{}, a1);
+    auto a2 = p.add_outline(migraph::shape{migraph::shape::float_type, {40}});
+    auto p2 = p.add_instruction(allocate{}, a2);
+    p.add_instruction(pass_op{}, p2, p1);
+    p.compile(memory_coloring_target{});
+    EXPECT(p.get_parameter_shape("scratch").bytes() == 192); // 192 == (8 + 40) * 4, assuming 4-byte floats
+}
+
+// This test uses the pass_memory operator: a program parameter feeds two chained pass_memory instructions.
+void test2()
+{
+    migraph::program p;
+    auto input = p.add_parameter("input", migraph::shape{migraph::shape::float_type, {16}});
+
+    auto a0 = p.add_outline(migraph::shape{migraph::shape::float_type, {128}});
+    auto a1 = p.add_instruction(allocate{}, a0);
+    auto p1 = p.add_instruction(pass_memory{}, input, a1); // result aliases the 128-float allocation a1
+    auto a2 = p.add_outline(migraph::shape{migraph::shape::float_type, {40}});
+    auto p2 = p.add_instruction(allocate{}, a2);
+    p.add_instruction(pass_memory{}, p1, p2);
+    p.compile(memory_coloring_target{});
+    EXPECT(p.get_parameter_shape("scratch").bytes() == 672); // 672 == (128 + 40) * 4, assuming 4-byte floats
+}
+
+// This test uses the pass_memory operator with two memory allocations passed together.
+// This is similar to the allocations done for workspaces: one allocation (p2) is aliased by the
+// result, while the other (a1) is only used as an input.
+void test3()
+{
+    migraph::program p;
+    auto a0 = p.add_outline(migraph::shape{migraph::shape::float_type, {8}});
+    auto a1 = p.add_instruction(allocate{}, a0);
+    auto a2 = p.add_outline(migraph::shape{migraph::shape::float_type, {128}});
+    auto p2 = p.add_instruction(allocate{}, a2);
+    auto p1 = p.add_instruction(pass_memory{}, a1, p2);
+    auto a3 = p.add_outline(migraph::shape{migraph::shape::float_type, {40}});
+    auto p3 = p.add_instruction(allocate{}, a3);
+    p.add_instruction(pass_memory{}, p1, p3);
+    p.compile(memory_coloring_target{});
+    EXPECT(p.get_parameter_shape("scratch").bytes() == 704); // 704 == (8 + 128 + 40) * 4, assuming 4-byte floats
+}
+
+// Same structure as test3, except that the first (workspace-like) allocation has zero elements.
+void test4()
+{
+    migraph::program p;
+    auto a0 = p.add_outline(migraph::shape{migraph::shape::float_type, {0}});
+    auto a1 = p.add_instruction(allocate{}, a0);
+    auto a2 = p.add_outline(migraph::shape{migraph::shape::float_type, {128}});
+    auto p2 = p.add_instruction(allocate{}, a2);
+    auto p1 = p.add_instruction(pass_memory{}, a1, p2);
+    auto a3 = p.add_outline(migraph::shape{migraph::shape::float_type, {40}});
+    auto p3 = p.add_instruction(allocate{}, a3);
+    p.add_instruction(pass_memory{}, p1, p3);
+    p.compile(memory_coloring_target{});
+    EXPECT(p.get_parameter_shape("scratch").bytes() == 672); // 672 == (0 + 128 + 40) * 4, assuming 4-byte floats
+}
+
+int main() // Runs the four memory-coloring test cases in order.
+{
+    test1();
+    test2();
+    test3();
+    test4();
+}
--- a/test/op_shape_test.cpp
+++ b/test/op_shape_test.cpp
@@ -132,6 +132,19 @@ void flatten_shape()
    throws_shape(migraph::op::flatten{5}, input);
 }

+void slice_shape() // Checks the shape op::slice computes for a {2, 2, 3} int32 input in three configurations.
+{
+    migraph::shape input{migraph::shape::int32_type, {2, 2, 3}};
+    expect_shape(migraph::shape{migraph::shape::int32_type, {2, 2, 2}, {6, 3, 1}}, // lens shrink, strides stay {6, 3, 1}
+                 migraph::op::slice{{2}, {1}, {3}},
+                 input);
+    expect_shape(migraph::shape{migraph::shape::int32_type, {2, 2, 2}, {6, 3, 1}}, // same expected shape with three entries per list
+                 migraph::op::slice{{0, 1, 2}, {0, 0, 1}, {2, 2, 3}},
+                 input);
+    expect_shape(migraph::shape{migraph::shape::int32_type, {2, 2, 1}, {6, 3, 1}}, // last dimension is expected to have length 1
+                 migraph::op::slice{{2}, {2}, {10}}, // 10 is larger than the dimension size of 3
+                 input);
+}
 int main()
 {
    batch_norm_inference_shape();
@@ -140,4 +153,5 @@ int main()
    contiguous_shape();
    reshape_shape();
    flatten_shape();
+    slice_shape();
 }