Commit e63c09c5 authored by Paul

Add initial contiguous pass

parent 39151d27
......@@ -70,7 +70,35 @@ struct check_shapes
const check_shapes& same_ndims() const
{
if(!this->same([](const shape& s) { return s.lens().size(); }))
MIGRAPH_THROW(prefix() + "Dimensions do not match");
MIGRAPH_THROW(prefix() + "Number of dimensions do not match");
return *this;
}
const check_shapes& standard() const
{
if(!this->all_of([](const shape& s) { return s.standard(); }))
MIGRAPH_THROW(prefix() + "Shapes are not in standard layout");
return *this;
}
const check_shapes& packed() const
{
if(!this->all_of([](const shape& s) { return s.packed(); }))
MIGRAPH_THROW(prefix() + "Shapes are not packed");
return *this;
}
const check_shapes& not_transposed() const
{
if(!this->all_of([](const shape& s) { return not s.transposed(); }))
MIGRAPH_THROW(prefix() + "Shapes are transposed");
return *this;
}
const check_shapes& not_broadcasted() const
{
// if(!this->all_of([](const shape& s) { return not s.broadcasted(); }))
// MIGRAPH_THROW(prefix() + "Shapes are broadcasted");
return *this;
}
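// Usage sketch (based on the call sites added in the GPU operators below,
// not a new API): the validators are intended to be chained, e.g.
//   check_shapes{inputs, *this}.has(3).standard();
//   check_shapes{inputs, *this}.has(2).not_broadcasted();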
......
......@@ -38,6 +38,8 @@ struct instruction
result = r;
for(auto&& ins : output)
{
assert(ins->op.name().front() != '@');
std::cout << ins->op.name() << std::endl;
ins->replace(compute_shape(ins->op, ins->arguments));
}
}
......@@ -122,6 +124,7 @@ inline void backreference(instruction_ref ref)
// TODO: Use const ref for vector
inline shape compute_shape(operation op, std::vector<instruction_ref> args)
{
std::cout << "compute_shape: " << op.name() << std::endl;
std::vector<shape> shapes(args.size());
std::transform(
args.begin(), args.end(), shapes.begin(), [](instruction_ref i) { return i->result; });
......
......@@ -80,11 +80,13 @@ struct shape
/// Returns true if the shape is packed with no padding
bool packed() const;
/// Returns true if the shape has been transposed. That is, the strides are not in descending order
/// Returns true if the shape has been transposed. That is, the strides are not in descending
/// order
bool transposed() const;
/// Returns true if the shape is broadcasting a dimension. That is, one of the strides is zero
bool broadcasted() const;
/// Returns true if the shape is in its standard format. That is, the shape is both packed and not transposed.
/// Returns true if the shape is in its standard format. That is, the shape is both packed and
/// not transposed.
bool standard() const;
friend bool operator==(const shape& x, const shape& y);
......
......@@ -32,6 +32,7 @@ program::insert_instruction(instruction_ref ins, operation op, std::vector<instr
assert(std::all_of(
args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
"Argument is not an exisiting instruction");
assert(not starts_with(op.name(), "@"));
// TODO: Use move
shape r = compute_shape(op, args);
auto result = impl->instructions.insert(ins, {op, r, args});
......@@ -46,6 +47,7 @@ program::replace_instruction(instruction_ref ins, operation op, std::vector<inst
assert(std::all_of(
args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
"Argument is not an exisiting instruction");
assert(not starts_with(op.name(), "@"));
shape r = compute_shape(op, args);
ins->replace(op, r, args);
......
......@@ -5,16 +5,42 @@
namespace migraph {
namespace gpu {
struct index
{
std::size_t global;
std::size_t local;
std::size_t group;
};
template<class F>
__global__ void launcher(F f)
{
index idx{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x};
f(idx);
}
auto launch(std::size_t global, std::size_t local)
{
// Capture by value: the returned closure may outlive launch's parameters.
return [=](auto f) {
assert(local > 0);
assert(global > 0);
using f_type = decltype(f);
dim3 nblocks(global / local);
dim3 nthreads(local);
hipLaunchKernelGGL((launcher<f_type>),
nblocks,
nthreads,
0,
nullptr,
f);
};
}
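// Usage sketch (mirrors the grid-stride loop in hip_contiguous below; nglobal
// and nlocal are placeholders): global should be a multiple of local, since
// the block count is computed as global / local.
//   launch(nglobal, nlocal)([=](auto idx) {
//       for(std::size_t i = idx.global; i < n; i += nglobal) { /* ... */ }
//   });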
template <class F>
void visit_tensor_size(std::size_t n, F f)
{
switch(n)
{
case 0:
{
f(std::integral_constant<std::size_t, 0>{});
break;
}
case 1:
{
f(std::integral_constant<std::size_t, 1>{});
......@@ -86,48 +112,43 @@ struct hip_tensor_descriptor
size_t strides[NDim] = {};
};
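// Note (the descriptor's multi()/linear() members sit outside this hunk and
// are assumed from their use below): at_desc.multi(i) converts a linear index
// in the packed output layout into a multi-index, and a_desc.linear(...) maps
// that multi-index to the element's offset in the possibly strided input.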
template <typename T, size_t NDim>
__global__ void contiguous_gpu(const T* a,
hip_tensor_descriptor<NDim> a_desc,
T* at,
hip_tensor_descriptor<NDim> at_desc,
size_t nelements)
{
for(size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < nelements;
i += blockDim.x * gridDim.x)
{
hip_index<NDim> s = at_desc.multi(i);
size_t lidx = a_desc.linear(s);
at[i] = a[lidx];
}
}
// template <typename T, size_t NDim>
// __global__ void contiguous_gpu(const T* a,
// hip_tensor_descriptor<NDim> a_desc,
// T* at,
// hip_tensor_descriptor<NDim> at_desc,
// size_t nelements)
// {
// for(size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < nelements;
// i += blockDim.x * gridDim.x)
// {
// hip_index<NDim> s = at_desc.multi(i);
// size_t lidx = a_desc.linear(s);
// at[i] = a[lidx];
// }
// }
void hip_contiguous(migraph::shape output_shape, migraph::argument arg, migraph::argument result)
{
size_t ndim = output_shape.lens().size();
visit_all(result, arg)([&](auto output, auto input) {
if(ndim == 4)
{
visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
const auto& s = arg.get_shape();
hip_tensor_descriptor<4> a_desc(s.lens(), s.strides());
hip_tensor_descriptor<4> at_desc(output_shape.lens(), output_shape.strides());
dim3 nblocks(512);
dim3 nthreads(512);
hipLaunchKernelGGL((contiguous_gpu<int, 4>),
nblocks,
nthreads,
0,
nullptr,
input.data(),
a_desc,
output.data(),
at_desc,
s.elements());
}
else
{
MIGRAPH_THROW("contiguous is only valid for 4D tensors");
}
hip_tensor_descriptor<ndim> a_desc(s.lens(), s.strides());
hip_tensor_descriptor<ndim> at_desc(output_shape.lens(), output_shape.strides());
auto* a = input.data();
auto* at = output.data();
auto nelements = s.elements();
std::size_t nlocal = 512;
std::size_t nglobal = 512*nlocal;
launch(nglobal, nlocal)([=](auto idx) mutable {
for(size_t i = idx.global; i < nelements; i += nglobal)
{
size_t lidx = a_desc.linear(at_desc.multi(i));
at[i] = a[lidx];
}
});
});
});
}
} // namespace gpu
......
......@@ -23,7 +23,7 @@ struct miopen_convolution
std::string name() const { return "gpu::convolution"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3);
check_shapes{inputs, *this}.has(3).standard();
return op.compute_shape({inputs.at(0), inputs.at(1)});
}
argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
......@@ -74,7 +74,7 @@ struct miopen_pooling
std::string name() const { return "gpu::pooling"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2);
check_shapes{inputs, *this}.has(2).standard();
return op.compute_shape({inputs.at(1)});
}
argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
......@@ -105,7 +105,7 @@ struct miopen_add
std::string name() const { return "gpu::add"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3);
check_shapes{inputs, *this}.has(3).not_broadcasted();
return inputs.at(0);
}
......@@ -207,7 +207,7 @@ struct miopen_relu
std::string name() const { return "gpu::relu"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2);
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
......
......@@ -3,13 +3,22 @@
#include <migraph/gpu/write_literals.hpp>
#include <migraph/gpu/context.hpp>
#include <migraph/check_context.hpp>
#include <migraph/auto_contiguous.hpp>
namespace migraph {
namespace gpu {
std::vector<pass> target::get_passes(migraph::context&) const
{
return {lowering{}, write_literals{}, check_context<context>{}};
// clang-format off
return
{
auto_contiguous{},
lowering{},
write_literals{},
check_context<context>{}
};
// clang-format on
}
std::string target::name() const { return "miopen"; }
......
......@@ -51,8 +51,41 @@ void after_literal_broadcast()
EXPECT(not p.get_shape().broadcasted());
}
void after_param_transpose()
{
migraph::program p;
auto l = p.add_parameter("2x2", {migraph::shape::float_type, {2, 2}});
EXPECT(p.get_shape().standard());
EXPECT(not p.get_shape().transposed());
auto t = p.add_instruction(migraph::transpose{{1, 0}}, l);
p.add_instruction(pass_op{}, t);
EXPECT(not p.get_shape().standard());
EXPECT(p.get_shape().transposed());
p.compile(contiguous_target{});
EXPECT(p.get_shape().standard());
EXPECT(not p.get_shape().transposed());
}
void after_param_broadcast()
{
migraph::program p;
auto l1 = p.add_parameter("2x2", {migraph::shape::float_type, {2, 2}});
auto l2 = p.add_parameter("2", {migraph::shape::float_type, {2}});
EXPECT(p.get_shape().standard());
EXPECT(not p.get_shape().broadcasted());
auto b = p.add_instruction(migraph::broadcast{}, l1, l2);
p.add_instruction(pass_op{}, b);
EXPECT(not p.get_shape().standard());
EXPECT(p.get_shape().broadcasted());
p.compile(contiguous_target{});
EXPECT(p.get_shape().standard());
EXPECT(not p.get_shape().broadcasted());
}
int main()
{
after_literal_transpose();
after_literal_broadcast();
after_param_transpose();
after_param_broadcast();
}
......@@ -14,6 +14,28 @@
#include "test.hpp"
#include "verify.hpp"
struct auto_eval
{
migraph::program* p;
migraph::program::parameter_map* m;
migraph::argument result;
auto_eval(migraph::program& pp, migraph::program::parameter_map& pm)
: p(&pp), m(&pm)
{}
migraph::argument operator()() const
{
return p->eval(*m);
}
~auto_eval()
{
if(std::uncaught_exception())
std::cout << *p << std::endl;
}
};
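// Usage sketch: wrap the call to eval so the program is printed when
// evaluation throws, e.g. `return auto_eval(p, m)();` as in run_cpu/run_gpu
// below.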
template <class V>
migraph::argument run_cpu()
{
......@@ -25,7 +47,7 @@ migraph::argument run_cpu()
{
m[x.first] = migraph::generate_argument(x.second);
}
return p.eval(m);
return auto_eval(p, m)();
}
template <class V>
......@@ -41,7 +63,7 @@ migraph::argument run_gpu()
m[x.first] = migraph::gpu::to_gpu(migraph::generate_argument(x.second));
}
return migraph::gpu::from_gpu(p.eval(m));
return migraph::gpu::from_gpu(auto_eval(p, m)());
}
template <class V>
......