Merge branch 'miopen'

0005506c · Paul · f2e18b73 · e86a2f45 · 0005506c · 0005506c
Commit 0005506c authored Jun 11, 2018 by Paul
5 changed files
--- a/src/targets/miopen/include/rtg/miopen/miopen_target.hpp
+++ b/src/targets/miopen/include/rtg/miopen/miopen_target.hpp
+#ifndef RTG_GUARD_RTGLIB_MIOPEN_TARGET_HPP
+#define RTG_GUARD_RTGLIB_MIOPEN_TARGET_HPP
+
+#include <rtg/program.hpp>
+
+namespace rtg {
+namespace miopen {
+
+struct miopen_target
+{
+    std::string name() const;
+    void apply(program& p) const;
+};
+
+} // namespace miopen
+
+} // namespace rtg
+
+#endif
--- a/src/targets/miopen/miopen_target.cpp
+++ b/src/targets/miopen/miopen_target.cpp
+#include <rtg/miopen/miopen_target.hpp>
+#include <rtg/manage_ptr.hpp>
+#include <rtg/instruction.hpp>
+#include <rtg/operators.hpp>
+
+#include <miopen/miopen.h>
+
+namespace rtg {
+namespace miopen {
+
+struct hip_allocate
+{
+    std::string name() const { return "hip::allocate"; }
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs}.has(1);
+        return inputs.front();
+    }
+    argument compute(shape output_shape, std::vector<argument>) const
+    {
+        char* data = nullptr;
+        // TODO: Check return status
+        hipMalloc(&data, output_shape.bytes());
+        return {output_shape, data};
+    }
+};
+
+struct hip_free
+{
+    std::string name() const { return "hip::free"; }
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs}.has(1);
+        return {};
+    }
+    argument compute(shape, std::vector<argument> args) const
+    {
+        // TODO: Check return status
+        hipFree(args.front().data());
+        return {};
+    }
+};
+
+using miopen_handle     = RTG_MANAGE_PTR(miopenHandle_t, miopenDestroy);
+using tensor_descriptor = RTG_MANAGE_PTR(miopenTensorDescriptor_t, miopenDestroyTensorDescriptor);
+using convolution_descriptor = RTG_MANAGE_PTR(miopenConvolutionDescriptor_t,
+                                              miopenDestroyConvolutionDescriptor);
+using activation_descriptor  = RTG_MANAGE_PTR(miopenActivationDescriptor_t,
+                                             miopenDestroyActivationDescriptor);
+
+template <class Result, class F, class... Ts>
+Result make_obj(F f, Ts... xs)
+{
+    typename Result::pointer x = nullptr;
+    auto status                = f(&x, xs...);
+    Result r{x};
+    if(status != miopenStatusSuccess)
+        RTG_THROW("MIOpen call failed");
+    return r;
+}
+
+tensor_descriptor make_tensor(const rtg::shape& s)
+{
+    auto t = make_obj<tensor_descriptor>(&miopenCreateTensorDescriptor);
+    // Convert to ints
+    std::vector<int> lens(s.lens().begin(), s.lens().end());
+    std::vector<int> strides(s.strides().begin(), s.strides().end());
+    miopenDataType_t d;
+    if(s.type() == shape::float_type)
+        d = miopenFloat;
+    else
+        RTG_THROW("Unsupported type");
+    miopenSetTensorDescriptor(t.get(), d, s.lens().size(), lens.data(), strides.data());
+    return t;
+}
+
+convolution_descriptor make_conv(const rtg::convolution& op)
+{
+    auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
+    miopenInitConvolutionDescriptor(c.get(),
+                                    miopenConvolution,
+                                    op.padding[0],
+                                    op.padding[1],
+                                    op.stride[0],
+                                    op.stride[1],
+                                    op.dilation[0],
+                                    op.dilation[1]);
+    return c;
+}
+
+activation_descriptor make_relu()
+{
+    auto ad = make_obj<activation_descriptor>(&miopenCreateActivationDescriptor);
+    miopenSetActivationDescriptor(ad.get(), miopenActivationRELU, 0, 0, 0);
+    return ad;
+}
+
+struct miopen_convolution
+{
+    convolution op;
+    shared<convolution_descriptor> cd;
+
+    std::string name() const { return "miopen::convolution"; }
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs}.has(4);
+        return op.compute_shape({inputs.at(1), inputs.at(2)});
+    }
+    argument compute(shape output_shape, std::vector<argument> args) const
+    {
+        auto x_desc = make_tensor(args[1].get_shape());
+        auto w_desc = make_tensor(args[2].get_shape());
+        auto y_desc = make_tensor(output_shape);
+
+        float alpha = 1, beta = 0;
+        int algo_count;
+        miopenConvAlgoPerf_t perf;
+        miopenFindConvolutionForwardAlgorithm(args[0].get(),
+                                              x_desc.get(),
+                                              args[1].get(),
+                                              w_desc.get(),
+                                              args[2].get(),
+                                              cd.get(),
+                                              y_desc.get(),
+                                              args[3].get(),
+                                              1,
+                                              &algo_count,
+                                              &perf,
+                                              nullptr,
+                                              0,
+                                              false);
+        miopenConvolutionForward(args[0].get(),
+                                 &alpha,
+                                 x_desc.get(),
+                                 args[1].get(),
+                                 w_desc.get(),
+                                 args[2].get(),
+                                 cd.get(),
+                                 perf.fwd_algo,
+                                 &beta,
+                                 y_desc.get(),
+                                 args[3].get(),
+                                 nullptr,
+                                 0);
+        return args[3];
+    }
+};
+
+struct miopen_relu
+{
+    shared<activation_descriptor> ad;
+    std::string name() const { return "miopen::relu"; }
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs}.has(3);
+        return inputs.at(1);
+    }
+
+    argument compute(shape output_shape, std::vector<argument> args) const
+    {
+        float alpha = 1, beta = 0;
+        auto x_desc = make_tensor(args[1].get_shape());
+        auto y_desc = make_tensor(output_shape);
+        miopenActivationForward(args[0].get(),
+                                ad.get(),
+                                &alpha,
+                                x_desc.get(),
+                                args[1].get(),
+                                &beta,
+                                y_desc.get(),
+                                args[2].get());
+
+        return args[2];
+    }
+};
+
+struct miopen_apply
+{
+    program* prog = nullptr;
+    instruction_ref handle{};
+
+    void apply()
+    {
+        handle = prog->add_parameter("handle", shape{shape::any_type});
+        for(auto it = prog->begin(); it != prog->end(); it++)
+        {
+            if(it->op.name() == "convolution")
+            {
+                apply_convolution(it);
+            }
+            else if(it->op.name() == "activation")
+            {
+                apply_activation(it);
+            }
+        }
+    }
+
+    instruction_ref insert_allocation(instruction_ref ins, const shape& s)
+    {
+        if(ins == --prog->end())
+        {
+            return prog->add_parameter("output", s);
+        }
+        else
+        {
+            auto is     = prog->add_outline(s);
+            auto result = prog->insert_instruction(ins, hip_allocate{}, is);
+            prog->insert_instruction(++ins, hip_free{}, result);
+            return result;
+        }
+    }
+
+    void apply_convolution(instruction_ref ins)
+    {
+        auto&& op   = any_cast<convolution>(ins->op);
+        auto cd     = make_conv(op);
+        auto output = insert_allocation(ins, ins->result);
+
+        prog->replace_instruction(ins,
+                                  miopen_convolution{op, std::move(cd)},
+                                  handle,
+                                  ins->arguments.at(0),
+                                  ins->arguments.at(1),
+                                  output);
+    }
+
+    void apply_activation(instruction_ref ins)
+    {
+        auto&& op = any_cast<activation>(ins->op);
+        auto ad   = make_relu();
+        if(op.mode == "relu")
+        {
+            auto output = insert_allocation(ins, ins->result);
+            prog->replace_instruction(
+                ins, miopen_relu{std::move(ad)}, handle, ins->arguments.at(0), output);
+        }
+    }
+};
+
+std::string miopen_target::name() const { return "miopen"; }
+
+void miopen_target::apply(program& p) const { miopen_apply{&p}.apply(); }
+
+} // namespace miopen
+
+} // namespace rtg
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -4,9 +4,20 @@ cmake_policy(SET CMP0057 NEW)
 include(CTest)

 find_package(Threads REQUIRED)
-add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
+include(ProcessorCount)
+ProcessorCount(N)
+set(CTEST_PARALLEL_LEVEL ${N} CACHE STRING "CTest parallel level")
+add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -j ${CTEST_PARALLEL_LEVEL} -C ${CMAKE_CFG_INTDIR})
 add_custom_target(tests)

+find_program(RTG_GDB gdb)
+
+if(RTG_GDB)
+    set(RTG_TEST_GDB On CACHE BOOL "")
+else()
+    set(RTG_TEST_GDB Off CACHE BOOL "")
+endif()
+
 set(SKIP_TESTS)

 function(add_test_command NAME EXE)
@@ -23,13 +34,21 @@ function(add_test_command NAME EXE)
                    %1 ${ARGN}")
        add_test(NAME ${NAME} COMMAND ${WINE_CMD} cmd /c "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd" $<TARGET_FILE:${EXE}>)
    else()
-        if(MIOPEN_TEST_GDB)
+        if(RTG_TEST_GDB)
+            # add_test(NAME ${NAME} COMMAND ${RTG_GDB} 
+            #     --batch
+            #     --return-child-result
+            #     -ex "set disable-randomization off"
+            #     -ex run
+            #     -ex backtrace
+            #     --args $<TARGET_FILE:${EXE}> ${ARGN})
            file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmake"
                CONTENT "
                execute_process(COMMAND $<TARGET_FILE:${EXE}> ${ARGN} RESULT_VARIABLE RESULT)
                if(NOT RESULT EQUAL 0)
+                    # TODO: check for core files based on pid when setting /proc/sys/kernel/core_uses_pid
                    if(EXISTS core)
-                        execute_process(COMMAND gdb $<TARGET_FILE:${EXE}> core -batch -ex bt)
+                        execute_process(COMMAND ${RTG_GDB} $<TARGET_FILE:${EXE}> core -batch -ex bt)
                    endif()
                    message(FATAL_ERROR \"Test failed\")
                endif()
@@ -59,7 +78,8 @@ function(add_test_executable TEST_NAME)
    add_dependencies(tests ${TEST_NAME})
    add_dependencies(check ${TEST_NAME})
    set_tests_properties(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED")
-    target_link_libraries(${TEST_NAME} rtg)
+    target_link_libraries(${TEST_NAME} rtg rtg_cpu)
+    target_include_directories(${TEST_NAME} PUBLIC include)
 endfunction(add_test_executable)

 file(GLOB TESTS *.cpp)
@@ -68,3 +88,14 @@ foreach(TEST ${TESTS})
    get_filename_component(BASE_NAME ${TEST} NAME_WE)
    add_test_executable(test_${BASE_NAME} ${TEST})
 endforeach()
+
+if(RTG_ENABLE_MIOPEN)
+    # miopen tests
+    file(GLOB MIOPEN_TESTS miopen/*.cpp)
+
+    foreach(TEST ${MIOPEN_TESTS})
+        get_filename_component(BASE_NAME ${TEST} NAME_WE)
+        add_test_executable(test_miopen_${BASE_NAME} ${TEST})
+        target_link_libraries(test_miopen_${BASE_NAME} rtg_miopen)
+    endforeach()
+endif()
--- a/test/test.hpp
+++ b/test/test.hpp
@@ -78,11 +78,11 @@ struct lhs_expression

    T value() const { return lhs; }
 // NOLINTNEXTLINE
-#define TEST_LHS_OPERATOR(op, name)               \
-    template <class U>                            \
-    auto operator op(const U& rhs) const          \
-    {                                             \
-        return make_expression(lhs, rhs, name{}); \
+#define TEST_LHS_OPERATOR(op, name)                            \
+    template <class U>                                         \
+    auto operator op(const U& rhs) const                       \
+    {                                                          \
+        return make_expression(lhs, rhs, name{}); /* NOLINT */ \
    }

    TEST_FOREACH_OPERATOR(TEST_LHS_OPERATOR)

--- a/test/miopen/miopen.cpp
+++ b/test/miopen/miopen.cpp
+
+#include <rtg/program.hpp>
+#include <rtg/operators.hpp>
+#include <rtg/cpu/cpu_target.hpp>
+#include <rtg/miopen/miopen_target.hpp>
+#include <rtg/manage_ptr.hpp>
+
+#include <miopen/miopen.h>
+
+#include <random>
+
+#include "test.hpp"
+
+using hip_ptr       = RTG_MANAGE_PTR(void, hipFree);
+using miopen_handle = RTG_MANAGE_PTR(miopenHandle_t, miopenDestroy);
+
+template <class Result, class F, class... Ts>
+Result make_obj(F f, Ts... xs)
+{
+    typename Result::pointer x = nullptr;
+    auto status                = f(&x, xs...);
+    Result r{x};
+    if(status != miopenStatusSuccess)
+        RTG_THROW("MIOpen call failed");
+    return r;
+}
+
+hip_ptr hip_allocate(std::size_t sz)
+{
+    void* result;
+    // TODO: Check status
+    hipMalloc(&result, sz);
+    return hip_ptr{result};
+}
+
+template <class T>
+hip_ptr write(const T& x)
+{
+    using type  = typename T::value_type;
+    auto size   = x.size() * sizeof(type);
+    auto result = hip_allocate(size);
+    // TODO: Check status
+    hipMemcpy(result.get(), x.data(), size, hipMemcpyHostToDevice);
+    return result;
+}
+
+template <class T>
+std::vector<T> read(const void* x, std::size_t sz)
+{
+    std::vector<T> result(sz);
+    // TODO: Check status
+    hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
+    return result;
+}
+
+rtg::program create_program()
+{
+    rtg::program p;
+    auto input   = p.add_parameter("x", rtg::shape{rtg::shape::float_type, {4, 3, 3, 3}});
+    auto weights = p.add_parameter("w", rtg::shape{rtg::shape::float_type, {4, 3, 3, 3}});
+    auto conv    = p.add_instruction(rtg::convolution{}, input, weights);
+    p.add_instruction(rtg::activation{"relu"}, conv);
+    return p;
+}
+
+std::vector<float> get_tensor_data(rtg::shape s)
+{
+    std::vector<float> result(s.elements());
+    std::mt19937 engine{0};
+    std::uniform_real_distribution<> dist;
+    std::generate(result.begin(), result.end(), [&] { return dist(engine); });
+    return result;
+}
+
+rtg::argument get_tensor_argument_cpu(rtg::shape s)
+{
+    auto v = get_tensor_data(s);
+    return {s, [v]() mutable { return reinterpret_cast<char*>(v.data()); }};
+}
+
+rtg::argument get_tensor_argument_gpu(rtg::shape s)
+{
+    auto v = get_tensor_data(s);
+    auto p = rtg::share(write(v));
+    return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
+}
+
+std::vector<float> cpu()
+{
+    std::vector<float> result;
+    auto p = create_program();
+    auto x = get_tensor_argument_cpu({rtg::shape::float_type, {4, 3, 3, 3}});
+    auto w = get_tensor_argument_cpu({rtg::shape::float_type, {4, 3, 3, 3}});
+    p.compile(rtg::cpu::cpu_target{});
+    auto r = p.eval({{"x", x}, {"w", w}});
+    r.visit([&](auto output) { result.assign(output.begin(), output.end()); });
+    return result;
+}
+
+std::vector<float> gpu()
+{
+    std::vector<float> result;
+    auto p = create_program();
+    auto x = get_tensor_argument_gpu({rtg::shape::float_type, {4, 3, 3, 3}});
+    auto w = get_tensor_argument_gpu({rtg::shape::float_type, {4, 3, 3, 3}});
+    p.compile(rtg::miopen::miopen_target{});
+    auto y      = get_tensor_argument_gpu(p.get_parameter_shape("output"));
+    auto handle = make_obj<miopen_handle>(&miopenCreate);
+    auto r      = p.eval(
+        {{"x", x}, {"w", w}, {"output", y}, {"handle", {rtg::shape::any_type, handle.get()}}});
+    result = read<float>(r.data(), r.get_shape().elements());
+    return result;
+}
+
+void test1()
+{
+    auto x = cpu();
+    auto y = gpu();
+    // TODO: Use expect
+    if(x == y)
+        std::cout << "FAILED" << std::endl;
+}
+
+int main() { test1(); }