Commit 0005506c authored by Paul's avatar Paul
Browse files

Merge branch 'miopen'

parents f2e18b73 e86a2f45
#ifndef RTG_GUARD_RTGLIB_MIOPEN_TARGET_HPP
#define RTG_GUARD_RTGLIB_MIOPEN_TARGET_HPP
#include <rtg/program.hpp>
namespace rtg {
namespace miopen {
struct miopen_target
{
std::string name() const;
void apply(program& p) const;
};
} // namespace miopen
} // namespace rtg
#endif
#include <rtg/miopen/miopen_target.hpp>
#include <rtg/manage_ptr.hpp>
#include <rtg/instruction.hpp>
#include <rtg/operators.hpp>
#include <miopen/miopen.h>
namespace rtg {
namespace miopen {
struct hip_allocate
{
std::string name() const { return "hip::allocate"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs}.has(1);
return inputs.front();
}
argument compute(shape output_shape, std::vector<argument>) const
{
char* data = nullptr;
// TODO: Check return status
hipMalloc(&data, output_shape.bytes());
return {output_shape, data};
}
};
struct hip_free
{
std::string name() const { return "hip::free"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs}.has(1);
return {};
}
argument compute(shape, std::vector<argument> args) const
{
// TODO: Check return status
hipFree(args.front().data());
return {};
}
};
using miopen_handle = RTG_MANAGE_PTR(miopenHandle_t, miopenDestroy);
using tensor_descriptor = RTG_MANAGE_PTR(miopenTensorDescriptor_t, miopenDestroyTensorDescriptor);
using convolution_descriptor = RTG_MANAGE_PTR(miopenConvolutionDescriptor_t,
miopenDestroyConvolutionDescriptor);
using activation_descriptor = RTG_MANAGE_PTR(miopenActivationDescriptor_t,
miopenDestroyActivationDescriptor);
template <class Result, class F, class... Ts>
Result make_obj(F f, Ts... xs)
{
typename Result::pointer x = nullptr;
auto status = f(&x, xs...);
Result r{x};
if(status != miopenStatusSuccess)
RTG_THROW("MIOpen call failed");
return r;
}
tensor_descriptor make_tensor(const rtg::shape& s)
{
auto t = make_obj<tensor_descriptor>(&miopenCreateTensorDescriptor);
// Convert to ints
std::vector<int> lens(s.lens().begin(), s.lens().end());
std::vector<int> strides(s.strides().begin(), s.strides().end());
miopenDataType_t d;
if(s.type() == shape::float_type)
d = miopenFloat;
else
RTG_THROW("Unsupported type");
miopenSetTensorDescriptor(t.get(), d, s.lens().size(), lens.data(), strides.data());
return t;
}
convolution_descriptor make_conv(const rtg::convolution& op)
{
auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
miopenInitConvolutionDescriptor(c.get(),
miopenConvolution,
op.padding[0],
op.padding[1],
op.stride[0],
op.stride[1],
op.dilation[0],
op.dilation[1]);
return c;
}
activation_descriptor make_relu()
{
auto ad = make_obj<activation_descriptor>(&miopenCreateActivationDescriptor);
miopenSetActivationDescriptor(ad.get(), miopenActivationRELU, 0, 0, 0);
return ad;
}
struct miopen_convolution
{
convolution op;
shared<convolution_descriptor> cd;
std::string name() const { return "miopen::convolution"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs}.has(4);
return op.compute_shape({inputs.at(1), inputs.at(2)});
}
argument compute(shape output_shape, std::vector<argument> args) const
{
auto x_desc = make_tensor(args[1].get_shape());
auto w_desc = make_tensor(args[2].get_shape());
auto y_desc = make_tensor(output_shape);
float alpha = 1, beta = 0;
int algo_count;
miopenConvAlgoPerf_t perf;
miopenFindConvolutionForwardAlgorithm(args[0].get(),
x_desc.get(),
args[1].get(),
w_desc.get(),
args[2].get(),
cd.get(),
y_desc.get(),
args[3].get(),
1,
&algo_count,
&perf,
nullptr,
0,
false);
miopenConvolutionForward(args[0].get(),
&alpha,
x_desc.get(),
args[1].get(),
w_desc.get(),
args[2].get(),
cd.get(),
perf.fwd_algo,
&beta,
y_desc.get(),
args[3].get(),
nullptr,
0);
return args[3];
}
};
struct miopen_relu
{
shared<activation_descriptor> ad;
std::string name() const { return "miopen::relu"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs}.has(3);
return inputs.at(1);
}
argument compute(shape output_shape, std::vector<argument> args) const
{
float alpha = 1, beta = 0;
auto x_desc = make_tensor(args[1].get_shape());
auto y_desc = make_tensor(output_shape);
miopenActivationForward(args[0].get(),
ad.get(),
&alpha,
x_desc.get(),
args[1].get(),
&beta,
y_desc.get(),
args[2].get());
return args[2];
}
};
struct miopen_apply
{
program* prog = nullptr;
instruction_ref handle{};
void apply()
{
handle = prog->add_parameter("handle", shape{shape::any_type});
for(auto it = prog->begin(); it != prog->end(); it++)
{
if(it->op.name() == "convolution")
{
apply_convolution(it);
}
else if(it->op.name() == "activation")
{
apply_activation(it);
}
}
}
instruction_ref insert_allocation(instruction_ref ins, const shape& s)
{
if(ins == --prog->end())
{
return prog->add_parameter("output", s);
}
else
{
auto is = prog->add_outline(s);
auto result = prog->insert_instruction(ins, hip_allocate{}, is);
prog->insert_instruction(++ins, hip_free{}, result);
return result;
}
}
void apply_convolution(instruction_ref ins)
{
auto&& op = any_cast<convolution>(ins->op);
auto cd = make_conv(op);
auto output = insert_allocation(ins, ins->result);
prog->replace_instruction(ins,
miopen_convolution{op, std::move(cd)},
handle,
ins->arguments.at(0),
ins->arguments.at(1),
output);
}
void apply_activation(instruction_ref ins)
{
auto&& op = any_cast<activation>(ins->op);
auto ad = make_relu();
if(op.mode == "relu")
{
auto output = insert_allocation(ins, ins->result);
prog->replace_instruction(
ins, miopen_relu{std::move(ad)}, handle, ins->arguments.at(0), output);
}
}
};
std::string miopen_target::name() const { return "miopen"; }
void miopen_target::apply(program& p) const { miopen_apply{&p}.apply(); }
} // namespace miopen
} // namespace rtg
......@@ -4,9 +4,20 @@ cmake_policy(SET CMP0057 NEW)
include(CTest)
find_package(Threads REQUIRED)
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
include(ProcessorCount)
ProcessorCount(N)
set(CTEST_PARALLEL_LEVEL ${N} CACHE STRING "CTest parallel level")
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -j ${CTEST_PARALLEL_LEVEL} -C ${CMAKE_CFG_INTDIR})
add_custom_target(tests)
find_program(RTG_GDB gdb)
if(RTG_GDB)
set(RTG_TEST_GDB On CACHE BOOL "")
else()
set(RTG_TEST_GDB Off CACHE BOOL "")
endif()
set(SKIP_TESTS)
function(add_test_command NAME EXE)
......@@ -23,13 +34,21 @@ function(add_test_command NAME EXE)
%1 ${ARGN}")
add_test(NAME ${NAME} COMMAND ${WINE_CMD} cmd /c "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd" $<TARGET_FILE:${EXE}>)
else()
if(MIOPEN_TEST_GDB)
if(RTG_TEST_GDB)
# add_test(NAME ${NAME} COMMAND ${RTG_GDB}
# --batch
# --return-child-result
# -ex "set disable-randomization off"
# -ex run
# -ex backtrace
# --args $<TARGET_FILE:${EXE}> ${ARGN})
file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmake"
CONTENT "
execute_process(COMMAND $<TARGET_FILE:${EXE}> ${ARGN} RESULT_VARIABLE RESULT)
if(NOT RESULT EQUAL 0)
# TODO: check for core files based on pid when setting /proc/sys/kernel/core_uses_pid
if(EXISTS core)
execute_process(COMMAND gdb $<TARGET_FILE:${EXE}> core -batch -ex bt)
execute_process(COMMAND ${RTG_GDB} $<TARGET_FILE:${EXE}> core -batch -ex bt)
endif()
message(FATAL_ERROR \"Test failed\")
endif()
......@@ -59,7 +78,8 @@ function(add_test_executable TEST_NAME)
add_dependencies(tests ${TEST_NAME})
add_dependencies(check ${TEST_NAME})
set_tests_properties(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED")
target_link_libraries(${TEST_NAME} rtg)
target_link_libraries(${TEST_NAME} rtg rtg_cpu)
target_include_directories(${TEST_NAME} PUBLIC include)
endfunction(add_test_executable)
file(GLOB TESTS *.cpp)
......@@ -68,3 +88,14 @@ foreach(TEST ${TESTS})
get_filename_component(BASE_NAME ${TEST} NAME_WE)
add_test_executable(test_${BASE_NAME} ${TEST})
endforeach()
if(RTG_ENABLE_MIOPEN)
# miopen tests
file(GLOB MIOPEN_TESTS miopen/*.cpp)
foreach(TEST ${MIOPEN_TESTS})
get_filename_component(BASE_NAME ${TEST} NAME_WE)
add_test_executable(test_miopen_${BASE_NAME} ${TEST})
target_link_libraries(test_miopen_${BASE_NAME} rtg_miopen)
endforeach()
endif()
......@@ -78,11 +78,11 @@ struct lhs_expression
T value() const { return lhs; }
// NOLINTNEXTLINE
#define TEST_LHS_OPERATOR(op, name) \
template <class U> \
auto operator op(const U& rhs) const \
{ \
return make_expression(lhs, rhs, name{}); \
#define TEST_LHS_OPERATOR(op, name) \
template <class U> \
auto operator op(const U& rhs) const \
{ \
return make_expression(lhs, rhs, name{}); /* NOLINT */ \
}
TEST_FOREACH_OPERATOR(TEST_LHS_OPERATOR)
......
#include <rtg/program.hpp>
#include <rtg/operators.hpp>
#include <rtg/cpu/cpu_target.hpp>
#include <rtg/miopen/miopen_target.hpp>
#include <rtg/manage_ptr.hpp>
#include <miopen/miopen.h>
#include <random>
#include "test.hpp"
using hip_ptr = RTG_MANAGE_PTR(void, hipFree);
using miopen_handle = RTG_MANAGE_PTR(miopenHandle_t, miopenDestroy);
template <class Result, class F, class... Ts>
Result make_obj(F f, Ts... xs)
{
typename Result::pointer x = nullptr;
auto status = f(&x, xs...);
Result r{x};
if(status != miopenStatusSuccess)
RTG_THROW("MIOpen call failed");
return r;
}
hip_ptr hip_allocate(std::size_t sz)
{
void* result;
// TODO: Check status
hipMalloc(&result, sz);
return hip_ptr{result};
}
template <class T>
hip_ptr write(const T& x)
{
using type = typename T::value_type;
auto size = x.size() * sizeof(type);
auto result = hip_allocate(size);
// TODO: Check status
hipMemcpy(result.get(), x.data(), size, hipMemcpyHostToDevice);
return result;
}
template <class T>
std::vector<T> read(const void* x, std::size_t sz)
{
std::vector<T> result(sz);
// TODO: Check status
hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
return result;
}
rtg::program create_program()
{
rtg::program p;
auto input = p.add_parameter("x", rtg::shape{rtg::shape::float_type, {4, 3, 3, 3}});
auto weights = p.add_parameter("w", rtg::shape{rtg::shape::float_type, {4, 3, 3, 3}});
auto conv = p.add_instruction(rtg::convolution{}, input, weights);
p.add_instruction(rtg::activation{"relu"}, conv);
return p;
}
std::vector<float> get_tensor_data(rtg::shape s)
{
std::vector<float> result(s.elements());
std::mt19937 engine{0};
std::uniform_real_distribution<> dist;
std::generate(result.begin(), result.end(), [&] { return dist(engine); });
return result;
}
rtg::argument get_tensor_argument_cpu(rtg::shape s)
{
auto v = get_tensor_data(s);
return {s, [v]() mutable { return reinterpret_cast<char*>(v.data()); }};
}
rtg::argument get_tensor_argument_gpu(rtg::shape s)
{
auto v = get_tensor_data(s);
auto p = rtg::share(write(v));
return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
}
std::vector<float> cpu()
{
std::vector<float> result;
auto p = create_program();
auto x = get_tensor_argument_cpu({rtg::shape::float_type, {4, 3, 3, 3}});
auto w = get_tensor_argument_cpu({rtg::shape::float_type, {4, 3, 3, 3}});
p.compile(rtg::cpu::cpu_target{});
auto r = p.eval({{"x", x}, {"w", w}});
r.visit([&](auto output) { result.assign(output.begin(), output.end()); });
return result;
}
std::vector<float> gpu()
{
std::vector<float> result;
auto p = create_program();
auto x = get_tensor_argument_gpu({rtg::shape::float_type, {4, 3, 3, 3}});
auto w = get_tensor_argument_gpu({rtg::shape::float_type, {4, 3, 3, 3}});
p.compile(rtg::miopen::miopen_target{});
auto y = get_tensor_argument_gpu(p.get_parameter_shape("output"));
auto handle = make_obj<miopen_handle>(&miopenCreate);
auto r = p.eval(
{{"x", x}, {"w", w}, {"output", y}, {"handle", {rtg::shape::any_type, handle.get()}}});
result = read<float>(r.data(), r.get_shape().elements());
return result;
}
void test1()
{
auto x = cpu();
auto y = gpu();
// TODO: Use expect
if(x == y)
std::cout << "FAILED" << std::endl;
}
int main() { test1(); }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment