Commit df78aadf authored by wsttiger

merged from master

parents ba934fc2 58681660
......@@ -88,6 +88,8 @@ argument from_gpu(argument arg)
return result;
}
void gpu_sync() { hipDeviceSynchronize(); }
} // namespace gpu
} // namespace migraph
......@@ -3,6 +3,7 @@
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/hip.hpp>
namespace migraph {
namespace gpu {
......@@ -11,6 +12,8 @@ struct context
{
shared<miopen_handle> handle;
shared<rocblas_handle_ptr> rbhandle;
std::vector<argument> literals{};
void finish() const { gpu_sync(); }
};
} // namespace gpu
......
#ifndef MIGRAPH_GUARD_RTGLIB_ELIMINATE_ALLOCATION_HPP
#define MIGRAPH_GUARD_RTGLIB_ELIMINATE_ALLOCATION_HPP
#include <string>
#include <migraph/instruction_ref.hpp>
namespace migraph {
struct program;
namespace gpu {
struct eliminate_allocation
{
std::string name() const { return "eliminate_allocation"; }
void apply(program& p) const;
};
} // namespace gpu
} // namespace migraph
#endif
......@@ -13,6 +13,8 @@ migraph::argument to_gpu(migraph::argument arg, bool host = false);
migraph::argument from_gpu(migraph::argument arg);
void gpu_sync();
struct hip_allocate
{
std::string tag{};
......@@ -28,6 +30,43 @@ struct hip_allocate
}
};
struct hip_load
{
shape s;
std::size_t offset = 0;
std::string name() const { return "hip::load"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs}.has(1);
return s;
}
argument compute(context&, const shape&, const std::vector<argument>& args) const
{
return {s, args[0].data() + offset};
}
};
struct hip_sync
{
std::string tag{};
std::string name() const { return "hip::sync"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
if(inputs.empty())
return {};
else
return inputs.front();
}
argument compute(context&, const shape&, const std::vector<argument>& args) const
{
gpu_sync();
if(args.empty())
return {};
else
return args.front();
}
};
struct hip_write
{
std::string name() const { return "hip::write"; }
......
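The two operations added above are the mechanics behind buffer reuse: hip::load returns an argument that aliases its single input at a byte offset, so no allocation or copy takes place, and hip::sync forwards its first input (if any) after calling gpu_sync(). The sketch below only illustrates the sub-buffer idea in plain C++; the pool/view names are invented for the example and are not migraph API.

#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical stand-ins: one backing allocation plus cheap offset views into it,
// mirroring how hip::load hands back {shape, args[0].data() + offset}.
struct view { char* data; std::size_t bytes; };

view load_at(std::vector<char>& pool, std::size_t offset, std::size_t bytes)
{
    assert(offset + bytes <= pool.size());
    return {pool.data() + offset, bytes}; // aliases the pool, no copy
}

int main()
{
    std::vector<char> pool(1024);          // single allocation shared by both views
    view a = load_at(pool, 0, 256);
    view b = load_at(pool, 256, 512);
    assert(a.data + a.bytes == b.data);    // views are adjacent and non-overlapping
}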
......@@ -2,6 +2,7 @@
#define MIGRAPH_GUARD_RTGLIB_MIOPEN_WRITE_LITERALS_HPP
#include <migraph/program.hpp>
#include <migraph/gpu/context.hpp>
namespace migraph {
......@@ -9,6 +10,7 @@ namespace gpu {
struct write_literals
{
context* ctx = nullptr;
std::string name() const { return "gpu::write_literals"; }
void apply(program& p) const;
......
......@@ -180,38 +180,22 @@ struct miopen_add
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
{
if(args[1].get_shape().broadcasted())
{
argument result{output_shape};
visit_all(result, from_gpu(args[0]), from_gpu(args[1]))(
[&](auto output, auto input1, auto input2) {
shape_for_each(output.get_shape(), [&](const auto& idx) {
output(idx.begin(), idx.end()) =
input1(idx.begin(), idx.end()) + input2(idx.begin(), idx.end());
});
});
return to_gpu(result);
}
else
{
float alpha = 1, beta = 0;
auto a_desc = make_tensor(args[0].get_shape());
auto b_desc = make_tensor(args[1].get_shape());
auto c_desc = make_tensor(output_shape);
miopenOpTensor(ctx.handle.get(),
miopenTensorOpAdd,
&alpha,
a_desc.get(),
args[0].implicit(),
&alpha,
b_desc.get(),
args[1].implicit(),
&beta,
c_desc.get(),
args[2].implicit());
return args[2];
}
float alpha = 1, beta = 0;
auto a_desc = make_tensor(args[0].get_shape());
auto b_desc = make_tensor(args[1].get_shape());
auto c_desc = make_tensor(output_shape);
miopenOpTensor(ctx.handle.get(),
miopenTensorOpAdd,
&alpha,
a_desc.get(),
args[0].implicit(),
&alpha,
b_desc.get(),
args[1].implicit(),
&beta,
c_desc.get(),
args[2].implicit());
return args[2];
}
};
......@@ -266,6 +250,8 @@ struct miopen_contiguous
}
argument compute(context&, shape output_shape, const std::vector<argument>& args) const
{
assert(output_shape == args[1].get_shape());
assert(output_shape.standard());
hip_contiguous(std::move(output_shape), args.at(0), args.at(1));
return args.at(1);
}
......
......@@ -3,6 +3,7 @@
#include <migraph/gpu/write_literals.hpp>
#include <migraph/gpu/context.hpp>
#include <migraph/gpu/eliminate_workspace.hpp>
#include <migraph/gpu/eliminate_allocation.hpp>
#include <migraph/check_context.hpp>
#include <migraph/auto_contiguous.hpp>
#include <migraph/dead_code_elimination.hpp>
......@@ -18,16 +19,17 @@ std::vector<pass> target::get_passes(migraph::context& gctx) const
// clang-format off
return
{
//dead_code_elimination{},
dead_code_elimination{},
auto_contiguous{},
//simplify_reshapes{},
//dead_code_elimination{},
simplify_reshapes{},
dead_code_elimination{},
lowering{ctx},
eliminate_workspace{},
eliminate_contiguous{},
dead_code_elimination{},
write_literals{},
//check_context<context>{},
write_literals{&ctx},
eliminate_allocation{},
check_context<context>{},
dead_code_elimination{}
};
// clang-format on
......
......@@ -7,15 +7,33 @@ namespace migraph {
namespace gpu {
struct hip_load_literal
{
shape s;
std::size_t n = 0;
std::string name() const { return "hip::load_literal"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs}.has(0);
return s;
}
argument compute(context& ctx, const shape&, const std::vector<argument>&) const
{
return ctx.literals.at(n);
}
};
void write_literals::apply(program& p) const
{
assert(ctx != nullptr);
for(auto ins : iterator_for(p))
{
if(ins->op.name() == "@literal")
{
literal l = ins->lit;
auto pre = p.add_literal(l);
p.replace_instruction(ins, hip_write{}, pre);
argument a = to_gpu(ins->lit.get_argument());
std::size_t n = ctx->literals.size();
ctx->literals.push_back(a);
p.replace_instruction(ins, hip_load_literal{a.get_shape(), n});
}
}
}
......
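With this change the write_literals pass pays the host-to-device copy once, while the pass runs: each @literal is copied with to_gpu, stored in context::literals, and replaced by a hip::load_literal instruction that only remembers its index, so evaluation becomes a lookup in the context. A minimal sketch of that register-once, look-up-by-index pattern (all names here are hypothetical stand-ins, not migraph API):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct demo_context { std::vector<std::string> literals; }; // stand-in for context::literals

// The expensive step (to_gpu in the real pass) happens exactly once, here.
std::size_t register_literal(demo_context& ctx, std::string blob)
{
    ctx.literals.push_back(std::move(blob));
    return ctx.literals.size() - 1;          // the instruction keeps only this index
}

int main()
{
    demo_context ctx;
    std::size_t n = register_literal(ctx, "weights");
    std::cout << ctx.literals.at(n) << "\n"; // what hip::load_literal::compute amounts to
}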
......@@ -68,7 +68,6 @@ endfunction()
function(add_test_executable TEST_NAME)
add_executable (${TEST_NAME} EXCLUDE_FROM_ALL ${ARGN})
rocm_clang_tidy_check(${TEST_NAME})
target_link_libraries(${TEST_NAME} ${CMAKE_THREAD_LIBS_INIT})
# CMake does not add flags correctly for gcc
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
......@@ -93,6 +92,7 @@ file(GLOB TESTS *.cpp)
foreach(TEST ${TESTS})
get_filename_component(BASE_NAME ${TEST} NAME_WE)
add_test_executable(test_${BASE_NAME} ${TEST})
rocm_clang_tidy_check(test_${BASE_NAME})
endforeach()
if(MIGRAPH_ENABLE_GPU)
......@@ -102,12 +102,15 @@ if(MIGRAPH_ENABLE_GPU)
foreach(TEST ${GPU_TESTS})
get_filename_component(BASE_NAME ${TEST} NAME_WE)
add_test_executable(test_gpu_${BASE_NAME} ${TEST})
rocm_clang_tidy_check(test_gpu_${BASE_NAME})
set_tests_properties(test_gpu_${BASE_NAME} PROPERTIES COST 10)
target_link_libraries(test_gpu_${BASE_NAME} migraph_gpu)
endforeach()
endif()
# Onnx test
add_executable(test_onnx onnx/onnx_test.cpp)
rocm_clang_tidy_check(test_onnx)
target_link_libraries(test_onnx migraph_onnx)
target_include_directories(test_onnx PUBLIC include)
add_test(NAME test_onnx COMMAND $<TARGET_FILE:test_onnx> WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
......
......@@ -24,17 +24,44 @@
// An improved async that doesn't block
template <class Function>
std::future<typename std::result_of<Function()>::type> detach_async(Function&& f)
std::future<typename std::result_of<Function()>::type> detach_async(Function&& f,
bool parallel = true)
{
using result_type = typename std::result_of<Function()>::type;
std::packaged_task<result_type()> task(std::forward<Function>(f));
auto fut = task.get_future();
std::thread(std::move(task)).detach();
return std::move(fut);
if(parallel)
{
using result_type = typename std::result_of<Function()>::type;
std::packaged_task<result_type()> task(std::forward<Function>(f));
auto fut = task.get_future();
std::thread(std::move(task)).detach();
return std::move(fut);
}
else
{
return std::async(std::launch::deferred, std::forward<Function>(f));
}
}
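The parallel flag added to detach_async keeps the old behaviour by default (run f on a detached thread immediately) and adds a sequential fallback: with parallel = false the call returns a deferred std::async future, so the work runs on the caller's thread only when get() is called. A self-contained sketch of the two modes, duplicating the helper above so the example compiles on its own:

#include <future>
#include <iostream>
#include <thread>
#include <type_traits>
#include <utility>

// Copy of the detach_async above, reproduced for a standalone demo.
template <class Function>
std::future<typename std::result_of<Function()>::type> detach_async(Function&& f,
                                                                     bool parallel = true)
{
    if(parallel)
    {
        using result_type = typename std::result_of<Function()>::type;
        std::packaged_task<result_type()> task(std::forward<Function>(f));
        auto fut = task.get_future();
        std::thread(std::move(task)).detach(); // fire and forget; the future still delivers the result
        return fut;
    }
    return std::async(std::launch::deferred, std::forward<Function>(f));
}

int main()
{
    auto bg  = detach_async([] { return 2 + 2; });        // starts on a detached thread now
    auto seq = detach_async([] { return 3 + 3; }, false); // runs here, when get() is called
    std::cout << bg.get() + seq.get() << "\n";            // prints 10
}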
struct auto_print
{
static void set_terminate_handler(const std::string& name)
{
static std::string pname;
pname = name;
std::set_terminate(+[] {
std::cout << "FAILED: " << pname << std::endl;
try
{
std::rethrow_exception(std::current_exception());
}
catch(const std::exception& e)
{
std::cout << " what(): " << e.what() << std::endl;
}
std::cout << std::endl;
for(auto&& handle : auto_print::handlers)
handle();
});
}
static std::array<std::function<void()>, 2> handlers;
int index;
template <class T>
......@@ -50,13 +77,26 @@ struct auto_print
};
std::array<std::function<void()>, 2> auto_print::handlers = {};
void compile_check(migraph::program& p, const migraph::target& t)
{
auto name = t.name();
auto s = p.get_shape();
std::stringstream ss;
p.compile(t, migraph::tracer{ss});
if(p.get_shape() != s)
{
std::cout << ss.str() << std::endl;
throw std::runtime_error("Compiling program with " + name + " alters its shape");
}
}
template <class V>
migraph::argument run_cpu()
{
V v;
auto p = v.create_program();
auto_print pp{p, 0};
p.compile(migraph::cpu::cpu_target{});
compile_check(p, migraph::cpu::cpu_target{});
migraph::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
{
......@@ -71,7 +111,7 @@ migraph::argument run_gpu()
V v;
auto p = v.create_program();
auto_print pp{p, 1};
p.compile(migraph::gpu::target{});
compile_check(p, migraph::gpu::target{});
migraph::program::parameter_map m;
for(auto&& x : p.get_parameter_shapes())
......@@ -82,32 +122,49 @@ migraph::argument run_gpu()
return migraph::gpu::from_gpu(p.eval(m));
}
template <class V>
void verify_program()
void verify_args(const std::string& name,
const migraph::argument& cpu_arg,
const migraph::argument& gpu_arg)
{
std::set_terminate(+[] {
std::cout << "FAILED: " << migraph::get_type_name<V>() << std::endl;
try
{
std::rethrow_exception(std::current_exception());
}
catch(const std::exception& e)
{
std::cout << " what(): " << e.what() << std::endl;
}
std::cout << std::endl;
for(auto&& handle : auto_print::handlers)
handle();
});
auto cpu_arg_f = detach_async([] { return run_cpu<V>(); });
auto gpu_arg = run_gpu<V>();
visit_all(cpu_arg_f.get(), gpu_arg)([](auto cpu, auto gpu) {
visit_all(cpu_arg, gpu_arg)([&](auto cpu, auto gpu) {
if(not migraph::verify_range(cpu, gpu))
{
// TODO: Check for nans
std::cout << "FAILED: " << migraph::get_type_name<V>() << std::endl;
std::cout << "FAILED: " << name << std::endl;
// std::cout << cpu << std::endl;
// std::cout << gpu << std::endl;
if(migraph::range_zero(cpu))
std::cout << "Cpu data is all zeros" << std::endl;
if(migraph::range_zero(gpu))
std::cout << "Gpu data is all zeros" << std::endl;
auto idx = migraph::mismatch_idx(cpu, gpu, migraph::float_equal);
if(idx < migraph::range_distance(cpu))
{
std::cout << "Mismatch at " << idx << ": " << cpu[idx] << " != " << gpu[idx]
<< std::endl;
}
auto cpu_nan_idx = find_idx(cpu, migraph::not_finite);
if(cpu_nan_idx >= 0)
std::cout << "Non finite number found in cpu at " << cpu_nan_idx << ": "
<< cpu[cpu_nan_idx] << std::endl;
auto gpu_nan_idx = find_idx(gpu, migraph::not_finite);
if(gpu_nan_idx >= 0)
std::cout << "Non finite number found in gpu at " << gpu_nan_idx << ": "
<< gpu[gpu_nan_idx] << std::endl;
}
});
}
template <class V>
void verify_program()
{
auto_print::set_terminate_handler(migraph::get_type_name<V>());
auto cpu_arg_f = detach_async([] { return run_cpu<V>(); });
auto gpu_arg = run_gpu<V>();
verify_args(migraph::get_type_name<V>(), cpu_arg_f.get(), gpu_arg);
std::set_terminate(nullptr);
}
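verify_args now reports more than a bare failure: it checks whether either side is all zeros, prints the first mismatching index and the two values there, and looks for non-finite numbers on each side. The first-mismatch diagnostic idea, shown with the standard library instead of migraph's verify helpers (purely illustrative):

#include <algorithm>
#include <iostream>
#include <vector>

int main()
{
    std::vector<float> cpu{1.0f, 2.0f, 3.0f};
    std::vector<float> gpu{1.0f, 2.5f, 3.0f};
    auto mm = std::mismatch(cpu.begin(), cpu.end(), gpu.begin());
    if(mm.first != cpu.end())
    {
        auto idx = mm.first - cpu.begin();
        std::cout << "Mismatch at " << idx << ": " << *mm.first
                  << " != " << *mm.second << "\n"; // prints: Mismatch at 1: 2 != 2.5
    }
}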
......@@ -255,6 +312,7 @@ struct test_contiguous
migraph::shape s{migraph::shape::float_type, {4, 4, 4, 3}, {48, 4, 1, 16}};
auto x = p.add_parameter("x", s);
p.add_instruction(migraph::contiguous{}, x);
EXPECT(p.get_shape().standard());
return p;
}
};
......@@ -358,4 +416,5 @@ int main()
verify_program<test_transpose>();
verify_program<test_batchnorm_inference>();
verify_program<test_batchnorm_inference_2>();
verify_program<test_conv_bn_relu_pooling>();
}
......@@ -61,7 +61,7 @@ void pytorch_conv_bn_relu_maxpool()
EXPECT(p == prog);
}
void pytorch_conv_relu_maxpoolX2()
void pytorch_conv_relu_maxpool_x2()
{
migraph::program p;
auto l0 = p.add_parameter("0", {migraph::shape::float_type, {1, 3, 32, 32}});
......@@ -92,5 +92,5 @@ int main()
pytorch_conv_bias_test();
pytorch_conv_relu_maxpool();
pytorch_conv_bn_relu_maxpool();
pytorch_conv_relu_maxpoolX2();
pytorch_conv_relu_maxpool_x2();
}
......@@ -17,12 +17,16 @@ namespace migraph {
/// during `eval`.
struct context
{
/// Wait for any tasks in the context to complete
void finish() const;
};
#else
<%
interface('context')
interface('context',
virtual('finish', returns='void', const=True)
)
%>
#endif
......