#include #include #include #include #include #include #include #include "test.hpp" #include "verify.hpp" using hip_ptr = RTG_MANAGE_PTR(void, hipFree); using miopen_handle = RTG_MANAGE_PTR(miopenHandle_t, miopenDestroy); template Result make_obj(F f, Ts... xs) { typename Result::pointer x = nullptr; auto status = f(&x, xs...); Result r{x}; if(status != miopenStatusSuccess) RTG_THROW("MIOpen call failed"); return r; } hip_ptr hip_allocate(std::size_t sz) { void* result; // TODO: Check status hipMalloc(&result, sz); return hip_ptr{result}; } template hip_ptr write(const T& x) { using type = typename T::value_type; auto size = x.size() * sizeof(type); auto result = hip_allocate(size); // TODO: Check status hipMemcpy(result.get(), x.data(), size, hipMemcpyHostToDevice); return result; } template std::vector read(const void* x, std::size_t sz) { std::vector result(sz); // TODO: Check status hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost); return result; } rtg::program create_program() { rtg::program p; auto input = p.add_parameter("x", rtg::shape{rtg::shape::float_type, {4, 3, 3, 3}}); auto weights = p.add_parameter("w", rtg::shape{rtg::shape::float_type, {4, 3, 3, 3}}); auto conv = p.add_instruction(rtg::convolution{}, input, weights); p.add_instruction(rtg::activation{"relu"}, conv); return p; } // TODO: Move to header rtg::argument get_tensor_argument_gpu(rtg::shape s) { auto v = rtg::generate_tensor_data(s); auto p = rtg::share(write(v)); return {s, [p]() mutable { return reinterpret_cast(p.get()); }}; } std::vector cpu() { std::vector result; auto p = create_program(); auto x = rtg::generate_argument({rtg::shape::float_type, {4, 3, 3, 3}}); auto w = rtg::generate_argument({rtg::shape::float_type, {4, 3, 3, 3}}); p.compile(rtg::cpu::cpu_target{}); auto r = p.eval({{"x", x}, {"w", w}}); auto output = r.get(); result.assign(output.begin(), output.end()); return result; } std::vector gpu() { std::vector result; auto p = create_program(); auto x = get_tensor_argument_gpu({rtg::shape::float_type, {4, 3, 3, 3}}); auto w = get_tensor_argument_gpu({rtg::shape::float_type, {4, 3, 3, 3}}); p.compile(rtg::miopen::miopen_target{}); auto y = get_tensor_argument_gpu(p.get_parameter_shape("output")); auto handle = make_obj(&miopenCreate); auto r = p.eval( {{"x", x}, {"w", w}, {"output", y}, {"handle", {rtg::shape::any_type, handle.get()}}}); result = read(r.data(), r.get_shape().elements()); return result; } void test1() { auto x = cpu(); auto y = gpu(); EXPECT(test::verify_range(x, y)); } int main() { test1(); }