Commit 23ef67a7 authored by Scott Thornton

Merge branch 'master' into onnx_fixes

parents 7e23d5c4 68d69739
@@ -10,13 +10,20 @@ if(NOT TARGET MIOpen)
message(SEND_ERROR "Cant find miopen")
endif()
add_library(migraph_miopen
add_library(migraph_device
hip_contiguous.cpp
)
rocm_clang_tidy_check(migraph_device)
target_link_libraries(migraph_device migraph hip::device)
target_include_directories(migraph_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
add_library(migraph_gpu
hip.cpp
target.cpp
lowering.cpp
write_literals.cpp
rocblas.cpp
)
rocm_clang_tidy_check(migraph_miopen)
target_link_libraries(migraph_miopen migraph MIOpen roc::rocblas)
target_include_directories(migraph_miopen PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
rocm_clang_tidy_check(migraph_gpu)
target_link_libraries(migraph_gpu migraph MIOpen migraph_device roc::rocblas)
target_include_directories(migraph_gpu PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
#include <migraph/miopen/hip.hpp>
#include <migraph/gpu/hip.hpp>
#include <migraph/manage_ptr.hpp>
#include <miopen/miopen.h>
@@ -7,7 +7,7 @@
#include <vector>
namespace migraph {
namespace miopen {
namespace gpu {
using hip_ptr = MIGRAPH_MANAGE_PTR(void, hipFree);
@@ -67,6 +67,6 @@ migraph::argument from_gpu(migraph::argument arg)
return result;
}
} // namespace miopen
} // namespace gpu
} // namespace migraph
#include <hip/hip_runtime.h>
#include <migraph/operators.hpp>
namespace migraph {
namespace gpu {
template <class F>
void visit_tensor_size(std::size_t n, F f)
{
switch(n)
{
case 0:
{
f(std::integral_constant<std::size_t, 0>{});
break;
}
case 1:
{
f(std::integral_constant<std::size_t, 1>{});
break;
}
case 2:
{
f(std::integral_constant<std::size_t, 2>{});
break;
}
case 3:
{
f(std::integral_constant<std::size_t, 3>{});
break;
}
case 4:
{
f(std::integral_constant<std::size_t, 4>{});
break;
}
case 5:
{
f(std::integral_constant<std::size_t, 5>{});
break;
}
default: throw std::runtime_error("Unknown tensor size");
}
}
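A minimal usage sketch of `visit_tensor_size` (the caller name `dispatch_example` is hypothetical): the lambda receives a `std::integral_constant`, so the runtime rank becomes a compile-time constant usable for template instantiation.
// Illustrative sketch only: turn a runtime rank into a compile-time constant.
inline void dispatch_example(std::size_t rank)
{
    visit_tensor_size(rank, [](auto ndim) {
        // decltype(ndim) is std::integral_constant<std::size_t, N>, so N is constexpr here.
        constexpr std::size_t n = decltype(ndim)::value;
        static_assert(n <= 5, "visit_tensor_size only dispatches ranks 0..5");
    });
}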
template <size_t NDim>
struct hip_index
{
size_t d[NDim];
__device__ __host__ size_t& operator[](size_t i) { return d[i]; }
__device__ __host__ size_t operator[](size_t i) const { return d[i]; }
};
template <size_t NDim>
struct hip_tensor_descriptor
{
__device__ __host__ hip_tensor_descriptor() = default;
template <typename T, typename V>
__device__ __host__ hip_tensor_descriptor(const T& lens_ext, const V& strides_ext)
{
for(size_t i = 0; i < NDim; i++)
lens[i] = lens_ext[i];
for(size_t i = 0; i < NDim; i++)
strides[i] = strides_ext[i];
}
__device__ __host__ hip_index<NDim> multi(size_t idx)
{
hip_index<NDim> result{};
size_t tidx = idx;
for(size_t is = 0; is < NDim; is++)
{
result[is] = tidx / strides[is];
tidx = tidx % strides[is];
}
return result;
}
__device__ __host__ size_t linear(hip_index<NDim> s)
{
size_t idx = 0;
for(size_t i = 0; i < NDim; i++)
idx += s[i] * strides[i];
return idx;
}
size_t lens[NDim] = {};
size_t strides[NDim] = {};
};
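A short host-side example of the descriptor's index math (function name and stride sets are illustrative): `multi` splits a linear offset into per-axis coordinates using the descriptor's strides, and `linear` folds coordinates back into an offset, possibly against a different stride set.
// Illustrative only: round-trip an offset through a packed descriptor, then map
// the same coordinates into a differently-strided view of the same data.
inline void index_math_example()
{
    size_t lens[2]    = {4, 3};
    size_t packed[2]  = {3, 1}; // standard strides for lens {4, 3}
    size_t strided[2] = {1, 4}; // e.g. the transposed/column-major layout
    hip_tensor_descriptor<2> dst(lens, packed);
    hip_tensor_descriptor<2> src(lens, strided);
    hip_index<2> coord = dst.multi(7);       // -> {2, 1}
    size_t back        = dst.linear(coord);  // 2*3 + 1*1 = 7
    size_t other       = src.linear(coord);  // 2*1 + 1*4 = 6
    (void)back;
    (void)other;
}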
template <typename T, size_t NDim>
__global__ void contiguous_gpu(const T* a,
hip_tensor_descriptor<NDim> a_desc,
T* at,
hip_tensor_descriptor<NDim> at_desc,
size_t nelements)
{
for(size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < nelements;
i += blockDim.x * gridDim.x)
{
hip_index<NDim> s = at_desc.multi(i);
size_t lidx = a_desc.linear(s);
at[i] = a[lidx];
}
}
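The kernel body is a standard grid-stride loop over output elements: each thread converts its output offset to coordinates via the packed output descriptor and re-linearizes those coordinates with the source strides to find the element to copy. A host-side reference of the same mapping (hypothetical helper, for illustration only):
// For each packed output offset i: at[i] = a[a_desc.linear(at_desc.multi(i))].
template <typename T, size_t NDim>
void contiguous_host_reference(const T* a,
                               hip_tensor_descriptor<NDim> a_desc,
                               T* at,
                               hip_tensor_descriptor<NDim> at_desc,
                               size_t nelements)
{
    for(size_t i = 0; i < nelements; i++)
        at[i] = a[a_desc.linear(at_desc.multi(i))];
}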
void hip_contiguous(migraph::shape output_shape, migraph::argument arg, migraph::argument result)
{
size_t ndim = output_shape.lens().size();
visit_all(result, arg)([&](auto output, auto input) {
if(ndim == 4)
{
const auto& s = arg.get_shape();
hip_tensor_descriptor<4> a_desc(s.lens(), s.strides());
hip_tensor_descriptor<4> at_desc(output_shape.lens(), output_shape.strides());
dim3 nblocks(512);
dim3 nthreads(512);
hipLaunchKernelGGL((contiguous_gpu<int, 4>),
nblocks,
nthreads,
0,
nullptr,
input.data(),
a_desc,
output.data(),
at_desc,
s.elements());
}
else
{
MIGRAPH_THROW("contiguous is only valid for 4D tensors");
}
});
}
} // namespace gpu
} // namespace migraph
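End to end, a caller stages the strided input on the device, allocates a packed output, and invokes `hip_contiguous`. A rough sketch using the helpers declared in this change (`to_gpu`, `allocate_gpu`, `from_gpu`); the wrapper name and the float-only assumption are illustrative:
// Illustrative sketch: repack a (float) host argument into standard layout on the GPU.
migraph::argument make_contiguous(const migraph::argument& host_arg)
{
    const auto& in = host_arg.get_shape();
    // Same lens, default (packed) strides.
    migraph::shape packed{migraph::shape::float_type, in.lens()};
    auto src = migraph::gpu::to_gpu(host_arg);
    auto dst = migraph::gpu::allocate_gpu(packed);
    migraph::gpu::hip_contiguous(packed, src, dst);
    return migraph::gpu::from_gpu(dst);
}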
#ifndef MIGRAPH_GUARD_RTGLIB_CONTEXT_HPP
#define MIGRAPH_GUARD_RTGLIB_CONTEXT_HPP
#include <migraph/miopen/miopen.hpp>
#include <migraph/miopen/rocblas.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/rocblas.hpp>
namespace migraph {
namespace miopen {
namespace gpu {
struct context
{
@@ -13,7 +13,7 @@ struct context
shared<rocblas_handle_ptr> rbhandle;
};
} // namespace miopen
} // namespace gpu
} // namespace migraph
@@ -4,7 +4,7 @@
#include <migraph/operators.hpp>
namespace migraph {
namespace miopen {
namespace gpu {
migraph::argument allocate_gpu(migraph::shape s);
@@ -40,7 +40,7 @@ struct hip_write
}
};
} // namespace miopen
} // namespace gpu
} // namespace migraph
#ifndef MIGRAPH_GUARD_MIGRAPHLIB_KERNELS_HPP
#define MIGRAPH_GUARD_MIGRAPHLIB_KERNELS_HPP
namespace migraph {
namespace gpu {
void hip_contiguous(migraph::shape output_shape, migraph::argument arg, migraph::argument result);
} // namespace gpu
} // namespace migraph
#endif
@@ -4,15 +4,15 @@
#include <migraph/program.hpp>
namespace migraph {
namespace miopen {
namespace gpu {
struct lowering
{
std::string name() const { return "miopen::lowering"; }
std::string name() const { return "gpu::lowering"; }
void apply(program& p) const;
};
} // namespace miopen
} // namespace gpu
} // namespace migraph
@@ -6,7 +6,7 @@
#include <miopen/miopen.h>
namespace migraph {
namespace miopen {
namespace gpu {
using miopen_handle = MIGRAPH_MANAGE_PTR(miopenHandle_t, miopenDestroy);
using tensor_descriptor = MIGRAPH_MANAGE_PTR(miopenTensorDescriptor_t,
@@ -84,7 +84,7 @@ inline activation_descriptor make_relu()
return ad;
}
} // namespace miopen
} // namespace gpu
} // namespace migraph
@@ -6,13 +6,13 @@
#include <rocblas.h>
namespace migraph {
namespace miopen {
namespace gpu {
using rocblas_handle_ptr = MIGRAPH_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle);
rocblas_handle_ptr create_rocblas_handle_ptr();
} // namespace miopen
} // namespace gpu
} // namespace migraph
@@ -4,7 +4,7 @@
#include <migraph/program.hpp>
namespace migraph {
namespace miopen {
namespace gpu {
struct target
{
@@ -13,7 +13,7 @@ struct target
migraph::context get_context() const;
};
} // namespace miopen
} // namespace gpu
} // namespace migraph
@@ -5,16 +5,16 @@
namespace migraph {
namespace miopen {
namespace gpu {
struct write_literals
{
std::string name() const { return "miopen::write_literals"; }
std::string name() const { return "gpu::write_literals"; }
void apply(program& p) const;
};
} // namespace miopen
} // namespace gpu
} // namespace migraph
#include <rocblas.h>
#include <migraph/miopen/lowering.hpp>
#include <migraph/gpu/lowering.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
#include <migraph/shape_for_each.hpp>
#include <migraph/miopen/miopen.hpp>
#include <migraph/miopen/hip.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/hip.hpp>
#include <migraph/dfor.hpp>
#include <migraph/gpu/kernels.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/miopen/rocblas.hpp>
#include <migraph/miopen/context.hpp>
#include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/context.hpp>
namespace migraph {
namespace miopen {
namespace gpu {
struct miopen_convolution
{
convolution op;
shared<convolution_descriptor> cd;
std::string name() const { return "miopen::convolution"; }
std::string name() const { return "gpu::convolution"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3);
@@ -70,7 +71,7 @@ struct miopen_pooling
pooling op;
shared<pooling_descriptor> pd;
std::string name() const { return "miopen::pooling"; }
std::string name() const { return "gpu::pooling"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2);
@@ -101,7 +102,7 @@ struct miopen_pooling
struct miopen_add
{
std::string name() const { return "miopen::add"; }
std::string name() const { return "gpu::add"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3);
@@ -148,7 +149,7 @@ struct miopen_add
struct miopen_gemm
{
gemm op;
std::string name() const { return "miopen::convolution"; }
std::string name() const { return "gpu::convolution"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(3);
@@ -182,10 +183,42 @@ struct miopen_gemm
}
};
struct miopen_transpose
{
transpose op;
std::string name() const { return "gpu::transpose"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2);
return op.compute_shape({inputs.at(0)});
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
return {output_shape, std::move(args.front().data)};
}
};
struct miopen_contiguous
{
contiguous op;
std::string name() const { return "gpu::contiguous"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2);
return op.compute_shape({inputs.at(0)});
}
argument compute(context&, shape output_shape, std::vector<argument> args) const
{
hip_contiguous(output_shape, args.at(0), args.at(1));
return args.at(1);
}
};
struct miopen_relu
{
shared<activation_descriptor> ad;
std::string name() const { return "miopen::relu"; }
std::string name() const { return "gpu::relu"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(2);
@@ -239,6 +272,14 @@ struct miopen_apply
{
apply_gemm(it);
}
else if(it->op.name() == "transpose")
{
apply_transpose(it);
}
else if(it->op.name() == "contiguous")
{
apply_contiguous(it);
}
}
}
@@ -305,10 +346,24 @@ struct miopen_apply
prog->replace_instruction(
ins, miopen_gemm{op}, ins->arguments.at(0), ins->arguments.at(1), output);
}
void apply_transpose(instruction_ref ins)
{
auto&& op = any_cast<transpose>(ins->op);
auto output = insert_allocation(ins, ins->result);
prog->replace_instruction(ins, miopen_transpose{op}, ins->arguments.at(0), output);
}
void apply_contiguous(instruction_ref ins)
{
auto&& op = any_cast<contiguous>(ins->op);
auto output = insert_allocation(ins, ins->result);
prog->replace_instruction(ins, miopen_contiguous{op}, ins->arguments.at(0), output);
}
};
void lowering::apply(program& p) const { miopen_apply{&p}.apply(); }
} // namespace miopen
} // namespace gpu
} // namespace migraph
#include <migraph/miopen/rocblas.hpp>
#include <migraph/gpu/rocblas.hpp>
namespace migraph {
namespace miopen {
namespace gpu {
rocblas_handle_ptr create_rocblas_handle_ptr()
{
@@ -10,6 +10,6 @@ rocblas_handle_ptr create_rocblas_handle_ptr()
return rocblas_handle_ptr{handle};
}
} // namespace miopen
} // namespace gpu
} // namespace migraph
#include <migraph/miopen/target.hpp>
#include <migraph/miopen/lowering.hpp>
#include <migraph/miopen/write_literals.hpp>
#include <migraph/miopen/context.hpp>
#include <migraph/gpu/target.hpp>
#include <migraph/gpu/lowering.hpp>
#include <migraph/gpu/write_literals.hpp>
#include <migraph/gpu/context.hpp>
namespace migraph {
namespace miopen {
namespace gpu {
std::vector<pass> target::get_passes(migraph::context&) const
{
@@ -19,6 +19,6 @@ migraph::context target::get_context() const
share(create_rocblas_handle_ptr())};
}
} // namespace miopen
} // namespace gpu
} // namespace migraph
#include <migraph/miopen/write_literals.hpp>
#include <migraph/gpu/write_literals.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/miopen/hip.hpp>
#include <migraph/gpu/hip.hpp>
#include <migraph/instruction.hpp>
namespace migraph {
namespace miopen {
namespace gpu {
void write_literals::apply(program& p) const
{
@@ -20,6 +20,6 @@ void write_literals::apply(program& p) const
}
}
} // namespace miopen
} // namespace gpu
} // namespace migraph
@@ -95,13 +95,13 @@ foreach(TEST ${TESTS})
add_test_executable(test_${BASE_NAME} ${TEST})
endforeach()
if(MIGRAPH_ENABLE_MIOPEN)
# miopen tests
file(GLOB MIOPEN_TESTS miopen/*.cpp)
if(MIGRAPH_ENABLE_GPU)
# gpu tests
file(GLOB GPU_TESTS gpu/*.cpp)
foreach(TEST ${MIOPEN_TESTS})
foreach(TEST ${GPU_TESTS})
get_filename_component(BASE_NAME ${TEST} NAME_WE)
add_test_executable(test_miopen_${BASE_NAME} ${TEST})
target_link_libraries(test_miopen_${BASE_NAME} migraph_miopen)
add_test_executable(test_gpu_${BASE_NAME} ${TEST})
target_link_libraries(test_gpu_${BASE_NAME} migraph_gpu)
endforeach()
endif()
@@ -3,9 +3,9 @@
#include <migraph/operators.hpp>
#include <migraph/generate.hpp>
#include <migraph/cpu/cpu_target.hpp>
#include <migraph/miopen/target.hpp>
#include <migraph/miopen/miopen.hpp>
#include <migraph/miopen/hip.hpp>
#include <migraph/gpu/target.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/hip.hpp>
#include <migraph/manage_ptr.hpp>
#include <miopen/miopen.h>
@@ -27,18 +27,17 @@ migraph::argument run_gpu()
{
V v;
auto p = v.create_program();
p.compile(migraph::miopen::target{});
p.compile(migraph::gpu::target{});
auto m = v.create_params();
for(auto&& e : m)
{
e.second = migraph::miopen::to_gpu(e.second);
e.second = migraph::gpu::to_gpu(e.second);
}
m["output"] =
migraph::miopen::to_gpu(migraph::generate_argument(p.get_parameter_shape("output")));
m["output"] = migraph::gpu::to_gpu(migraph::generate_argument(p.get_parameter_shape("output")));
return migraph::miopen::from_gpu(p.eval(m));
return migraph::gpu::from_gpu(p.eval(m));
}
template <class V>
@@ -175,6 +174,47 @@ struct test_gemm
}
};
struct test_contiguous
{
migraph::program create_program() const
{
migraph::program p;
migraph::shape s{migraph::shape::float_type, {4, 4, 4, 3}, {48, 4, 1, 16}};
auto x = p.add_parameter("x", s);
p.add_instruction(migraph::contiguous{}, x);
return p;
}
migraph::program::parameter_map create_params() const
{
migraph::program::parameter_map m;
m["x"] =
migraph::generate_argument({migraph::shape::float_type, {4, 4, 4, 3}, {48, 4, 1, 16}});
return m;
}
};
struct test_transpose
{
migraph::program create_program() const
{
migraph::program p;
migraph::shape s{migraph::shape::float_type, {4, 3, 4, 4}};
auto x = p.add_parameter("x", s);
std::vector<int64_t> perm = {0, 2, 3, 1};
auto l = p.add_instruction(migraph::transpose{perm}, x);
p.add_instruction(migraph::contiguous{}, l);
return p;
}
migraph::program::parameter_map create_params() const
{
migraph::program::parameter_map m;
m["x"] = migraph::generate_argument({migraph::shape::float_type, {4, 3, 4, 4}});
return m;
}
};
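The strides in these two tests are related: `test_contiguous` feeds in exactly the layout that the transpose in `test_transpose` produces. Permuting a packed {4, 3, 4, 4} tensor (strides {48, 16, 4, 1}) by {0, 2, 3, 1} gives lens {4, 4, 4, 3} with strides {48, 4, 1, 16}, and `contiguous` repacks that to the standard {48, 12, 3, 1}. A small helper to check the packed-stride arithmetic (illustrative only):
#include <array>
#include <cstddef>

// Standard (row-major, packed) strides for a rank-4 shape.
inline std::array<std::size_t, 4> packed_strides(std::array<std::size_t, 4> lens)
{
    std::array<std::size_t, 4> s{};
    std::size_t acc = 1;
    for(int i = 3; i >= 0; i--)
    {
        s[i] = acc;
        acc *= lens[i];
    }
    return s; // {4,3,4,4} -> {48,16,4,1};  {4,4,4,3} -> {48,12,3,1}
}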
int main()
{
verify_program<test_add>();
@@ -182,4 +222,6 @@ int main()
verify_program<test_conv_relu>();
verify_program<test_conv_pooling>();
verify_program<test_gemm>();
verify_program<test_contiguous>();
verify_program<test_transpose>();
}
@@ -9,10 +9,23 @@
namespace migraph {
#ifdef DOXYGEN
/// A context is used to store internal data for a `target`. A context is
/// constructed by a target during compilation and passed to the operations
/// during `eval`.
struct context
{
};
#else
<%
interface('context')
%>
#endif
} // namespace migraph
#endif
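As a concrete instance of this interface, the GPU context touched by this commit is just a bundle of library handles that get handed to operations at `eval` time; roughly as below, mirroring `gpu::context` earlier in this diff (the MIOpen handle member is assumed, since that hunk is truncated):
// Sketch of a conforming context, modeled on gpu::context above.
struct example_context
{
    shared<miopen_handle> handle;        // MIOpen handle (assumed member)
    shared<rocblas_handle_ptr> rbhandle; // rocBLAS handle (shown in the hunk above)
};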
@@ -13,6 +13,37 @@
namespace migraph {
#ifdef DOXYGEN
/// The operation interface represents an action an instruction will perform. All
/// operation classes must be CopyConstructible.
struct operation
{
/// A unique name identifying the operation
std::string name() const;
/// This is used to compute the resulting shape from an operation. If an
/// operation cannot be run with input shapes, then it should throw an
/// exception.
shape compute_shape(std::vector<shape> input) const;
/**
* @brief This performs the operation's computation
*
* @param ctx This is the context created by the `target` during compilation. Implementations
* can use the target's `context` class rather than the `context` interface class.
* @param output This is the output shape. It is equivalent to running `compute_shape` with each
* `shape` of the `argument`.
* @param input This is the `argument` result from the previous instruction's computation.
* @return Return an `argument` of the result computation. The `shape` of `argument` should be
* the same as the `output` shape.
*/
argument compute(context& ctx, shape output, std::vector<argument> input) const;
/// An optional stream operator to print the operation. When this is not
/// implemented, it will just print the operation's name.
friend std::ostream& operator<<(std::ostream& os, const operation& op);
};
#else
namespace operation_stream {
template <class T>
@@ -38,6 +69,8 @@ interface('operation',
)
%>
#endif
} // namespace migraph
#endif
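A minimal operation satisfying this interface, patterned on `gpu::transpose` added earlier in this diff (the name `pass_through` is hypothetical; assume it lives inside `namespace migraph` or qualifies the types):
// Sketch: an operation that forwards its single input, reinterpreted with the
// computed output shape -- the same zero-copy trick gpu::transpose uses above.
struct pass_through
{
    std::string name() const { return "example::pass_through"; }

    shape compute_shape(std::vector<shape> inputs) const
    {
        return inputs.at(0); // one input; output shape is identical
    }

    argument compute(context&, shape output_shape, std::vector<argument> args) const
    {
        return {output_shape, std::move(args.front().data)};
    }
};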
@@ -11,6 +11,20 @@ namespace migraph {
struct program;
#ifdef DOXYGEN
/// An interface for applying a transformation to the instructions in a
/// `program`
struct pass
{
/// A unique name used to identify the pass
std::string name() const;
/// Run the pass on the program
void apply(program& p) const;
};
#else
<%
interface('pass',
virtual('name', returns='std::string', const=True),
@@ -18,6 +32,8 @@ interface('pass',
)
%>
#endif
} // namespace migraph
#endif
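The passes this commit renames (`gpu::lowering`, `gpu::write_literals`) are exactly this shape; a minimal conforming pass as a sketch (hypothetical name):
// Sketch of a conforming pass: a unique name plus apply(), which may rewrite
// the program's instructions (here it intentionally does nothing).
struct noop_pass
{
    std::string name() const { return "example::noop"; }
    void apply(program&) const {}
};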