Commit d2549384 authored by Khalique

manual merge

parents 67048d04 ab6cd9d3
@@ -10,20 +10,34 @@ if(NOT TARGET MIOpen)
message(SEND_ERROR "Cant find miopen")
endif()
add_library(migraphx_device
device/add.cpp
device/max.cpp
device/min.cpp
device/exp.cpp
device/log.cpp
device/sin.cpp
device/cos.cpp
device/tan.cpp
device/sinh.cpp
device/cosh.cpp
device/asin.cpp
device/acos.cpp
device/atan.cpp
device/add_relu.cpp
device/contiguous.cpp
device/mul.cpp
device/concat.cpp
device/pad.cpp
device/gather.cpp
)
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_clang_tidy_check(migraphx_device)
target_link_libraries(migraphx_device migraphx hip::device -Wno-invalid-command-line-argument -amdgpu-target=gfx803 -amdgpu-target=gfx900 -amdgpu-target=gfx906)
target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
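# Note (editorial assumption, not part of the commit): the extra flags on migraphx_device
# appear to pass GPU ISA targets (gfx803/gfx900/gfx906) straight through to the HIP device
# compiler, with -Wno-invalid-command-line-argument presumably silencing host-side warnings
# about those device-only options.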
add_library(migraphx_gpu
eliminate_workspace.cpp
fuse_ops.cpp
hip.cpp
@@ -37,20 +51,24 @@ add_library(migraph_gpu
concat.cpp
relu.cpp
leaky_relu.cpp
tanh.cpp
mul.cpp
batchnorm.cpp
write_literals.cpp
rocblas.cpp
sigmoid.cpp
abs.cpp
elu.cpp
pad.cpp
gather.cpp
lrn.cpp
)
set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
rocm_clang_tidy_check(migraphx_gpu)
target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
target_link_libraries(migraphx_gpu PRIVATE migraphx_device)
rocm_install_targets(
TARGETS migraphx_gpu migraphx_device
INCLUDE
${CMAKE_CURRENT_SOURCE_DIR}/include
)
#include <migraphx/gpu/abs.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_abs::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).not_broadcasted();
return inputs.at(1);
}
argument miopen_abs::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
float alpha = 1;
float beta = 0;
auto x_desc = make_tensor(args[0].get_shape());
auto y_desc = make_tensor(output_shape);
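// Note: 'ad' is the MIOpen activation descriptor owned by miopen_abs (its setup
// is outside this hunk); the forward call writes the element-wise result into
// the preallocated output buffer passed in as args[1].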
miopenActivationForward(ctx.get_stream().get_miopen(),
ad.get(),
&alpha,
x_desc.get(),
args[0].implicit(),
&beta,
y_desc.get(),
args[1].implicit());
return args[1];
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraph/gpu/add.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/config.hpp>
#include <migraph/gpu/miopen.hpp>
#include <utility>
namespace migraph {
inline namespace MIGRAPH_INLINE_NS {
namespace gpu {
shape hip_add::compute_shape(const std::vector<shape>& inputs) const
{
// check_shapes{inputs, *this}.has(3).standard();
check_shapes{inputs, *this}.has(3);
return inputs.at(0);
}
argument hip_add::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
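// args[0] and args[1] are the inputs; args[2] is the preallocated output buffer,
// which device::add fills on the given stream and which is then returned.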
device::add(ctx.get_stream().get(), args[2], args[0], args[1]);
return args[2];
}
shape miopen_add::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(3).not_broadcasted();
return inputs.at(0);
}
argument miopen_add::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
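// miopenOpTensor computes C = op(alpha1*A, alpha2*B) + beta*C, so with both
// alphas at 1 and beta at 0 this is a plain element-wise add into args[2].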
float alpha = 1, beta = 0;
auto a_desc = make_tensor(args[0].get_shape());
auto b_desc = make_tensor(args[1].get_shape());
auto c_desc = make_tensor(output_shape);
miopenOpTensor(ctx.get_stream().get_miopen(),
miopenTensorOpAdd,
&alpha,
a_desc.get(),
args[0].implicit(),
&alpha,
b_desc.get(),
args[1].implicit(),
&beta,
c_desc.get(),
args[2].implicit());
return args[2];
}
} // namespace gpu
} // namespace MIGRAPH_INLINE_NS
} // namespace migraph
#include <migraphx/gpu/batchnorm.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_batch_norm_inference::compute_shape(const std::vector<shape>& inputs) const
@@ -22,7 +22,8 @@ argument miopen_batch_norm_inference::compute(context& ctx,
auto y_desc = make_tensor(output_shape);
auto bn_desc = make_tensor(args[3].get_shape());
float alpha = 1.0;
float beta = 0.0f;
miopenBatchNormalizationForwardInference(ctx.get_stream().get_miopen(),
miopenBatchNormMode_t(op.bn_mode),
@@ -43,5 +44,5 @@ argument miopen_batch_norm_inference::compute(context& ctx,
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/concat.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/device/concat.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_concat::compute_shape(std::vector<shape> inputs) const
@@ -24,5 +24,5 @@ argument hip_concat::compute(context& ctx,
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/contiguous.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_contiguous::compute_shape(const std::vector<shape>& inputs) const
@@ -25,5 +25,5 @@ argument miopen_contiguous::compute(context& ctx,
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape miopen_convolution::compute_shape(const std::vector<shape>& inputs) const
@@ -21,7 +21,8 @@ argument miopen_convolution::compute(context& ctx,
auto w_desc = make_tensor(args[1].get_shape());
auto y_desc = make_tensor(output_shape);
float alpha = 1;
float beta = 0;
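// The convolution algorithm selected during compile()/finalize() is reused here;
// alpha = 1 and beta = 0 mean the output buffer is overwritten rather than
// accumulated into.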
miopenConvolutionForward(ctx.get_stream().get_miopen(),
&alpha,
x_desc.get(),
@@ -40,11 +41,11 @@ argument miopen_convolution::compute(context& ctx,
shape miopen_convolution::compile(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(inputs[0]);
auto w_desc = make_tensor(inputs[1]);
auto y_desc = make_tensor(output_shape);
std::size_t workspace_size = 0;
@@ -56,31 +57,44 @@ shape miopen_convolution::compile(context& ctx,
&workspace_size);
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x = to_gpu(generate_argument(inputs[0]));
auto w = to_gpu(generate_argument(inputs[1]));
auto y = allocate_gpu(output_shape);
auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1;
miopenConvAlgoPerf_t perf;
auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
x_desc.get(),
x.implicit(),
w_desc.get(),
w.implicit(),
cd.get(),
y_desc.get(),
y.implicit(),
1,
&algo_count,
&perf,
workspace.implicit(),
workspace_size,
false);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("Find convolution failed");
handle = ctx.get_stream().get_miopen();
algo = perf.fwd_algo;
return shape{shape::int8_type, {perf.memory}};
}
void miopen_convolution::finalize(context& ctx,
const shape& output_shape,
std::vector<shape> inputs)
{
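// compile() records the MIOpen handle it tuned against; if the current context
// still uses that handle, the previously found algorithm remains valid and the
// search can be skipped.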
if(handle == ctx.get_stream().get_miopen())
return;
// TODO: Check that workspace hasn't changed
compile(ctx, output_shape, std::move(inputs));
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/acos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void acos(hipStream_t stream, const argument& result, const argument& arg)
{
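// nary() builds the element-wise kernel launch; to_hip_type maps the tensor's
// element type to a type for which a device ::acos overload exists.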
nary(stream, result, arg)([](auto x) { return ::acos(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -22,5 +22,5 @@ void add(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/add_relu.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -27,5 +27,5 @@ void add_relu(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/asin.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void asin(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::asin(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/atan.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void atan(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::atan(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/concat.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument concat(hipStream_t stream,
const migraphx::shape& output_shape,
std::vector<migraphx::argument> args,
std::vector<std::size_t> offsets)
{
for(std::size_t l = 0; l < args.size() - 1; l++)
@@ -34,5 +34,5 @@ argument concat(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -14,5 +14,5 @@ void contiguous(hipStream_t stream, argument result, argument arg)
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/cos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void cos(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::cos(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/cosh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void cosh(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::cosh(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/device/exp.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
void exp(hipStream_t stream, const argument& result, const argument& arg)
{
nary(stream, result, arg)([](auto x) { return ::exp(to_hip_type(x)); });
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/gather.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument gather(hipStream_t stream,
const migraphx::shape& output_shape,
std::vector<migraphx::argument> args,
int axis)
{
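// A negative axis counts from the last dimension (ONNX convention), so it is
// shifted into the valid [0, ndim) range before indexing.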
int axis_index = (axis < 0) ? (axis + output_shape.lens().size()) : axis;
visit_all(args.back(), args[0])([&](auto output, auto input) {
std::size_t nelements = output_shape.elements();
args[1].visit([&](auto indices) {
visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
const auto* indices_ptr = device_cast(indices.data());
auto* outptr = device_cast(output.data());
const auto* inptr = device_cast(input.data());
hip_tensor_descriptor<ndim> desc_input(input.get_shape());
hip_tensor_descriptor<ndim> desc_output(output.get_shape());
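// One thread per output element: convert the linear index to output
// coordinates, swap the coordinate on the gather axis for the looked-up
// index value, then read the corresponding input element.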
gs_launch(stream, nelements)([=](auto i) {
auto lens = desc_output.multi(i);
lens[axis_index] = indices_ptr[lens[axis_index]];
outptr[i] = inptr[desc_input.linear(lens)];
});
});
});
});
return args.back();
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#include <hip/hip_runtime.h>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -53,14 +53,14 @@ inline auto gs_launch(hipStream_t stream, std::size_t n, std::size_t local = 1024)
// Workaround hcc's broken tile_static macro
#ifdef tile_static
#undef tile_static
#define MIGRAPHX_DEVICE_SHARED __attribute__((tile_static))
#else
#define MIGRAPHX_DEVICE_SHARED __shared__
#endif
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_NARY_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_NARY_HPP
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
@@ -87,7 +87,7 @@ void trinary_broadcast_vec_impl(hipStream_t stream,
const std::size_t bdim_vec_len = bdim_len / vec_size;
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPHX_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
{
@@ -144,7 +144,7 @@ void trinary_broadcast_impl(hipStream_t stream,
const std::size_t n = output.size();
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPHX_DEVICE_SHARED type buffer[2048];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_len; i += nlocal)
{
@@ -192,7 +192,7 @@ void binary_broadcast_vec_impl(
const std::size_t bdim_vec_len = bdim_len / vec_size;
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPHX_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
{
@@ -243,7 +243,7 @@ void binary_broadcast_impl(
const std::size_t n = output.size();
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPHX_DEVICE_SHARED type buffer[2048];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_len; i += nlocal)
{
@@ -313,6 +313,12 @@ void nary_impl(hipStream_t stream, F f, argument result, Arguments... args)
nary_nonstandard_impl(stream, f, result, args...);
}
template <class F>
void nary_impl(hipStream_t stream, F f, argument result)
{
nary_standard_impl(stream, f, result);
}
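// With no input tensors there is nothing that could be non-standard or
// broadcast, so the nullary overload always takes the standard path.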
template <class... Arguments>
auto nary_nonstandard(hipStream_t stream, argument result, Arguments... args)
{
@@ -396,7 +402,7 @@ inline auto nary(hipStream_t stream,
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif