Unverified Commit ba07b221 authored by Aaron Enye Shi's avatar Aaron Enye Shi Committed by GitHub
Browse files

Fix HIP-Clang GPU build issues (#384)



* Fix HIP-Clang GPU build issues

Add missing device attributes for GPU functions. GPU functions must be annotated with __device__ in HIP.

* Use HIP device function max and min

* Fix clang-format-5.0 issues

* Undo change that breaks on HIP-HCC
Co-authored-by: default avatarmvermeulen <5479696+mvermeulen@users.noreply.github.com>
parent a023ec19
...@@ -95,7 +95,7 @@ inline auto mi_launch(hipStream_t stream, const hip_shape<N>& global, index_int ...@@ -95,7 +95,7 @@ inline auto mi_launch(hipStream_t stream, const hip_shape<N>& global, index_int
auto nglobal = global.index(nglobal_multi); auto nglobal = global.index(nglobal_multi);
return [=](auto f) { return [=](auto f) {
launch(stream, nglobal, nlocal)([=](auto idx) { launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
auto midx = make_multi_index(global, idx.global, nglobal_multi); auto midx = make_multi_index(global, idx.global, nglobal_multi);
f(idx, midx.for_stride(global.lens)); f(idx, midx.for_stride(global.lens));
}); });
......
...@@ -36,7 +36,8 @@ auto nary_nonstandard_nonpacked_impl(hipStream_t stream, F f, argument result, A ...@@ -36,7 +36,8 @@ auto nary_nonstandard_nonpacked_impl(hipStream_t stream, F f, argument result, A
MIGRAPHX_TRACE_NARY_FUNCTION MIGRAPHX_TRACE_NARY_FUNCTION
shape s{result.get_shape().type(), result.get_shape().lens()}; shape s{result.get_shape().type(), result.get_shape().lens()};
hip_visit_all(s, result, args...)([&](auto standard_shape, auto output, auto... inputs) { hip_visit_all(s, result, args...)([&](auto standard_shape, auto output, auto... inputs) {
mi_gs_launch(stream, standard_shape)([=](auto idx) { output[idx] = f(inputs[idx]...); }); mi_gs_launch(stream,
standard_shape)([=](auto idx) __device__ { output[idx] = f(inputs[idx]...); });
}); });
} }
...@@ -45,7 +46,7 @@ inline auto create_broadcast_index(index_int len, index_int stride) ...@@ -45,7 +46,7 @@ inline auto create_broadcast_index(index_int len, index_int stride)
auto next_stride = stride * len; auto next_stride = stride * len;
auto e_next_stride = encode_divisor(next_stride); auto e_next_stride = encode_divisor(next_stride);
auto e_stride = encode_divisor(stride); auto e_stride = encode_divisor(stride);
return [=](auto i) { return [=](auto i) __device__ {
// ( i % next_stride) / stride // ( i % next_stride) / stride
return fast_div(i, e_stride) - len * fast_div(i, e_next_stride); return fast_div(i, e_stride) - len * fast_div(i, e_next_stride);
}; };
...@@ -61,11 +62,11 @@ auto nary_nonstandard_packed_impl(hipStream_t stream, ...@@ -61,11 +62,11 @@ auto nary_nonstandard_packed_impl(hipStream_t stream,
auto arg_shape = make_array(args...).front().get_shape(); auto arg_shape = make_array(args...).front().get_shape();
auto perm = find_permutation(arg_shape); auto perm = find_permutation(arg_shape);
auto s = reorder_shape(arg_shape, perm); auto s = reorder_shape(arg_shape, perm);
hip_visit_all(s, hip_visit_all(s, result.reshape(reorder_shape(result.get_shape(), perm)), args.reshape(s)...)(
result.reshape(reorder_shape(result.get_shape(), perm)), [&](auto standard_shape, auto output, auto... inputs) {
args.reshape(s)...)([&](auto standard_shape, auto output, auto... inputs) { mi_gs_launch(stream, standard_shape)(
mi_gs_launch(stream, standard_shape)([=](auto idx) { output[idx] = f(inputs[idx]...); }); [=](auto idx) __device__ { output[idx] = f(inputs[idx]...); });
}); });
} }
template <class F, class... Arguments> template <class F, class... Arguments>
...@@ -93,7 +94,6 @@ void nary_broadcast_vec_impl( ...@@ -93,7 +94,6 @@ void nary_broadcast_vec_impl(
using type = typename decltype(output)::value_type; using type = typename decltype(output)::value_type;
const index_int nelements = output.size() / vec_size; const index_int nelements = output.size() / vec_size;
launch(stream, nglobal, nlocal)([=](auto idx) __device__ { launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size]; MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size];
// Load bias into LDS // Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal) for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
...@@ -185,7 +185,6 @@ void nary_double_broadcast_vec_impl( ...@@ -185,7 +185,6 @@ void nary_double_broadcast_vec_impl(
using type = typename decltype(output)::value_type; using type = typename decltype(output)::value_type;
const index_int nelements = output.size() / vec_size; const index_int nelements = output.size() / vec_size;
launch(stream, nglobal, nlocal)([=](auto idx) __device__ { launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size]; MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size];
// Load bias into LDS // Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal) for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
...@@ -274,7 +273,7 @@ void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments. ...@@ -274,7 +273,7 @@ void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments.
const index_int vec_size = 4; const index_int vec_size = 4;
auto data = pack_vec<4>(device_cast(inputs.data())...); auto data = pack_vec<4>(device_cast(inputs.data())...);
auto* outp = as_vec<4>(device_cast(output.data())); auto* outp = as_vec<4>(device_cast(output.data()));
gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) { gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) __device__ {
vec<type, 4> out = outp[i]; vec<type, 4> out = outp[i];
data( data(
[&](auto... xs) { [&](auto... xs) {
...@@ -295,7 +294,7 @@ void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... a ...@@ -295,7 +294,7 @@ void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... a
MIGRAPHX_TRACE_NARY_FUNCTION MIGRAPHX_TRACE_NARY_FUNCTION
index_int nelements = result.get_shape().elements(); index_int nelements = result.get_shape().elements();
hip_pointer_visit_all(result, args...)([&](auto output, auto... inputs) { hip_pointer_visit_all(result, args...)([&](auto output, auto... inputs) {
gs_launch(stream, nelements)([=](auto i) { output[i] = f(inputs[i]...); }); gs_launch(stream, nelements)([=](auto i) __device__ { output[i] = f(inputs[i]...); });
}); });
} }
......
...@@ -24,7 +24,7 @@ void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument ...@@ -24,7 +24,7 @@ void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument
auto* in_ptr = device_cast(input.data()); auto* in_ptr = device_cast(input.data());
visit_tensor_size(out_lens.size(), [&](auto out_dim) { visit_tensor_size(out_lens.size(), [&](auto out_dim) {
hip_tensor_descriptor<out_dim> desc(comp_shape); hip_tensor_descriptor<out_dim> desc(comp_shape);
gs_launch(stream, nelements, 256)([=](auto ii) { gs_launch(stream, nelements, 256)([=](auto ii) __device__ {
const size_t nb = 4; const size_t nb = 4;
auto idx = desc.multi(ii); auto idx = desc.multi(ii);
std::size_t i_m = idx[dim_1]; std::size_t i_m = idx[dim_1];
...@@ -55,7 +55,7 @@ void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument ...@@ -55,7 +55,7 @@ void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument
auto* in_ptr = device_cast(input.data()); auto* in_ptr = device_cast(input.data());
visit_tensor_size(out_lens.size(), [&](auto out_dim) { visit_tensor_size(out_lens.size(), [&](auto out_dim) {
hip_tensor_descriptor<out_dim> desc(comp_shape); hip_tensor_descriptor<out_dim> desc(comp_shape);
gs_launch(stream, nelements, 256)([=](auto ii) { gs_launch(stream, nelements, 256)([=](auto ii) __device__ {
const size_t nb = 4; const size_t nb = 4;
auto idx = desc.multi(ii); auto idx = desc.multi(ii);
std::size_t i_n = idx[dim_1]; std::size_t i_n = idx[dim_1];
......
...@@ -9,7 +9,7 @@ namespace device { ...@@ -9,7 +9,7 @@ namespace device {
void log(hipStream_t stream, const argument& result, const argument& arg) void log(hipStream_t stream, const argument& result, const argument& arg)
{ {
nary(stream, result, arg)([](auto x) { return ::log(to_hip_type(x)); }); nary(stream, result, arg)([](auto x) __device__ { return ::log(to_hip_type(x)); });
} }
} // namespace device } // namespace device
......
...@@ -43,7 +43,7 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, ...@@ -43,7 +43,7 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,
auto log_batch_sum = ::log(to_hip_type(batch_sum)) + batch_max; auto log_batch_sum = ::log(to_hip_type(batch_sum)) + batch_max;
idx.local_stride(batch_item_num, [&](auto j) { idx.local_stride(batch_item_num, [&](auto j) __device__ {
data_idx[axis] = j; data_idx[axis] = j;
output[data_idx] = input[data_idx] - log_batch_sum; output[data_idx] = input[data_idx] - log_batch_sum;
}); });
......
...@@ -10,7 +10,7 @@ namespace device { ...@@ -10,7 +10,7 @@ namespace device {
void max(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2) void max(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{ {
nary(stream, result, arg1, arg2)( nary(stream, result, arg1, arg2)(
[](auto x, auto y) { return std::max(to_hip_type(x), to_hip_type(y)); }); [](auto x, auto y) __device__ { return ::max(to_hip_type(x), to_hip_type(y)); });
} }
} // namespace device } // namespace device
......
...@@ -10,7 +10,7 @@ namespace device { ...@@ -10,7 +10,7 @@ namespace device {
void min(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2) void min(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{ {
nary(stream, result, arg1, arg2)( nary(stream, result, arg1, arg2)(
[](auto x, auto y) { return std::min(to_hip_type(x), to_hip_type(y)); }); [](auto x, auto y) __device__ { return ::min(to_hip_type(x), to_hip_type(y)); });
} }
} // namespace device } // namespace device
......
...@@ -8,7 +8,7 @@ namespace device { ...@@ -8,7 +8,7 @@ namespace device {
void mul(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2) void mul(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{ {
nary(stream, result, arg1, arg2)([](auto x, auto y) { return x * y; }); nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ { return x * y; });
} }
void mul(hipStream_t stream, void mul(hipStream_t stream,
...@@ -17,7 +17,8 @@ void mul(hipStream_t stream, ...@@ -17,7 +17,8 @@ void mul(hipStream_t stream,
const argument& arg2, const argument& arg2,
const argument& arg3) const argument& arg3)
{ {
nary(stream, result, arg1, arg2, arg3)([](auto x, auto y, auto z) { return x * y * z; }); nary(stream, result, arg1, arg2, arg3)([](auto x, auto y, auto z)
__device__ { return x * y * z; });
} }
} // namespace device } // namespace device
......
...@@ -12,7 +12,8 @@ void mul_add(hipStream_t stream, ...@@ -12,7 +12,8 @@ void mul_add(hipStream_t stream,
const argument& arg2, const argument& arg2,
const argument& arg3) const argument& arg3)
{ {
nary(stream, result, arg1, arg2, arg3)([](auto x, auto a, auto b) { return a * x + b; }); nary(stream, result, arg1, arg2, arg3)([](auto x, auto a, auto b)
__device__ { return a * x + b; });
} }
} // namespace device } // namespace device
......
...@@ -13,7 +13,7 @@ void mul_add_relu(hipStream_t stream, ...@@ -13,7 +13,7 @@ void mul_add_relu(hipStream_t stream,
const argument& arg3) const argument& arg3)
{ {
nary(stream, result, arg1, arg2, arg3)( nary(stream, result, arg1, arg2, arg3)(
[](auto x, auto a, auto b) { return std::max<decltype(a * x + b)>(0, a * x + b); }); [](auto x, auto a, auto b) __device__ { return ::max<decltype(a * x + b)>(0, a * x + b); });
} }
} // namespace device } // namespace device
......
...@@ -23,12 +23,12 @@ pad(hipStream_t stream, argument result, argument arg1, float value, std::vector ...@@ -23,12 +23,12 @@ pad(hipStream_t stream, argument result, argument arg1, float value, std::vector
{ {
device_val = device_cast(std::numeric_limits<type>::lowest()); device_val = device_cast(std::numeric_limits<type>::lowest());
} }
gs_launch(stream, gs_launch(stream, result.get_shape().elements())(
result.get_shape().elements())([=](auto i) { output.data()[i] = device_val; }); [=](auto i) __device__ { output.data()[i] = device_val; });
hip_index offsets; hip_index offsets;
std::copy(pads.begin(), pads.begin() + offsets.size(), offsets.begin()); std::copy(pads.begin(), pads.begin() + offsets.size(), offsets.begin());
gs_launch(stream, nelements)([=](auto i) { gs_launch(stream, nelements)([=](auto i) __device__ {
auto idx = input.get_shape().multi(i); auto idx = input.get_shape().multi(i);
for(std::size_t j = 0; j < offsets.size(); j++) for(std::size_t j = 0; j < offsets.size(); j++)
{ {
......
...@@ -9,7 +9,7 @@ namespace device { ...@@ -9,7 +9,7 @@ namespace device {
void pow(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2) void pow(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{ {
nary(stream, result, arg1, arg2)( nary(stream, result, arg1, arg2)(
[](auto b, auto e) { return ::pow(to_hip_type(b), to_hip_type(e)); }); [](auto b, auto e) __device__ { return ::pow(to_hip_type(b), to_hip_type(e)); });
} }
} // namespace device } // namespace device
......
...@@ -8,7 +8,7 @@ namespace device { ...@@ -8,7 +8,7 @@ namespace device {
void relu(hipStream_t stream, const argument& result, const argument& arg) void relu(hipStream_t stream, const argument& result, const argument& arg)
{ {
nary(stream, result, arg)([](auto x) { return std::max<decltype(x)>(0, x); }); nary(stream, result, arg)([](auto x) __device__ { return ::max<decltype(x)>(0, x); });
} }
} // namespace device } // namespace device
......
...@@ -9,7 +9,7 @@ namespace device { ...@@ -9,7 +9,7 @@ namespace device {
void round(hipStream_t stream, const argument& result, const argument& arg) void round(hipStream_t stream, const argument& result, const argument& arg)
{ {
nary(stream, result, arg)([](auto x) { return ::round(to_hip_type(x)); }); nary(stream, result, arg)([](auto x) __device__ { return ::round(to_hip_type(x)); });
} }
} // namespace device } // namespace device
......
...@@ -9,7 +9,8 @@ namespace device { ...@@ -9,7 +9,8 @@ namespace device {
void sigmoid(hipStream_t stream, const argument& result, const argument& arg) void sigmoid(hipStream_t stream, const argument& result, const argument& arg)
{ {
nary(stream, result, arg)([](auto x) { return 1.f / (1.f + ::exp(to_hip_type(-x))); }); nary(stream, result, arg)([](auto x)
__device__ { return 1.f / (1.f + ::exp(to_hip_type(-x))); });
} }
} // namespace device } // namespace device
......
...@@ -9,7 +9,7 @@ namespace device { ...@@ -9,7 +9,7 @@ namespace device {
void sign(hipStream_t stream, const argument& result, const argument& arg) void sign(hipStream_t stream, const argument& result, const argument& arg)
{ {
nary(stream, result, arg)([](auto x) { return (x > 0 ? 1 : ((x < 0) ? -1 : 0)); }); nary(stream, result, arg)([](auto x) __device__ { return (x > 0 ? 1 : ((x < 0) ? -1 : 0)); });
} }
} // namespace device } // namespace device
......
...@@ -9,7 +9,7 @@ namespace device { ...@@ -9,7 +9,7 @@ namespace device {
void sin(hipStream_t stream, const argument& result, const argument& arg) void sin(hipStream_t stream, const argument& result, const argument& arg)
{ {
nary(stream, result, arg)([](auto x) { return ::sin(to_hip_type(x)); }); nary(stream, result, arg)([](auto x) __device__ { return ::sin(to_hip_type(x)); });
} }
} // namespace device } // namespace device
......
...@@ -9,7 +9,7 @@ namespace device { ...@@ -9,7 +9,7 @@ namespace device {
void sinh(hipStream_t stream, const argument& result, const argument& arg) void sinh(hipStream_t stream, const argument& result, const argument& arg)
{ {
nary(stream, result, arg)([](auto x) { return ::sinh(to_hip_type(x)); }); nary(stream, result, arg)([](auto x) __device__ { return ::sinh(to_hip_type(x)); });
} }
} // namespace device } // namespace device
......
...@@ -42,7 +42,7 @@ void softmax(hipStream_t stream, const argument& result, const argument& arg, in ...@@ -42,7 +42,7 @@ void softmax(hipStream_t stream, const argument& result, const argument& arg, in
return ::exp(to_hip_type(val)); return ::exp(to_hip_type(val));
}); });
idx.local_stride(batch_item_num, [&](auto j) { idx.local_stride(batch_item_num, [&](auto j) __device__ {
data_idx[axis] = j; data_idx[axis] = j;
auto val = input[data_idx] - batch_max; auto val = input[data_idx] - batch_max;
output[data_idx] = ::exp(to_hip_type(val)) / batch_sum; output[data_idx] = ::exp(to_hip_type(val)) / batch_sum;
......
...@@ -8,7 +8,7 @@ namespace device { ...@@ -8,7 +8,7 @@ namespace device {
void sqdiff(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2) void sqdiff(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{ {
nary(stream, result, arg1, arg2)([](auto x, auto y) { return (x - y) * (x - y); }); nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ { return (x - y) * (x - y); });
} }
} // namespace device } // namespace device
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment