Unverified commit ba07b221, authored by Aaron Enye Shi and committed via GitHub.
Browse files

Fix HIP-Clang GPU build issues (#384)



* Fix HIP-Clang GPU build issues

Add missing device attributes for GPU functions. GPU functions must be annotated with __device__ in HIP.

* Use HIP device function max and min

* Fix clang-format-5.0 issues

* Undo change that breaks on HIP-HCC
Co-authored-by: mvermeulen <5479696+mvermeulen@users.noreply.github.com>
parent a023ec19
......@@ -95,7 +95,7 @@ inline auto mi_launch(hipStream_t stream, const hip_shape<N>& global, index_int
auto nglobal = global.index(nglobal_multi);
return [=](auto f) {
launch(stream, nglobal, nlocal)([=](auto idx) {
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
auto midx = make_multi_index(global, idx.global, nglobal_multi);
f(idx, midx.for_stride(global.lens));
});
......
......@@ -36,7 +36,8 @@ auto nary_nonstandard_nonpacked_impl(hipStream_t stream, F f, argument result, A
MIGRAPHX_TRACE_NARY_FUNCTION
shape s{result.get_shape().type(), result.get_shape().lens()};
hip_visit_all(s, result, args...)([&](auto standard_shape, auto output, auto... inputs) {
mi_gs_launch(stream, standard_shape)([=](auto idx) { output[idx] = f(inputs[idx]...); });
mi_gs_launch(stream,
standard_shape)([=](auto idx) __device__ { output[idx] = f(inputs[idx]...); });
});
}
......@@ -45,7 +46,7 @@ inline auto create_broadcast_index(index_int len, index_int stride)
auto next_stride = stride * len;
auto e_next_stride = encode_divisor(next_stride);
auto e_stride = encode_divisor(stride);
return [=](auto i) {
return [=](auto i) __device__ {
// ( i % next_stride) / stride
return fast_div(i, e_stride) - len * fast_div(i, e_next_stride);
};
......@@ -61,11 +62,11 @@ auto nary_nonstandard_packed_impl(hipStream_t stream,
auto arg_shape = make_array(args...).front().get_shape();
auto perm = find_permutation(arg_shape);
auto s = reorder_shape(arg_shape, perm);
hip_visit_all(s,
result.reshape(reorder_shape(result.get_shape(), perm)),
args.reshape(s)...)([&](auto standard_shape, auto output, auto... inputs) {
mi_gs_launch(stream, standard_shape)([=](auto idx) { output[idx] = f(inputs[idx]...); });
});
hip_visit_all(s, result.reshape(reorder_shape(result.get_shape(), perm)), args.reshape(s)...)(
[&](auto standard_shape, auto output, auto... inputs) {
mi_gs_launch(stream, standard_shape)(
[=](auto idx) __device__ { output[idx] = f(inputs[idx]...); });
});
}
template <class F, class... Arguments>
......@@ -93,7 +94,6 @@ void nary_broadcast_vec_impl(
using type = typename decltype(output)::value_type;
const index_int nelements = output.size() / vec_size;
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
......@@ -185,7 +185,6 @@ void nary_double_broadcast_vec_impl(
using type = typename decltype(output)::value_type;
const index_int nelements = output.size() / vec_size;
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size];
// Load bias into LDS
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
......@@ -274,7 +273,7 @@ void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments.
const index_int vec_size = 4;
auto data = pack_vec<4>(device_cast(inputs.data())...);
auto* outp = as_vec<4>(device_cast(output.data()));
gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) {
gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) __device__ {
vec<type, 4> out = outp[i];
data(
[&](auto... xs) {
......@@ -295,7 +294,7 @@ void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... a
MIGRAPHX_TRACE_NARY_FUNCTION
index_int nelements = result.get_shape().elements();
hip_pointer_visit_all(result, args...)([&](auto output, auto... inputs) {
gs_launch(stream, nelements)([=](auto i) { output[i] = f(inputs[i]...); });
gs_launch(stream, nelements)([=](auto i) __device__ { output[i] = f(inputs[i]...); });
});
}
......
......@@ -24,7 +24,7 @@ void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument
auto* in_ptr = device_cast(input.data());
visit_tensor_size(out_lens.size(), [&](auto out_dim) {
hip_tensor_descriptor<out_dim> desc(comp_shape);
gs_launch(stream, nelements, 256)([=](auto ii) {
gs_launch(stream, nelements, 256)([=](auto ii) __device__ {
const size_t nb = 4;
auto idx = desc.multi(ii);
std::size_t i_m = idx[dim_1];
......@@ -55,7 +55,7 @@ void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument
auto* in_ptr = device_cast(input.data());
visit_tensor_size(out_lens.size(), [&](auto out_dim) {
hip_tensor_descriptor<out_dim> desc(comp_shape);
gs_launch(stream, nelements, 256)([=](auto ii) {
gs_launch(stream, nelements, 256)([=](auto ii) __device__ {
const size_t nb = 4;
auto idx = desc.multi(ii);
std::size_t i_n = idx[dim_1];
......
......@@ -9,7 +9,7 @@ namespace device {
// Elementwise natural logarithm: result[i] = log(arg[i]) for every element of arg.
// `nary` (defined elsewhere) presumably launches the elementwise kernel on `stream`.
// The lambda is annotated __device__ because HIP-Clang requires GPU functions to
// carry an explicit device attribute (see commit message).
void log(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) __device__ { return ::log(to_hip_type(x)); });
}
} // namespace device
......
......@@ -43,7 +43,7 @@ void logsoftmax(hipStream_t stream, const argument& result, const argument& arg,
auto log_batch_sum = ::log(to_hip_type(batch_sum)) + batch_max;
idx.local_stride(batch_item_num, [&](auto j) {
idx.local_stride(batch_item_num, [&](auto j) __device__ {
data_idx[axis] = j;
output[data_idx] = input[data_idx] - log_batch_sum;
});
......
......@@ -10,7 +10,7 @@ namespace device {
// Elementwise maximum: result[i] = max(arg1[i], arg2[i]).
// Uses the HIP device-side ::max (not std::max, which is not a device function
// under HIP-Clang), and marks the lambda __device__ so it compiles for the GPU.
void max(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
    nary(stream, result, arg1, arg2)(
        [](auto x, auto y) __device__ { return ::max(to_hip_type(x), to_hip_type(y)); });
}
} // namespace device
......
......@@ -10,7 +10,7 @@ namespace device {
// Elementwise minimum: result[i] = min(arg1[i], arg2[i]).
// Uses the HIP device-side ::min (not std::min, which is not a device function
// under HIP-Clang), and marks the lambda __device__ so it compiles for the GPU.
void min(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
    nary(stream, result, arg1, arg2)(
        [](auto x, auto y) __device__ { return ::min(to_hip_type(x), to_hip_type(y)); });
}
} // namespace device
......
......@@ -8,7 +8,7 @@ namespace device {
// Elementwise product: result[i] = arg1[i] * arg2[i].
// Lambda annotated __device__ as required by HIP-Clang for GPU code.
void mul(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
    nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ { return x * y; });
}
void mul(hipStream_t stream,
......@@ -17,7 +17,8 @@ void mul(hipStream_t stream,
const argument& arg2,
const argument& arg3)
{
nary(stream, result, arg1, arg2, arg3)([](auto x, auto y, auto z) { return x * y * z; });
nary(stream, result, arg1, arg2, arg3)([](auto x, auto y, auto z)
__device__ { return x * y * z; });
}
} // namespace device
......
......@@ -12,7 +12,8 @@ void mul_add(hipStream_t stream,
const argument& arg2,
const argument& arg3)
{
nary(stream, result, arg1, arg2, arg3)([](auto x, auto a, auto b) { return a * x + b; });
nary(stream, result, arg1, arg2, arg3)([](auto x, auto a, auto b)
__device__ { return a * x + b; });
}
} // namespace device
......
......@@ -13,7 +13,7 @@ void mul_add_relu(hipStream_t stream,
const argument& arg3)
{
nary(stream, result, arg1, arg2, arg3)(
[](auto x, auto a, auto b) { return std::max<decltype(a * x + b)>(0, a * x + b); });
[](auto x, auto a, auto b) __device__ { return ::max<decltype(a * x + b)>(0, a * x + b); });
}
} // namespace device
......
......@@ -23,12 +23,12 @@ pad(hipStream_t stream, argument result, argument arg1, float value, std::vector
{
device_val = device_cast(std::numeric_limits<type>::lowest());
}
gs_launch(stream,
result.get_shape().elements())([=](auto i) { output.data()[i] = device_val; });
gs_launch(stream, result.get_shape().elements())(
[=](auto i) __device__ { output.data()[i] = device_val; });
hip_index offsets;
std::copy(pads.begin(), pads.begin() + offsets.size(), offsets.begin());
gs_launch(stream, nelements)([=](auto i) {
gs_launch(stream, nelements)([=](auto i) __device__ {
auto idx = input.get_shape().multi(i);
for(std::size_t j = 0; j < offsets.size(); j++)
{
......
......@@ -9,7 +9,7 @@ namespace device {
// Elementwise power: result[i] = pow(arg1[i], arg2[i]) (arg1 = base, arg2 = exponent).
// Calls the global-namespace ::pow device overload; lambda annotated __device__
// as required by HIP-Clang.
void pow(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
    nary(stream, result, arg1, arg2)(
        [](auto b, auto e) __device__ { return ::pow(to_hip_type(b), to_hip_type(e)); });
}
} // namespace device
......
......@@ -8,7 +8,7 @@ namespace device {
// Elementwise ReLU: result[i] = max(0, arg[i]).
// Uses device-side ::max with an explicit template argument so the literal 0
// is converted to the element type; std::max is not usable in HIP device code.
void relu(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) __device__ { return ::max<decltype(x)>(0, x); });
}
} // namespace device
......
......@@ -9,7 +9,7 @@ namespace device {
// Elementwise rounding: result[i] = round(arg[i]).
// Lambda annotated __device__ as required by HIP-Clang for GPU code.
void round(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) __device__ { return ::round(to_hip_type(x)); });
}
} // namespace device
......
......@@ -9,7 +9,8 @@ namespace device {
// Elementwise logistic sigmoid: result[i] = 1 / (1 + exp(-arg[i])).
// Float literals (1.f) keep the arithmetic in single precision; the lambda is
// annotated __device__ as required by HIP-Clang for GPU code.
void sigmoid(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)(
        [](auto x) __device__ { return 1.f / (1.f + ::exp(to_hip_type(-x))); });
}
} // namespace device
......
......@@ -9,7 +9,7 @@ namespace device {
// Elementwise signum: result[i] = 1 if arg[i] > 0, -1 if arg[i] < 0, else 0.
// Lambda annotated __device__ as required by HIP-Clang for GPU code.
void sign(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) __device__ { return (x > 0 ? 1 : ((x < 0) ? -1 : 0)); });
}
} // namespace device
......
......@@ -9,7 +9,7 @@ namespace device {
// Elementwise sine: result[i] = sin(arg[i]).
// Lambda annotated __device__ as required by HIP-Clang for GPU code.
void sin(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) __device__ { return ::sin(to_hip_type(x)); });
}
} // namespace device
......
......@@ -9,7 +9,7 @@ namespace device {
// Elementwise hyperbolic sine: result[i] = sinh(arg[i]).
// Lambda annotated __device__ as required by HIP-Clang for GPU code.
void sinh(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) __device__ { return ::sinh(to_hip_type(x)); });
}
} // namespace device
......
......@@ -42,7 +42,7 @@ void softmax(hipStream_t stream, const argument& result, const argument& arg, in
return ::exp(to_hip_type(val));
});
idx.local_stride(batch_item_num, [&](auto j) {
idx.local_stride(batch_item_num, [&](auto j) __device__ {
data_idx[axis] = j;
auto val = input[data_idx] - batch_max;
output[data_idx] = ::exp(to_hip_type(val)) / batch_sum;
......
......@@ -8,7 +8,7 @@ namespace device {
// Elementwise squared difference: result[i] = (arg1[i] - arg2[i])^2.
// Lambda annotated __device__ as required by HIP-Clang for GPU code.
void sqdiff(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
    nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ { return (x - y) * (x - y); });
}
} // namespace device
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.