Commit bbb0c645 authored by Paul's avatar Paul
Browse files

Formatting

parent 0c1df49c
...@@ -40,29 +40,25 @@ __device__ auto block_reduce(index idx, Op op, T init, std::size_t n, F f) ...@@ -40,29 +40,25 @@ __device__ auto block_reduce(index idx, Op op, T init, std::size_t n, F f)
return buffer[0]; return buffer[0];
} }
#else #else
constexpr unsigned int dpp_row_shr(unsigned int x) constexpr unsigned int dpp_row_shr(unsigned int x) { return 0x110 | x; }
{
return 0x110 | x;
}
constexpr unsigned int dpp_row_bcast(unsigned int x) constexpr unsigned int dpp_row_bcast(unsigned int x)
{ {
unsigned int y = 0; unsigned int y = 0;
switch(x) switch(x)
{ {
case 15: case 15: y = 0x142; break;
y = 0x142; case 31: y = 0x143; break;
break; default: throw std::runtime_error("Unknown bcast");
case 31:
y = 0x143;
break;
default:
throw std::runtime_error("Unknown bcast");
} }
return y; return y;
} }
template<unsigned int DppCtrl, unsigned int RowMask = 0xf, unsigned int BankMask = 0xf, bool BoundCtrl= false, class T> template <unsigned int DppCtrl,
unsigned int RowMask = 0xf,
unsigned int BankMask = 0xf,
bool BoundCtrl = false,
class T>
__device__ T dpp_mov(T& x) __device__ T dpp_mov(T& x)
{ {
static const std::size_t n = sizeof(T) < 4 ? 1 : sizeof(T) / 4; static const std::size_t n = sizeof(T) < 4 ? 1 : sizeof(T) / 4;
...@@ -74,14 +70,14 @@ __device__ T dpp_mov(T& x) ...@@ -74,14 +70,14 @@ __device__ T dpp_mov(T& x)
type output; type output;
type input; type input;
input.data = x; input.data = x;
for(std::size_t i = 0; i < n;i++) for(std::size_t i = 0; i < n; i++)
{ {
output.reg[i] = __llvm_amdgcn_move_dpp(input.reg[i], DppCtrl, RowMask, BankMask, BoundCtrl); output.reg[i] = __llvm_amdgcn_move_dpp(input.reg[i], DppCtrl, RowMask, BankMask, BoundCtrl);
} }
return output.data; return output.data;
} }
template<class T, class Op> template <class T, class Op>
__device__ void dpp_reduce(T& in, Op op) __device__ void dpp_reduce(T& in, Op op)
{ {
T out; T out;
...@@ -123,7 +119,7 @@ __device__ auto block_reduce(index idx, Op op, T init, std::size_t n, F f) ...@@ -123,7 +119,7 @@ __device__ auto block_reduce(index idx, Op op, T init, std::size_t n, F f)
{ {
using type = decltype(f(idx.local)); using type = decltype(f(idx.local));
const auto std::size_t wave = 64; const auto std::size_t wave = 64;
MIGRAPHX_DEVICE_SHARED type buffer[N/64]; MIGRAPHX_DEVICE_SHARED type buffer[N / 64];
type x = init; type x = init;
idx.local_stride(n, [&](auto i) { x = op(x, f(i)); }); idx.local_stride(n, [&](auto i) { x = op(x, f(i)); });
dpp_reduce(x, op); dpp_reduce(x, op);
...@@ -136,7 +132,7 @@ __device__ auto block_reduce(index idx, Op op, T init, std::size_t n, F f) ...@@ -136,7 +132,7 @@ __device__ auto block_reduce(index idx, Op op, T init, std::size_t n, F f)
__syncthreads(); __syncthreads();
type y = 0; type y = 0;
for(std::size_t i = 0; i < idx.nlocal()/64;i++) for(std::size_t i = 0; i < idx.nlocal() / 64; i++)
{ {
y += buffer[i]; y += buffer[i];
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment