Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
bbb0c645
Commit
bbb0c645
authored
Jun 27, 2019
by
Paul
Browse files
Formatting
parent
0c1df49c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
21 additions
and
25 deletions
+21
-25
src/targets/gpu/device/reduce_sum.cpp
src/targets/gpu/device/reduce_sum.cpp
+21
-25
No files found.
src/targets/gpu/device/reduce_sum.cpp
View file @
bbb0c645
...
@@ -40,29 +40,25 @@ __device__ auto block_reduce(index idx, Op op, T init, std::size_t n, F f)
...
@@ -40,29 +40,25 @@ __device__ auto block_reduce(index idx, Op op, T init, std::size_t n, F f)
return
buffer
[
0
];
return
buffer
[
0
];
}
}
#else
#else
constexpr
unsigned
int
dpp_row_shr
(
unsigned
int
x
)
constexpr
unsigned
int
dpp_row_shr
(
unsigned
int
x
)
{
return
0x110
|
x
;
}
{
return
0x110
|
x
;
}
constexpr
unsigned
int
dpp_row_bcast
(
unsigned
int
x
)
constexpr
unsigned
int
dpp_row_bcast
(
unsigned
int
x
)
{
{
unsigned
int
y
=
0
;
unsigned
int
y
=
0
;
switch
(
x
)
switch
(
x
)
{
{
case
15
:
case
15
:
y
=
0x142
;
break
;
y
=
0x142
;
case
31
:
y
=
0x143
;
break
;
break
;
default:
throw
std
::
runtime_error
(
"Unknown bcast"
);
case
31
:
y
=
0x143
;
break
;
default:
throw
std
::
runtime_error
(
"Unknown bcast"
);
}
}
return
y
;
return
y
;
}
}
template
<
unsigned
int
DppCtrl
,
unsigned
int
RowMask
=
0xf
,
unsigned
int
BankMask
=
0xf
,
bool
BoundCtrl
=
false
,
class
T
>
template
<
unsigned
int
DppCtrl
,
unsigned
int
RowMask
=
0xf
,
unsigned
int
BankMask
=
0xf
,
bool
BoundCtrl
=
false
,
class
T
>
__device__
T
dpp_mov
(
T
&
x
)
__device__
T
dpp_mov
(
T
&
x
)
{
{
static
const
std
::
size_t
n
=
sizeof
(
T
)
<
4
?
1
:
sizeof
(
T
)
/
4
;
static
const
std
::
size_t
n
=
sizeof
(
T
)
<
4
?
1
:
sizeof
(
T
)
/
4
;
...
@@ -74,29 +70,29 @@ __device__ T dpp_mov(T& x)
...
@@ -74,29 +70,29 @@ __device__ T dpp_mov(T& x)
type
output
;
type
output
;
type
input
;
type
input
;
input
.
data
=
x
;
input
.
data
=
x
;
for
(
std
::
size_t
i
=
0
;
i
<
n
;
i
++
)
for
(
std
::
size_t
i
=
0
;
i
<
n
;
i
++
)
{
{
output
.
reg
[
i
]
=
__llvm_amdgcn_move_dpp
(
input
.
reg
[
i
],
DppCtrl
,
RowMask
,
BankMask
,
BoundCtrl
);
output
.
reg
[
i
]
=
__llvm_amdgcn_move_dpp
(
input
.
reg
[
i
],
DppCtrl
,
RowMask
,
BankMask
,
BoundCtrl
);
}
}
return
output
.
data
;
return
output
.
data
;
}
}
template
<
class
T
,
class
Op
>
template
<
class
T
,
class
Op
>
__device__
void
dpp_reduce
(
T
&
in
,
Op
op
)
__device__
void
dpp_reduce
(
T
&
in
,
Op
op
)
{
{
T
out
;
T
out
;
out
=
dpp_mov
<
dpp_row_shr
(
1
)
>
(
in
);
out
=
dpp_mov
<
dpp_row_shr
(
1
)
>
(
in
);
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_shr
(
2
)
>
(
in
);
out
=
dpp_mov
<
dpp_row_shr
(
2
)
>
(
in
);
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_shr
(
4
),
0xf
,
0xe
>
(
in
);
out
=
dpp_mov
<
dpp_row_shr
(
4
),
0xf
,
0xe
>
(
in
);
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_shr
(
8
),
0xf
,
0xc
>
(
in
);
out
=
dpp_mov
<
dpp_row_shr
(
8
),
0xf
,
0xc
>
(
in
);
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_bcast
(
15
),
0xa
>
(
in
);
out
=
dpp_mov
<
dpp_row_bcast
(
15
),
0xa
>
(
in
);
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_bcast
(
31
),
0xc
>
(
in
);
out
=
dpp_mov
<
dpp_row_bcast
(
31
),
0xc
>
(
in
);
in
=
op
(
in
,
out
);
in
=
op
(
in
,
out
);
}
}
__device__
void
dpp_reduce
(
float
&
x
,
sum
)
__device__
void
dpp_reduce
(
float
&
x
,
sum
)
...
@@ -121,13 +117,13 @@ __device__ void dpp_reduce(float& x, sum)
...
@@ -121,13 +117,13 @@ __device__ void dpp_reduce(float& x, sum)
template
<
std
::
size_t
N
,
class
Op
,
class
T
,
class
F
>
template
<
std
::
size_t
N
,
class
Op
,
class
T
,
class
F
>
__device__
auto
block_reduce
(
index
idx
,
Op
op
,
T
init
,
std
::
size_t
n
,
F
f
)
__device__
auto
block_reduce
(
index
idx
,
Op
op
,
T
init
,
std
::
size_t
n
,
F
f
)
{
{
using
type
=
decltype
(
f
(
idx
.
local
));
using
type
=
decltype
(
f
(
idx
.
local
));
const
auto
std
::
size_t
wave
=
64
;
const
auto
std
::
size_t
wave
=
64
;
MIGRAPHX_DEVICE_SHARED
type
buffer
[
N
/
64
];
MIGRAPHX_DEVICE_SHARED
type
buffer
[
N
/
64
];
type
x
=
init
;
type
x
=
init
;
idx
.
local_stride
(
n
,
[
&
](
auto
i
)
{
x
=
op
(
x
,
f
(
i
));
});
idx
.
local_stride
(
n
,
[
&
](
auto
i
)
{
x
=
op
(
x
,
f
(
i
));
});
dpp_reduce
(
x
,
op
);
dpp_reduce
(
x
,
op
);
const
auto
ldsidx
=
idx
.
local
/
64
;
const
auto
ldsidx
=
idx
.
local
/
64
;
if
((
idx
.
local
%
64
)
==
63
)
if
((
idx
.
local
%
64
)
==
63
)
{
{
...
@@ -136,7 +132,7 @@ __device__ auto block_reduce(index idx, Op op, T init, std::size_t n, F f)
...
@@ -136,7 +132,7 @@ __device__ auto block_reduce(index idx, Op op, T init, std::size_t n, F f)
__syncthreads
();
__syncthreads
();
type
y
=
0
;
type
y
=
0
;
for
(
std
::
size_t
i
=
0
;
i
<
idx
.
nlocal
()
/
64
;
i
++
)
for
(
std
::
size_t
i
=
0
;
i
<
idx
.
nlocal
()
/
64
;
i
++
)
{
{
y
+=
buffer
[
i
];
y
+=
buffer
[
i
];
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment