Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
97056d33
Commit
97056d33
authored
Jun 25, 2019
by
Paul
Browse files
Add gpu reduce_sum
parent
d0c53a98
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
163 additions
and
1 deletion
+163
-1
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+2
-0
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp
...targets/gpu/device/include/migraphx/gpu/device/launch.hpp
+13
-1
src/targets/gpu/device/reduce_sum.cpp
src/targets/gpu/device/reduce_sum.cpp
+76
-0
src/targets/gpu/include/migraphx/gpu/device/reduce_sum.hpp
src/targets/gpu/include/migraphx/gpu/device/reduce_sum.hpp
+20
-0
src/targets/gpu/include/migraphx/gpu/reduce_sum.hpp
src/targets/gpu/include/migraphx/gpu/reduce_sum.hpp
+38
-0
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+2
-0
test/gpu/miopen.cpp
test/gpu/miopen.cpp
+12
-0
No files found.
src/targets/gpu/CMakeLists.txt
View file @
97056d33
...
...
@@ -35,6 +35,7 @@ add_library(migraphx_device
device/gather.cpp
device/sub.cpp
device/clip.cpp
device/reduce_sum.cpp
)
set_target_properties
(
migraphx_device PROPERTIES EXPORT_NAME device
)
rocm_clang_tidy_check
(
migraphx_device
)
...
...
@@ -70,6 +71,7 @@ add_library(migraphx_gpu
schedule_model.cpp
adjust_allocation.cpp
clip.cpp
reduce_sum.cpp
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
...
...
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp
View file @
97056d33
...
...
@@ -35,6 +35,18 @@ inline auto launch(hipStream_t stream, std::size_t global, std::size_t local)
};
}
template
<
class
F
>
__host__
__device__
auto
gs_invoke
(
F
&&
f
,
std
::
size_t
i
,
index
idx
)
->
decltype
(
f
(
i
,
idx
))
{
return
f
(
i
,
idx
);
}
template
<
class
F
>
__host__
__device__
auto
gs_invoke
(
F
&&
f
,
std
::
size_t
i
,
index
)
->
decltype
(
f
(
i
))
{
return
f
(
i
);
}
inline
auto
gs_launch
(
hipStream_t
stream
,
std
::
size_t
n
,
std
::
size_t
local
=
1024
)
{
std
::
size_t
groups
=
1
+
n
/
local
;
...
...
@@ -44,7 +56,7 @@ inline auto gs_launch(hipStream_t stream, std::size_t n, std::size_t local = 102
launch
(
stream
,
nglobal
,
local
)([
=
](
auto
idx
)
{
for
(
size_t
i
=
idx
.
global
;
i
<
n
;
i
+=
nglobal
)
{
f
(
i
);
gs_invoke
(
f
,
i
,
idx
);
}
});
};
...
...
src/targets/gpu/device/reduce_sum.cpp
0 → 100644
View file @
97056d33
#include <migraphx/gpu/device/reduce_sum.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/visit.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
struct
sum
{
template
<
class
T
>
MIGRAPHX_DEVICE_CONSTEXPR
T
operator
()(
T
x
,
T
y
)
const
{
return
x
+
y
;
}
};
template
<
std
::
size_t
N
,
class
Op
,
class
T
,
class
F
>
__device__
auto
block_reduce
(
index
idx
,
Op
op
,
T
init
,
std
::
size_t
n
,
F
f
)
{
using
type
=
decltype
(
f
(
idx
.
local
));
MIGRAPHX_DEVICE_SHARED
type
buffer
[
N
];
type
x
=
init
;
for
(
size_t
i
=
idx
.
local
;
i
<
n
;
i
+=
N
)
{
x
=
op
(
x
,
f
(
i
));
}
buffer
[
idx
.
local
]
=
x
;
__syncthreads
();
for
(
std
::
size_t
s
=
1
;
s
<
N
;
s
*=
2
)
{
const
std
::
size_t
index
=
2
*
s
*
idx
.
local
;
if
(
index
<
N
)
{
buffer
[
index
]
=
op
(
buffer
[
index
],
buffer
[
index
+
s
]);
}
__syncthreads
();
}
return
buffer
[
0
];
}
void
reduce_sum
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
&&
output_shape
=
result
.
get_shape
();
auto
&&
input_shape
=
arg
.
get_shape
();
std
::
vector
<
std
::
size_t
>
reduce_lens
;
std
::
transform
(
output_shape
.
lens
().
begin
(),
output_shape
.
lens
().
end
(),
input_shape
.
lens
().
begin
(),
std
::
back_inserter
(
reduce_lens
),
[](
auto
x
,
auto
y
)
->
std
::
size_t
{
if
(
x
==
y
)
return
1
;
else
return
y
;
});
shape
reduce_slice
{
output_shape
.
type
(),
reduce_lens
,
input_shape
.
strides
()};
hip_visit_all
(
result
,
arg
,
reduce_slice
)([
&
](
auto
output
,
auto
input
,
auto
reduce_shape
)
{
auto
nelements
=
result
.
get_shape
().
elements
();
auto
relements
=
reduce_slice
.
elements
();
const
std
::
size_t
block_size
=
1024
;
gs_launch
(
stream
,
nelements
*
block_size
,
block_size
)([
=
](
auto
i
,
auto
idx
)
__device__
{
auto
base_idx
=
output
.
get_shape
().
multi
(
i
/
block_size
);
auto
offset
=
input
.
get_shape
().
index
(
base_idx
);
auto
r
=
block_reduce
<
block_size
>
(
idx
,
sum
{},
0
,
relements
,
[
&
](
auto
j
)
__device__
{
return
input
.
data
()[
reduce_shape
.
index
(
j
)
+
offset
];
});
if
(
idx
.
local
==
0
)
output
.
data
()[
i
/
block_size
]
=
r
;
});
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/include/migraphx/gpu/device/reduce_sum.hpp
0 → 100644
View file @
97056d33
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_SUM_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_SUM_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
reduce_sum
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/reduce_sum.hpp
0 → 100644
View file @
97056d33
#ifndef MIGRAPHX_GUARD_RTGLIB_REDUCE_SUM_HPP
#define MIGRAPHX_GUARD_RTGLIB_REDUCE_SUM_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/reduce_sum.hpp>
#include <migraphx/reflect.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
hip_reduce_sum
{
op
::
reduce_sum
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::reduce_sum"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/lowering.cpp
View file @
97056d33
...
...
@@ -47,6 +47,7 @@
#include <migraphx/gpu/lrn.hpp>
#include <migraphx/gpu/convert.hpp>
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/reduce_sum.hpp>
#include <utility>
#include <functional>
#include <algorithm>
...
...
@@ -105,6 +106,7 @@ struct miopen_apply
add_extend_op
<
hip_pad
,
op
::
pad
>
(
"pad"
);
add_extend_op
<
hip_convert
,
op
::
convert
>
(
"convert"
);
add_extend_op
<
hip_clip
,
op
::
clip
>
(
"clip"
);
add_extend_op
<
hip_reduce_sum
,
op
::
reduce_sum
>
(
"reduce_sum"
);
add_lrn_op
();
add_convolution_op
();
...
...
test/gpu/miopen.cpp
View file @
97056d33
...
...
@@ -3445,4 +3445,16 @@ struct test_fp32_fp16_sub : verify_program<test_fp32_fp16_sub>
};
};
struct
test_reduce_sum
:
verify_program
<
test_reduce_sum
>
{
migraphx
::
program
create_program
()
const
{
migraphx
::
program
p
;
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
3
,
4
,
8
,
8
}};
auto
x
=
p
.
add_parameter
(
"x"
,
s
);
p
.
add_instruction
(
migraphx
::
op
::
reduce_sum
{{
1
}},
x
);
return
p
;
};
};
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment