Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
d2549384
Commit
d2549384
authored
Feb 01, 2019
by
Khalique
Browse files
manual merge
parents
67048d04
ab6cd9d3
Changes
303
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
355 additions
and
180 deletions
+355
-180
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+32
-14
src/targets/gpu/abs.cpp
src/targets/gpu/abs.cpp
+39
-0
src/targets/gpu/add.cpp
src/targets/gpu/add.cpp
+0
-55
src/targets/gpu/batchnorm.cpp
src/targets/gpu/batchnorm.cpp
+10
-9
src/targets/gpu/concat.cpp
src/targets/gpu/concat.cpp
+9
-9
src/targets/gpu/contiguous.cpp
src/targets/gpu/contiguous.cpp
+8
-8
src/targets/gpu/convolution.cpp
src/targets/gpu/convolution.cpp
+43
-29
src/targets/gpu/device/acos.cpp
src/targets/gpu/device/acos.cpp
+18
-0
src/targets/gpu/device/add.cpp
src/targets/gpu/device/add.cpp
+6
-6
src/targets/gpu/device/add_relu.cpp
src/targets/gpu/device/add_relu.cpp
+6
-6
src/targets/gpu/device/asin.cpp
src/targets/gpu/device/asin.cpp
+18
-0
src/targets/gpu/device/atan.cpp
src/targets/gpu/device/atan.cpp
+18
-0
src/targets/gpu/device/concat.cpp
src/targets/gpu/device/concat.cpp
+11
-11
src/targets/gpu/device/contiguous.cpp
src/targets/gpu/device/contiguous.cpp
+6
-6
src/targets/gpu/device/cos.cpp
src/targets/gpu/device/cos.cpp
+18
-0
src/targets/gpu/device/cosh.cpp
src/targets/gpu/device/cosh.cpp
+18
-0
src/targets/gpu/device/exp.cpp
src/targets/gpu/device/exp.cpp
+18
-0
src/targets/gpu/device/gather.cpp
src/targets/gpu/device/gather.cpp
+44
-0
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp
...targets/gpu/device/include/migraphx/gpu/device/launch.hpp
+9
-9
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
+24
-18
No files found.
src/targets/gpu/CMakeLists.txt
View file @
d2549384
...
@@ -10,20 +10,34 @@ if(NOT TARGET MIOpen)
...
@@ -10,20 +10,34 @@ if(NOT TARGET MIOpen)
message
(
SEND_ERROR
"Cant find miopen"
)
message
(
SEND_ERROR
"Cant find miopen"
)
endif
()
endif
()
add_library
(
migraph_device
add_library
(
migraph
x
_device
device/add.cpp
device/add.cpp
device/max.cpp
device/min.cpp
device/exp.cpp
device/log.cpp
device/sin.cpp
device/cos.cpp
device/tan.cpp
device/sinh.cpp
device/cosh.cpp
device/asin.cpp
device/acos.cpp
device/atan.cpp
device/add_relu.cpp
device/add_relu.cpp
device/contiguous.cpp
device/contiguous.cpp
device/mul.cpp
device/mul.cpp
device/concat.cpp
device/concat.cpp
device/pad.cpp
device/gather.cpp
)
)
set_target_properties
(
migraph_device PROPERTIES EXPORT_NAME device
)
set_target_properties
(
migraph
x
_device PROPERTIES EXPORT_NAME device
)
rocm_clang_tidy_check
(
migraph_device
)
rocm_clang_tidy_check
(
migraph
x
_device
)
target_link_libraries
(
migraph_device migraph hip::device
)
target_link_libraries
(
migraph
x
_device migraph
x
hip::device
-Wno-invalid-command-line-argument -amdgpu-target=gfx803 -amdgpu-target=gfx900 -amdgpu-target=gfx906
)
target_include_directories
(
migraph_device PUBLIC $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/include>
)
target_include_directories
(
migraph
x
_device PUBLIC $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/include>
)
target_include_directories
(
migraph_device PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/include>
)
target_include_directories
(
migraph
x
_device PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/include>
)
add_library
(
migraph_gpu
add_library
(
migraph
x
_gpu
eliminate_workspace.cpp
eliminate_workspace.cpp
fuse_ops.cpp
fuse_ops.cpp
hip.cpp
hip.cpp
...
@@ -37,20 +51,24 @@ add_library(migraph_gpu
...
@@ -37,20 +51,24 @@ add_library(migraph_gpu
concat.cpp
concat.cpp
relu.cpp
relu.cpp
leaky_relu.cpp
leaky_relu.cpp
add.cpp
tanh.cpp
mul.cpp
batchnorm.cpp
batchnorm.cpp
write_literals.cpp
write_literals.cpp
rocblas.cpp
rocblas.cpp
sigmoid.cpp
abs.cpp
elu.cpp
pad.cpp
gather.cpp
lrn.cpp
lrn.cpp
)
)
set_target_properties
(
migraph_gpu PROPERTIES EXPORT_NAME gpu
)
set_target_properties
(
migraph
x
_gpu PROPERTIES EXPORT_NAME gpu
)
rocm_clang_tidy_check
(
migraph_gpu
)
rocm_clang_tidy_check
(
migraph
x
_gpu
)
target_link_libraries
(
migraph_gpu PUBLIC migraph MIOpen roc::rocblas
)
target_link_libraries
(
migraph
x
_gpu PUBLIC migraph
x
MIOpen roc::rocblas
)
target_link_libraries
(
migraph_gpu PRIVATE migraph_device
)
target_link_libraries
(
migraph
x
_gpu PRIVATE migraph
x
_device
)
rocm_install_targets
(
rocm_install_targets
(
TARGETS migraph_gpu migraph_device
TARGETS migraph
x
_gpu migraph
x
_device
INCLUDE
INCLUDE
${
CMAKE_CURRENT_SOURCE_DIR
}
/include
${
CMAKE_CURRENT_SOURCE_DIR
}
/include
)
)
...
...
src/targets/gpu/abs.cpp
0 → 100644
View file @
d2549384
#include <migraphx/gpu/abs.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
miopen_abs
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
not_broadcasted
();
return
inputs
.
at
(
1
);
}
argument
miopen_abs
::
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
float
alpha
=
1
;
float
beta
=
0
;
auto
x_desc
=
make_tensor
(
args
[
0
].
get_shape
());
auto
y_desc
=
make_tensor
(
output_shape
);
miopenActivationForward
(
ctx
.
get_stream
().
get_miopen
(),
ad
.
get
(),
&
alpha
,
x_desc
.
get
(),
args
[
0
].
implicit
(),
&
beta
,
y_desc
.
get
(),
args
[
1
].
implicit
());
return
args
[
1
];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/add.cpp
deleted
100644 → 0
View file @
67048d04
#include <migraph/gpu/add.hpp>
#include <migraph/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/config.hpp>
#include <migraph/gpu/miopen.hpp>
#include <utility>
namespace
migraph
{
inline
namespace
MIGRAPH_INLINE_NS
{
namespace
gpu
{
shape
hip_add
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
// check_shapes{inputs, *this}.has(3).standard();
check_shapes
{
inputs
,
*
this
}.
has
(
3
);
return
inputs
.
at
(
0
);
}
argument
hip_add
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
add
(
ctx
.
get_stream
().
get
(),
args
[
2
],
args
[
0
],
args
[
1
]);
return
args
[
2
];
}
shape
miopen_add
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
3
).
not_broadcasted
();
return
inputs
.
at
(
0
);
}
argument
miopen_add
::
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
float
alpha
=
1
,
beta
=
0
;
auto
a_desc
=
make_tensor
(
args
[
0
].
get_shape
());
auto
b_desc
=
make_tensor
(
args
[
1
].
get_shape
());
auto
c_desc
=
make_tensor
(
output_shape
);
miopenOpTensor
(
ctx
.
get_stream
().
get_miopen
(),
miopenTensorOpAdd
,
&
alpha
,
a_desc
.
get
(),
args
[
0
].
implicit
(),
&
alpha
,
b_desc
.
get
(),
args
[
1
].
implicit
(),
&
beta
,
c_desc
.
get
(),
args
[
2
].
implicit
());
return
args
[
2
];
}
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace migraph
src/targets/gpu/batchnorm.cpp
View file @
d2549384
#include <migraph/gpu/batchnorm.hpp>
#include <migraph
x
/gpu/batchnorm.hpp>
#include <migraph/operators.hpp>
#include <migraph
x
/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph
x
/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph
x
/gpu/miopen.hpp>
#include <utility>
#include <utility>
namespace
migraph
{
namespace
migraph
x
{
inline
namespace
MIGRAPH_INLINE_NS
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
shape
miopen_batch_norm_inference
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
miopen_batch_norm_inference
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
...
@@ -22,7 +22,8 @@ argument miopen_batch_norm_inference::compute(context& ctx,
...
@@ -22,7 +22,8 @@ argument miopen_batch_norm_inference::compute(context& ctx,
auto
y_desc
=
make_tensor
(
output_shape
);
auto
y_desc
=
make_tensor
(
output_shape
);
auto
bn_desc
=
make_tensor
(
args
[
3
].
get_shape
());
auto
bn_desc
=
make_tensor
(
args
[
3
].
get_shape
());
float
alpha
=
1.0
,
beta
=
0.0
f
;
float
alpha
=
1.0
;
float
beta
=
0.0
f
;
miopenBatchNormalizationForwardInference
(
ctx
.
get_stream
().
get_miopen
(),
miopenBatchNormalizationForwardInference
(
ctx
.
get_stream
().
get_miopen
(),
miopenBatchNormMode_t
(
op
.
bn_mode
),
miopenBatchNormMode_t
(
op
.
bn_mode
),
...
@@ -43,5 +44,5 @@ argument miopen_batch_norm_inference::compute(context& ctx,
...
@@ -43,5 +44,5 @@ argument miopen_batch_norm_inference::compute(context& ctx,
}
}
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
}
// namespace migraph
x
src/targets/gpu/concat.cpp
View file @
d2549384
#include <migraph/gpu/concat.hpp>
#include <migraph
x
/gpu/concat.hpp>
#include <migraph/operators.hpp>
#include <migraph
x
/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph
x
/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph
x
/gpu/miopen.hpp>
#include <migraph/gpu/device/concat.hpp>
#include <migraph
x
/gpu/device/concat.hpp>
#include <utility>
#include <utility>
namespace
migraph
{
namespace
migraph
x
{
inline
namespace
MIGRAPH_INLINE_NS
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
shape
hip_concat
::
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
shape
hip_concat
::
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
...
@@ -24,5 +24,5 @@ argument hip_concat::compute(context& ctx,
...
@@ -24,5 +24,5 @@ argument hip_concat::compute(context& ctx,
}
}
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
}
// namespace migraph
x
src/targets/gpu/contiguous.cpp
View file @
d2549384
#include <migraph/gpu/contiguous.hpp>
#include <migraph
x
/gpu/contiguous.hpp>
#include <migraph/operators.hpp>
#include <migraph
x
/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph
x
/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph
x
/gpu/miopen.hpp>
#include <utility>
#include <utility>
namespace
migraph
{
namespace
migraph
x
{
inline
namespace
MIGRAPH_INLINE_NS
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
shape
miopen_contiguous
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
miopen_contiguous
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
...
@@ -25,5 +25,5 @@ argument miopen_contiguous::compute(context& ctx,
...
@@ -25,5 +25,5 @@ argument miopen_contiguous::compute(context& ctx,
}
}
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
}
// namespace migraph
x
src/targets/gpu/convolution.cpp
View file @
d2549384
#include <migraph/gpu/convolution.hpp>
#include <migraph
x
/gpu/convolution.hpp>
#include <migraph/operators.hpp>
#include <migraph
x
/operators.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph
x
/manage_ptr.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph
x
/gpu/miopen.hpp>
#include <utility>
#include <utility>
namespace
migraph
{
namespace
migraph
x
{
inline
namespace
MIGRAPH_INLINE_NS
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
shape
miopen_convolution
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
miopen_convolution
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
...
@@ -21,7 +21,8 @@ argument miopen_convolution::compute(context& ctx,
...
@@ -21,7 +21,8 @@ argument miopen_convolution::compute(context& ctx,
auto
w_desc
=
make_tensor
(
args
[
1
].
get_shape
());
auto
w_desc
=
make_tensor
(
args
[
1
].
get_shape
());
auto
y_desc
=
make_tensor
(
output_shape
);
auto
y_desc
=
make_tensor
(
output_shape
);
float
alpha
=
1
,
beta
=
0
;
float
alpha
=
1
;
float
beta
=
0
;
miopenConvolutionForward
(
ctx
.
get_stream
().
get_miopen
(),
miopenConvolutionForward
(
ctx
.
get_stream
().
get_miopen
(),
&
alpha
,
&
alpha
,
x_desc
.
get
(),
x_desc
.
get
(),
...
@@ -40,11 +41,11 @@ argument miopen_convolution::compute(context& ctx,
...
@@ -40,11 +41,11 @@ argument miopen_convolution::compute(context& ctx,
shape
miopen_convolution
::
compile
(
context
&
ctx
,
shape
miopen_convolution
::
compile
(
context
&
ctx
,
const
shape
&
output_shape
,
const
shape
&
output_shape
,
std
::
vector
<
instruction_ref
>
inputs
)
std
::
vector
<
shape
>
inputs
)
{
{
shape
workspace_shape
{};
shape
workspace_shape
{};
auto
x_desc
=
make_tensor
(
inputs
[
0
]
->
get_shape
()
);
auto
x_desc
=
make_tensor
(
inputs
[
0
]);
auto
w_desc
=
make_tensor
(
inputs
[
1
]
->
get_shape
()
);
auto
w_desc
=
make_tensor
(
inputs
[
1
]);
auto
y_desc
=
make_tensor
(
output_shape
);
auto
y_desc
=
make_tensor
(
output_shape
);
std
::
size_t
workspace_size
=
0
;
std
::
size_t
workspace_size
=
0
;
...
@@ -56,31 +57,44 @@ shape miopen_convolution::compile(context& ctx,
...
@@ -56,31 +57,44 @@ shape miopen_convolution::compile(context& ctx,
&
workspace_size
);
&
workspace_size
);
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
auto
x
=
to_gpu
(
generate_argument
(
inputs
[
0
]
->
get_shape
()
));
auto
x
=
to_gpu
(
generate_argument
(
inputs
[
0
]));
auto
w
=
to_gpu
(
generate_argument
(
inputs
[
1
]
->
get_shape
()
));
auto
w
=
to_gpu
(
generate_argument
(
inputs
[
1
]));
auto
y
=
allocate_gpu
(
output_shape
);
auto
y
=
allocate_gpu
(
output_shape
);
auto
workspace
=
allocate_gpu
(
workspace_shape
);
auto
workspace
=
allocate_gpu
(
workspace_shape
);
int
algo_count
=
1
;
int
algo_count
=
1
;
miopenConvAlgoPerf_t
perf
;
miopenConvAlgoPerf_t
perf
;
miopenFindConvolutionForwardAlgorithm
(
ctx
.
get_stream
().
get_miopen
(),
auto
status
=
miopenFindConvolutionForwardAlgorithm
(
ctx
.
get_stream
().
get_miopen
(),
x_desc
.
get
(),
x_desc
.
get
(),
x
.
implicit
(),
x
.
implicit
(),
w_desc
.
get
(),
w_desc
.
get
(),
w
.
implicit
(),
w
.
implicit
(),
cd
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
y_desc
.
get
(),
y
.
implicit
(),
y
.
implicit
(),
1
,
1
,
&
algo_count
,
&
algo_count
,
&
perf
,
&
perf
,
workspace
.
implicit
(),
workspace
.
implicit
(),
workspace_size
,
workspace_size
,
false
);
false
);
algo
=
perf
.
fwd_algo
;
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"Find convolution failed"
);
handle
=
ctx
.
get_stream
().
get_miopen
();
algo
=
perf
.
fwd_algo
;
return
shape
{
shape
::
int8_type
,
{
perf
.
memory
}};
return
shape
{
shape
::
int8_type
,
{
perf
.
memory
}};
}
}
void
miopen_convolution
::
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
)
{
if
(
handle
==
ctx
.
get_stream
().
get_miopen
())
return
;
// TODO: Check that workspace hasn't changed
compile
(
ctx
,
output_shape
,
std
::
move
(
inputs
));
}
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
}
// namespace migraph
x
src/targets/gpu/device/acos.cpp
0 → 100644
View file @
d2549384
#include <migraphx/gpu/device/acos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
acos
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
nary
(
stream
,
result
,
arg
)([](
auto
x
)
{
return
::
acos
(
to_hip_type
(
x
));
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/add.cpp
View file @
d2549384
#include <migraph/gpu/device/add.hpp>
#include <migraph
x
/gpu/device/add.hpp>
#include <migraph/gpu/device/nary.hpp>
#include <migraph
x
/gpu/device/nary.hpp>
namespace
migraph
{
namespace
migraph
x
{
inline
namespace
MIGRAPH_INLINE_NS
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
...
@@ -22,5 +22,5 @@ void add(hipStream_t stream,
...
@@ -22,5 +22,5 @@ void add(hipStream_t stream,
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
}
// namespace migraph
x
src/targets/gpu/device/add_relu.cpp
View file @
d2549384
#include <migraph/gpu/device/add_relu.hpp>
#include <migraph
x
/gpu/device/add_relu.hpp>
#include <migraph/gpu/device/nary.hpp>
#include <migraph
x
/gpu/device/nary.hpp>
namespace
migraph
{
namespace
migraph
x
{
inline
namespace
MIGRAPH_INLINE_NS
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
...
@@ -27,5 +27,5 @@ void add_relu(hipStream_t stream,
...
@@ -27,5 +27,5 @@ void add_relu(hipStream_t stream,
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
}
// namespace migraph
x
src/targets/gpu/device/asin.cpp
0 → 100644
View file @
d2549384
#include <migraphx/gpu/device/asin.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
asin
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
nary
(
stream
,
result
,
arg
)([](
auto
x
)
{
return
::
asin
(
to_hip_type
(
x
));
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/atan.cpp
0 → 100644
View file @
d2549384
#include <migraphx/gpu/device/atan.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
atan
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
nary
(
stream
,
result
,
arg
)([](
auto
x
)
{
return
::
atan
(
to_hip_type
(
x
));
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/concat.cpp
View file @
d2549384
#include <migraph/shape.hpp>
#include <migraph
x
/shape.hpp>
#include <migraph/argument.hpp>
#include <migraph
x
/argument.hpp>
#include <migraph/gpu/device/concat.hpp>
#include <migraph
x
/gpu/device/concat.hpp>
#include <migraph/gpu/device/tensor.hpp>
#include <migraph
x
/gpu/device/tensor.hpp>
#include <migraph/gpu/device/launch.hpp>
#include <migraph
x
/gpu/device/launch.hpp>
namespace
migraph
{
namespace
migraph
x
{
inline
namespace
MIGRAPH_INLINE_NS
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
argument
concat
(
hipStream_t
stream
,
argument
concat
(
hipStream_t
stream
,
const
migraph
::
shape
&
output_shape
,
const
migraph
x
::
shape
&
output_shape
,
std
::
vector
<
migraph
::
argument
>
args
,
std
::
vector
<
migraph
x
::
argument
>
args
,
std
::
vector
<
std
::
size_t
>
offsets
)
std
::
vector
<
std
::
size_t
>
offsets
)
{
{
for
(
std
::
size_t
l
=
0
;
l
<
args
.
size
()
-
1
;
l
++
)
for
(
std
::
size_t
l
=
0
;
l
<
args
.
size
()
-
1
;
l
++
)
...
@@ -34,5 +34,5 @@ argument concat(hipStream_t stream,
...
@@ -34,5 +34,5 @@ argument concat(hipStream_t stream,
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
}
// namespace migraph
x
src/targets/gpu/device/contiguous.cpp
View file @
d2549384
#include <migraph/gpu/device/contiguous.hpp>
#include <migraph
x
/gpu/device/contiguous.hpp>
#include <migraph/gpu/device/nary.hpp>
#include <migraph
x
/gpu/device/nary.hpp>
namespace
migraph
{
namespace
migraph
x
{
inline
namespace
MIGRAPH_INLINE_NS
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
...
@@ -14,5 +14,5 @@ void contiguous(hipStream_t stream, argument result, argument arg)
...
@@ -14,5 +14,5 @@ void contiguous(hipStream_t stream, argument result, argument arg)
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
}
// namespace migraph
x
src/targets/gpu/device/cos.cpp
0 → 100644
View file @
d2549384
#include <migraphx/gpu/device/cos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
cos
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
nary
(
stream
,
result
,
arg
)([](
auto
x
)
{
return
::
cos
(
to_hip_type
(
x
));
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/cosh.cpp
0 → 100644
View file @
d2549384
#include <migraphx/gpu/device/cosh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
cosh
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
nary
(
stream
,
result
,
arg
)([](
auto
x
)
{
return
::
cosh
(
to_hip_type
(
x
));
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/exp.cpp
0 → 100644
View file @
d2549384
#include <migraphx/gpu/device/exp.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
exp
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
nary
(
stream
,
result
,
arg
)([](
auto
x
)
{
return
::
exp
(
to_hip_type
(
x
));
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/gather.cpp
0 → 100644
View file @
d2549384
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/gather.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
argument
gather
(
hipStream_t
stream
,
const
migraphx
::
shape
&
output_shape
,
std
::
vector
<
migraphx
::
argument
>
args
,
int
axis
)
{
int
axis_index
=
(
axis
<
0
)
?
(
axis
+
output_shape
.
lens
().
size
())
:
axis
;
visit_all
(
args
.
back
(),
args
[
0
])([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
output_shape
.
elements
();
args
[
1
].
visit
([
&
](
auto
indices
)
{
visit_tensor_size
(
output_shape
.
lens
().
size
(),
[
&
](
auto
ndim
)
{
const
auto
*
indices_ptr
=
device_cast
(
indices
.
data
());
auto
*
outptr
=
device_cast
(
output
.
data
());
const
auto
*
inptr
=
device_cast
(
input
.
data
());
hip_tensor_descriptor
<
ndim
>
desc_input
(
input
.
get_shape
());
hip_tensor_descriptor
<
ndim
>
desc_output
(
output
.
get_shape
());
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
{
auto
lens
=
desc_output
.
multi
(
i
);
lens
[
axis_index
]
=
indices_ptr
[
lens
[
axis_index
]];
outptr
[
i
]
=
inptr
[
desc_input
.
linear
(
lens
)];
});
});
});
});
return
args
.
back
();
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp
→
src/targets/gpu/device/include/migraph
x
/gpu/device/launch.hpp
View file @
d2549384
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#ifndef MIGRAPH
X
_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#define MIGRAPH
X
_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
#include <hip/hip_runtime.h>
#include <hip/hip_runtime.h>
#include <migraph/config.hpp>
#include <migraph
x
/config.hpp>
namespace
migraph
{
namespace
migraph
x
{
inline
namespace
MIGRAPH_INLINE_NS
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
...
@@ -53,14 +53,14 @@ inline auto gs_launch(hipStream_t stream, std::size_t n, std::size_t local = 102
...
@@ -53,14 +53,14 @@ inline auto gs_launch(hipStream_t stream, std::size_t n, std::size_t local = 102
// Workaround hcc's broken tile_static macro
// Workaround hcc's broken tile_static macro
#ifdef tile_static
#ifdef tile_static
#undef tile_static
#undef tile_static
#define MIGRAPH_DEVICE_SHARED __attribute__((tile_static))
#define MIGRAPH
X
_DEVICE_SHARED __attribute__((tile_static))
#else
#else
#define MIGRAPH_DEVICE_SHARED __shared__
#define MIGRAPH
X
_DEVICE_SHARED __shared__
#endif
#endif
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
}
// namespace migraph
x
#endif
#endif
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp
→
src/targets/gpu/device/include/migraph
x
/gpu/device/nary.hpp
View file @
d2549384
#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_NARY_HPP
#ifndef MIGRAPH
X
_GUARD_RTGLIB_DEVICE_NARY_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_NARY_HPP
#define MIGRAPH
X
_GUARD_RTGLIB_DEVICE_NARY_HPP
#include <migraph/gpu/device/tensor.hpp>
#include <migraph
x
/gpu/device/tensor.hpp>
#include <migraph/gpu/device/launch.hpp>
#include <migraph
x
/gpu/device/launch.hpp>
#include <migraph/gpu/device/types.hpp>
#include <migraph
x
/gpu/device/types.hpp>
#include <migraph/functional.hpp>
#include <migraph
x
/functional.hpp>
#include <migraph/ranges.hpp>
#include <migraph
x
/ranges.hpp>
#include <migraph/config.hpp>
#include <migraph
x
/config.hpp>
namespace
migraph
{
namespace
migraph
x
{
inline
namespace
MIGRAPH_INLINE_NS
{
inline
namespace
MIGRAPH
X
_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
...
@@ -87,7 +87,7 @@ void trinary_broadcast_vec_impl(hipStream_t stream,
...
@@ -87,7 +87,7 @@ void trinary_broadcast_vec_impl(hipStream_t stream,
const
std
::
size_t
bdim_vec_len
=
bdim_len
/
vec_size
;
const
std
::
size_t
bdim_vec_len
=
bdim_len
/
vec_size
;
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
MIGRAPH_DEVICE_SHARED
vec4
<
type
>
buffer
[
2048
/
vec_size
];
MIGRAPH
X
_DEVICE_SHARED
vec4
<
type
>
buffer
[
2048
/
vec_size
];
// Load bias into LDS
// Load bias into LDS
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_vec_len
;
i
+=
nlocal
)
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_vec_len
;
i
+=
nlocal
)
{
{
...
@@ -144,7 +144,7 @@ void trinary_broadcast_impl(hipStream_t stream,
...
@@ -144,7 +144,7 @@ void trinary_broadcast_impl(hipStream_t stream,
const
std
::
size_t
n
=
output
.
size
();
const
std
::
size_t
n
=
output
.
size
();
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
MIGRAPH_DEVICE_SHARED
type
buffer
[
2048
];
MIGRAPH
X
_DEVICE_SHARED
type
buffer
[
2048
];
// Load bias into LDS
// Load bias into LDS
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_len
;
i
+=
nlocal
)
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_len
;
i
+=
nlocal
)
{
{
...
@@ -192,7 +192,7 @@ void binary_broadcast_vec_impl(
...
@@ -192,7 +192,7 @@ void binary_broadcast_vec_impl(
const
std
::
size_t
bdim_vec_len
=
bdim_len
/
vec_size
;
const
std
::
size_t
bdim_vec_len
=
bdim_len
/
vec_size
;
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
MIGRAPH_DEVICE_SHARED
vec4
<
type
>
buffer
[
2048
/
vec_size
];
MIGRAPH
X
_DEVICE_SHARED
vec4
<
type
>
buffer
[
2048
/
vec_size
];
// Load bias into LDS
// Load bias into LDS
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_vec_len
;
i
+=
nlocal
)
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_vec_len
;
i
+=
nlocal
)
{
{
...
@@ -243,7 +243,7 @@ void binary_broadcast_impl(
...
@@ -243,7 +243,7 @@ void binary_broadcast_impl(
const
std
::
size_t
n
=
output
.
size
();
const
std
::
size_t
n
=
output
.
size
();
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
launch
(
stream
,
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
MIGRAPH_DEVICE_SHARED
type
buffer
[
2048
];
MIGRAPH
X
_DEVICE_SHARED
type
buffer
[
2048
];
// Load bias into LDS
// Load bias into LDS
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_len
;
i
+=
nlocal
)
for
(
size_t
i
=
idx
.
local
;
i
<
bdim_len
;
i
+=
nlocal
)
{
{
...
@@ -313,6 +313,12 @@ void nary_impl(hipStream_t stream, F f, argument result, Arguments... args)
...
@@ -313,6 +313,12 @@ void nary_impl(hipStream_t stream, F f, argument result, Arguments... args)
nary_nonstandard_impl
(
stream
,
f
,
result
,
args
...);
nary_nonstandard_impl
(
stream
,
f
,
result
,
args
...);
}
}
template
<
class
F
>
void
nary_impl
(
hipStream_t
stream
,
F
f
,
argument
result
)
{
nary_standard_impl
(
stream
,
f
,
result
);
}
template
<
class
...
Arguments
>
template
<
class
...
Arguments
>
auto
nary_nonstandard
(
hipStream_t
stream
,
argument
result
,
Arguments
...
args
)
auto
nary_nonstandard
(
hipStream_t
stream
,
argument
result
,
Arguments
...
args
)
{
{
...
@@ -396,7 +402,7 @@ inline auto nary(hipStream_t stream,
...
@@ -396,7 +402,7 @@ inline auto nary(hipStream_t stream,
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPH_INLINE_NS
}
// namespace MIGRAPH
X
_INLINE_NS
}
// namespace migraph
}
// namespace migraph
x
#endif
#endif
Prev
1
…
3
4
5
6
7
8
9
10
11
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment