gaoqiong / MIGraphX — Commit 9d3fb0b5 (unverified)
Authored Aug 05, 2023 by Ted Themistokleous; committed via GitHub on Aug 05, 2023

Merge branch 'develop' into enable_navi_32_ci

Parents: 9c91c08d, aeb9f78c
Changes: 278 files in this commit. This page shows 20 changed files with 176 additions and 93 deletions (+176 / −93).
src/targets/cpu/include/migraphx/cpu/lowering.hpp (+2 / −3)
src/targets/cpu/include/migraphx/cpu/target.hpp (+1 / −2)
src/targets/cpu/lowering.cpp (+2 / −2)
src/targets/gpu/CMakeLists.txt (+33 / −9)
src/targets/gpu/compile_hip_code_object.cpp (+5 / −6)
src/targets/gpu/compile_miopen.cpp (+1 / −1)
src/targets/gpu/driver/CMakeLists.txt (+1 / −1)
src/targets/gpu/fuse_mlir.cpp (+1 / −0)
src/targets/gpu/gemm_impl.cpp (+5 / −2)
src/targets/gpu/include/migraphx/gpu/allocation_model.hpp (+2 / −2)
src/targets/gpu/include/migraphx/gpu/analyze_streams.hpp (+2 / −2)
src/targets/gpu/include/migraphx/gpu/compile_hip.hpp (+5 / −6)
src/targets/gpu/include/migraphx/gpu/compile_hip_code_object.hpp (+7 / −5)
src/targets/gpu/include/migraphx/gpu/compile_ops.hpp (+2 / −2)
src/targets/gpu/include/migraphx/gpu/compiler.hpp (+17 / −18)
src/targets/gpu/include/migraphx/gpu/config.hpp (+31 / −0)
src/targets/gpu/include/migraphx/gpu/context.hpp (+1 / −0)
src/targets/gpu/include/migraphx/gpu/contiguous.hpp (+0 / −2)
src/targets/gpu/include/migraphx/gpu/convolution.hpp (+53 / −28)
src/targets/gpu/include/migraphx/gpu/device/argmax.hpp (+5 / −2)
src/targets/cpu/include/migraphx/cpu/lowering.hpp

@@ -24,8 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
 #define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
 #include <string>
 #include <migraphx/config.hpp>
 #include <migraphx/cpu/context.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -34,7 +33,7 @@ struct module;
 namespace cpu {
-struct lowering
+struct MIGRAPHX_CPU_EXPORT lowering
 {
     std::string name() const { return "cpu::lowering"; }
     void apply(module& m) const;
...
...
src/targets/cpu/include/migraphx/cpu/target.hpp

@@ -28,14 +28,13 @@
 #include <migraphx/register_target.hpp>
 #include <migraphx/compile_options.hpp>
 #include <migraphx/cpu/context.hpp>
 #include <migraphx/config.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 struct pass;
 namespace cpu {
-struct target
+struct MIGRAPHX_CPU_EXPORT target
 {
     std::string name() const;
     std::vector<pass> get_passes(migraphx::context& gctx, const compile_options&) const;
...
...
src/targets/cpu/lowering.cpp

@@ -27,7 +27,7 @@
 #include <migraphx/dfor.hpp>
 #include <migraphx/op/identity.hpp>
 #include <migraphx/op/convolution.hpp>
-#include <migraphx/op/deconvolution.hpp>
+#include <migraphx/op/convolution_backwards.hpp>
 #include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/dot.hpp>
 #include <migraphx/op/quant_dot.hpp>
@@ -345,7 +345,7 @@ struct cpu_apply
         extend_op("contiguous", "dnnl::reorder");
         extend_op("convolution", "dnnl::convolution");
 #ifndef MIGRAPHX_ENABLE_ZENDNN
-        extend_op("deconvolution", "dnnl::deconvolution");
+        extend_op("convolution_backwards", "dnnl::convolution_backwards");
         extend_op("dot", "dnnl::dot");
 #endif
         extend_op("erf", "cpu::erf");
...
...
src/targets/gpu/CMakeLists.txt

@@ -33,7 +33,10 @@ if(NOT TARGET MIOpen)
     message(SEND_ERROR "Cant find miopen")
 endif()
-find_package(composable_kernel 1.0.0 COMPONENTS jit_library REQUIRED)
+if(NOT WIN32)
+    # TODO: re-enable when CK is ported to Windows
+    find_package(composable_kernel 1.0.0 REQUIRED COMPONENTS jit_library)
+endif()
 if(BUILD_DEV)
     set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "Use hipRTC APIs")
@@ -42,12 +45,12 @@ else()
 endif()
 include(Embed)
-file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/*.hpp)
+file(GLOB KERNEL_FILES CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/*.hpp)
 message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
-add_embed_library(migraphx_kernels ${KERNEL_FILES})
+add_embed_library(migraphx_kernels ${KERNEL_FILES} RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/)
-file(GLOB DEVICE_GPU_SRCS ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp)
+file(GLOB DEVICE_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp)
 add_library(migraphx_device ${DEVICE_GPU_SRCS})
 add_library(compile_for_gpu INTERFACE)
@@ -67,6 +70,8 @@ target_link_libraries(migraphx_device PUBLIC migraphx)
 target_link_libraries(migraphx_device PRIVATE compile_for_gpu)
 target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
 target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
+target_compile_options(migraphx_device PRIVATE -Wno-ignored-attributes)
+migraphx_generate_export_header(migraphx_device DIRECTORY migraphx/gpu/device)
 add_library(kernel_file_check EXCLUDE_FROM_ALL)
@@ -82,7 +87,13 @@ target_link_libraries(kernel_file_check compile_for_gpu)
 rocm_clang_tidy_check(kernel_file_check)
-file(GLOB JIT_GPU_SRCS ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/jit/*.cpp)
+file(GLOB JIT_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jit/*.cpp)
+if(WIN32)
+    # TODO: re-enable when CK is ported to Windows
+    list(REMOVE_ITEM JIT_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/jit/ck_gemm.cpp)
+endif()
 add_library(migraphx_gpu
     abs.cpp
     analyze_streams.cpp
@@ -131,7 +142,9 @@ add_library(migraphx_gpu
     write_literals.cpp
     ${JIT_GPU_SRCS}
 )
 set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
+migraphx_generate_export_header(migraphx_gpu)
 function(register_migraphx_gpu_ops PREFIX)
     foreach(OP ${ARGN})
@@ -173,7 +186,7 @@ register_op(migraphx_gpu
     OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
     INCLUDES migraphx/gpu/context.hpp
 )
 register_op(migraphx_gpu HEADER migraphx/gpu/convolution.hpp
-    OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::deconvolution> gpu::miopen_convolution<op::quant_convolution>
+    OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::convolution_backwards> gpu::miopen_convolution<op::quant_convolution>
     INCLUDES migraphx/gpu/context.hpp
 )
 rocm_set_soversion(migraphx_gpu ${MIGRAPHX_SO_VERSION})
 rocm_clang_tidy_check(migraphx_gpu)
@@ -185,7 +198,9 @@ if(MIGRAPHX_ENABLE_MLIR)
     find_package(rocMLIR 1.0.0 CONFIG REQUIRED)
     message(STATUS "Build with rocMLIR::rockCompiler ${rocMLIR_VERSION}")
     target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR")
-    target_link_libraries(migraphx_gpu PUBLIC rocMLIR::rockCompiler)
+    # Make this private to avoid multiple inclusions of LLVM symbols.
+    # TODO: Fix rocMLIR's library to hide LLVM internals.
+    target_link_libraries(migraphx_gpu PRIVATE rocMLIR::rockCompiler)
 endif()
 if(MIGRAPHX_USE_HIPRTC)
@@ -231,7 +246,12 @@ check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_
 set(MIGRAPHX_USE_FIND_2_API "${HAS_FIND_2_API}" CACHE BOOL "")
 if(MIGRAPHX_USE_FIND_2_API)
-    target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
+    check_library_exists(MIOpen "miopenSetFindOptionPreallocatedTensor" "${MIOPEN_LOCATION}" HAS_PREALLOCATION_API)
+    if(HAS_PREALLOCATION_API)
+        target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API -DMIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS)
+    else()
+        target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
+    endif()
     message(STATUS "MIGraphx is using Find-2.0 API of MIOpen")
 else()
     message(STATUS "MIGraphx is using legacy Find API in MIOpen")
@@ -245,7 +265,11 @@ else()
 endif()
 target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
-target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels composable_kernel::jit_library)
+target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
+if(NOT WIN32)
+    # TODO: re-enable when CK is ported to Windows
+    target_link_libraries(migraphx_gpu PRIVATE composable_kernel::jit_library)
+endif()
 add_subdirectory(driver)
 add_subdirectory(hiprtc)
...
...
src/targets/gpu/compile_hip_code_object.cpp

@@ -135,14 +135,13 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
     std::size_t max_global =
         ctx.get_current_device().get_cu_count() * ctx.get_current_device().get_max_workitems_per_cu();
     return [n, over, max_global](std::size_t local) {
-        std::size_t num_elements = n;
+        // hip require global workitems multiple of local workitems. It may degrade performance.
+        // [TODO]: consider adding "fno-hip-uniform-block" flag when it becomes available.
+        // https://reviews.llvm.org/D155213
+        std::size_t num_elements = ((n + local - 1) / local) * local;
         std::size_t groups       = (num_elements + local - 1) / local;
         std::size_t max_blocks   = max_global / local;
         std::size_t nglobal      = std::min(max_blocks * over, groups) * local;
-#ifdef MIGRAPHX_USE_HIPRTC
-        if(enabled(MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS{}))
-            num_elements = ((num_elements + local - 1) / local) * local;
-#endif
         return std::min(nglobal, num_elements);
     };
 }
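A minimal standalone restatement (not part of the commit) of the sizing logic in the hunk above: n is rounded up to the nearest multiple of local because HIP requires the global work size to be a multiple of the workgroup size, and the result is capped by the device-derived max_global. The device numbers used in main() are hypothetical.

#include <algorithm>
#include <cstddef>
#include <iostream>

// Mirrors the lambda returned by compute_global_for in the hunk above.
// max_global stands in for cu_count * max_workitems_per_cu queried from the device.
std::size_t global_for(std::size_t n, std::size_t local, std::size_t max_global, std::size_t over = 1)
{
    std::size_t num_elements = ((n + local - 1) / local) * local; // round up to a multiple of local
    std::size_t groups       = (num_elements + local - 1) / local;
    std::size_t max_blocks   = max_global / local;
    std::size_t nglobal      = std::min(max_blocks * over, groups) * local;
    return std::min(nglobal, num_elements);
}

int main()
{
    // Hypothetical device limit: 120 CUs * 1024 work-items per CU = 122880.
    std::cout << global_for(72, 64, 122880) << "\n";       // 128: rounded up to a multiple of 64
    std::cout << global_for(1000000, 256, 122880) << "\n"; // 122880: clamped to the device limit
    return 0;
}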
...
...
@@ -168,7 +167,7 @@ operation compile_hip_code_object(const std::string& content, hip_compile_options options)
                    [](auto&& p) {
                        auto&& name = p.first;
                        auto&& c    = p.second;
-                       auto path   = fs::path{"migraphx"} / "kernels" / name;
+                       auto path   = name;
                        return src_file{path, c};
                    });
     srcs.push_back(src_file{fs::path{"main.cpp"},
...
...
src/targets/gpu/compile_miopen.cpp

@@ -79,7 +79,7 @@ void compile_miopen::apply(module& m) const
         std::size_t ws = 0;
         try
         {
-            // for the regular convolution and deconvolution, this try would always succeed
+            // for the regular convolution and convolution_backwards, this try would always succeed
             ws = compile(op, ins, int8_x4_format);
         }
         catch(migraphx::exception&)
...
...
src/targets/gpu/driver/CMakeLists.txt

@@ -22,7 +22,7 @@
 # THE SOFTWARE.
 #####################################################################################
-file(GLOB GPU_DRIVER_SRCS ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
+file(GLOB GPU_DRIVER_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
 add_executable(gpu-driver ${GPU_DRIVER_SRCS})
...
...
src/targets/gpu/fuse_mlir.cpp

@@ -216,6 +216,7 @@ struct find_mlir_op
             "quant_dot",
             "add",
+            "clip",
             "relu",
             "sub",
             "mul",
             "div",
...
...
src/targets/gpu/gemm_impl.cpp

@@ -140,8 +140,11 @@ void gemm_impl(context& ctx,
         compute_type = rocblas_datatype_f32_r;
     }
-    rocblas_gemm_flags flag = int8_x4_format ? rocblas_gemm_flags_pack_int8x4 : rocblas_gemm_flags_none;
+    rocblas_gemm_flags flag = rocblas_gemm_flags_none;
+#if ROCBLAS_VERSION_MAJOR < 3
+    if(int8_x4_format)
+        flag = rocblas_gemm_flags_pack_int8x4;
+#endif
     auto a_lens = args[0].get_shape().lens();
     auto b_lens = args[1].get_shape().lens();
...
...
src/targets/gpu/include/migraphx/gpu/allocation_model.hpp

@@ -24,7 +24,7 @@
 #ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_GPU_ALLOCATION_MODEL_HPP
 #define MIGRAPHX_GUARD_AMDMIGRAPHX_GPU_ALLOCATION_MODEL_HPP
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/operation.hpp>
 #include <migraphx/instruction_ref.hpp>
 #include <string>
@@ -33,7 +33,7 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
-struct gpu_allocation_model
+struct MIGRAPHX_GPU_EXPORT gpu_allocation_model
 {
     std::string name() const;
     std::string copy() const;
...
...
src/targets/gpu/include/migraphx/gpu/analyze_streams.hpp

@@ -24,7 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_GPU_ANALYZE_STREAMS_HPP
 #define MIGRAPHX_GUARD_RTGLIB_GPU_ANALYZE_STREAMS_HPP
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/analyze_streams.hpp>
 namespace migraphx {
@@ -34,7 +34,7 @@ struct module;
 namespace gpu {
-std::vector<stream_race> analyze_streams(const module& m);
+MIGRAPHX_GPU_EXPORT std::vector<stream_race> analyze_streams(const module& m);
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/compile_hip.hpp

@@ -24,7 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_COMPILE_HIP_HPP
 #define MIGRAPHX_GUARD_RTGLIB_COMPILE_HIP_HPP
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/filesystem.hpp>
 #include <migraphx/compile_src.hpp>
 #include <migraphx/env.hpp>
@@ -58,14 +58,13 @@ struct hiprtc_src_file
     }
 };
-std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_src_file> srcs, std::string params, const std::string& arch);
+MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_src_file> srcs, std::string params, const std::string& arch);
-std::vector<std::vector<char>>
+MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>>
 compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch);
-std::string enum_params(std::size_t count, std::string param);
+MIGRAPHX_GPU_EXPORT std::string enum_params(std::size_t count, std::string param);
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/compile_hip_code_object.hpp

@@ -24,7 +24,7 @@
 #ifndef MIGRAPHX_GUARD_GPU_COMPILE_HIP_CODE_OBJECT_HPP
 #define MIGRAPHX_GUARD_GPU_COMPILE_HIP_CODE_OBJECT_HPP
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/operation.hpp>
 #include <migraphx/compile_src.hpp>
@@ -66,14 +66,16 @@ struct hip_compile_options
 };
 /// Compute global for n elements, but max out on target-specific upper limit
-std::function<std::size_t(std::size_t local)>
+MIGRAPHX_GPU_EXPORT std::function<std::size_t(std::size_t local)>
 compute_global_for(context& ctx, std::size_t n, std::size_t over = 1);
-operation compile_hip_code_object(const std::string& content, hip_compile_options options);
+MIGRAPHX_GPU_EXPORT operation compile_hip_code_object(const std::string& content, hip_compile_options options);
-std::size_t compute_block_size(std::size_t n, std::size_t max_block_size = 1024);
+MIGRAPHX_GPU_EXPORT std::size_t compute_block_size(std::size_t n, std::size_t max_block_size = 1024);
-std::string generate_make_shape(const shape& s);
+MIGRAPHX_GPU_EXPORT std::string generate_make_shape(const shape& s);
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/compile_ops.hpp

@@ -24,7 +24,7 @@
 #ifndef MIGRAPHX_GUARD_GPU_COMPILE_OPS_HPP
 #define MIGRAPHX_GUARD_GPU_COMPILE_OPS_HPP
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <string>
 namespace migraphx {
@@ -36,7 +36,7 @@ namespace gpu {
 struct context;
-struct compile_ops
+struct MIGRAPHX_GPU_EXPORT compile_ops
 {
     context* ctx         = nullptr;
     bool exhaustive_tune = false;
...
...
src/targets/gpu/include/migraphx/gpu/compiler.hpp

@@ -24,7 +24,7 @@
 #ifndef MIGRAPHX_GUARD_GPU_COMPILER_HPP
 #define MIGRAPHX_GUARD_GPU_COMPILER_HPP
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/config.hpp>
 #include <migraphx/auto_register.hpp>
 #include <migraphx/operation.hpp>
 #include <migraphx/value.hpp>
@@ -32,6 +32,7 @@
 #include <migraphx/instruction.hpp>
 #include <migraphx/optional.hpp>
 #include <migraphx/rank.hpp>
+#include <migraphx/gpu/tuning_config.hpp>
 #include <functional>
 namespace migraphx {
@@ -68,12 +69,6 @@ struct compiler_replace
     }
 };
-struct tuning_config
-{
-    value problem;
-    std::vector<value> solutions;
-};
 using compiler_compile =
     std::function<compiler_replace(context&, instruction_ref, operation, const value&)>;
 using compiler_compile_op =
@@ -81,17 +76,21 @@ using compiler_compile_op =
 using compiler_tuning_config =
     std::function<optional<tuning_config>(context&, instruction_ref, const operation&, bool)>;
-void register_compiler(const std::string& name, compiler_compile c, compiler_compile_op cop, compiler_tuning_config ctg);
-bool has_compiler_for(const std::string& name);
-compiler_replace compile(context& ctx, instruction_ref ins, const operation& op, const value& solution);
-operation compile_op(const std::string& name, context& ctx, const std::vector<shape>& inputs, const value& v);
-optional<tuning_config>
+MIGRAPHX_GPU_EXPORT void register_compiler(const std::string& name, compiler_compile c, compiler_compile_op cop, compiler_tuning_config ctg);
+MIGRAPHX_GPU_EXPORT bool has_compiler_for(const std::string& name);
+MIGRAPHX_GPU_EXPORT compiler_replace compile(context& ctx, instruction_ref ins, const operation& op, const value& solution);
+MIGRAPHX_GPU_EXPORT operation compile_op(const std::string& name, context& ctx, const std::vector<shape>& inputs, const value& v);
+MIGRAPHX_GPU_EXPORT optional<tuning_config>
 get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive);
 template <class T>
...
...
src/targets/gpu/include/migraphx/gpu/config.hpp (new file, 0 → 100644)
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_GPU_CONFIG_HPP
#define MIGRAPHX_GUARD_GPU_CONFIG_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/export.h>
#endif // MIGRAPHX_GUARD_GPU_CONFIG_HPP
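For orientation, a hypothetical sketch (not part of this commit) of what the generated <migraphx/gpu/export.h> pulled in above typically contains: migraphx_generate_export_header(migraphx_gpu) in the CMake change presumably wraps CMake's export-header generation, which emits a visibility macro along these lines. The real generated file may differ, for example by using __declspec(dllexport/dllimport) on Windows.

/* Hypothetical, hand-written approximation of the build-generated export header. */
#ifndef MIGRAPHX_GPU_EXPORT_H
#define MIGRAPHX_GPU_EXPORT_H

#ifdef MIGRAPHX_GPU_STATIC_DEFINE
/* Static library builds need no visibility annotation. */
#define MIGRAPHX_GPU_EXPORT
#else
#ifndef MIGRAPHX_GPU_EXPORT
/* Marks a symbol as visible across the migraphx_gpu shared-library boundary. */
#define MIGRAPHX_GPU_EXPORT __attribute__((visibility("default")))
#endif
#endif

#endif /* MIGRAPHX_GPU_EXPORT_H */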
src/targets/gpu/include/migraphx/gpu/context.hpp

@@ -24,6 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
 #define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
+#include <migraphx/gpu/export.h>
 #include <migraphx/context.hpp>
 #include <migraphx/gpu/miopen.hpp>
 #include <migraphx/gpu/rocblas.hpp>
...
...
src/targets/gpu/include/migraphx/gpu/contiguous.hpp

@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous>
     shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(2);
-        if(inputs.front().standard())
-            return inputs.front();
         auto lens = inputs.at(0).lens();
         auto t    = inputs.at(0).type();
         return {t, lens};
...
...
src/targets/gpu/include/migraphx/gpu/convolution.hpp

@@ -31,7 +31,7 @@
 #include <migraphx/op/identity.hpp>
 #include <migraphx/op/convolution.hpp>
 #include <migraphx/op/quant_convolution.hpp>
-#include <migraphx/op/deconvolution.hpp>
+#include <migraphx/op/convolution_backwards.hpp>
 #include <unordered_map>
 #include <migraphx/reflect.hpp>
 #include <migraphx/gpu/context.hpp>
@@ -146,7 +146,8 @@ struct miopen_convolution
     void set_conv_descriptor()
     {
-        cd = (op.name() == "deconvolution") ? make_deconv(op) : make_conv(op);
+        cd = (op.name() == "convolution_backwards") ? make_convolution_backwards(op)
+                                                    : make_conv(op);
     }
     value compile(migraphx::context& ctx, const shape& output, const std::vector<shape>& input)
@@ -159,10 +160,31 @@ struct miopen_convolution
     shape find(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
     {
         shape workspace_shape{};
         auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
         auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
         auto y_desc = make_tensor(reshape_if_1d(output_shape));
+        auto* miopen_stream_handle = ctx.get_stream().get_miopen();
+        std::size_t workspace_size = 0;
+        auto status = miopenConvolutionForwardGetWorkSpaceSize(miopen_stream_handle,
+                                                               w_desc.get(),
+                                                               x_desc.get(),
+                                                               cd.get(),
+                                                               y_desc.get(),
+                                                               &workspace_size);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
+        workspace_shape = shape{shape::int8_type, {workspace_size}};
+        auto x_shape = inputs[0];
+        auto w_shape = inputs[1];
+        if(int8_x4_format)
+        {
+            x_shape = pack_int8_shape(x_shape);
+            w_shape = pack_int8_shape(w_shape);
+        }
 #ifdef MIGRAPHX_HAS_FIND_2_API
         {
             auto conv_problem = make_obj<miopen_problem>(
@@ -170,13 +192,34 @@ struct miopen_convolution
             set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
             set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
+            bool preallocate = false;
+#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
+            // MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6
+            preallocate = true;
+#endif
+            auto x         = preallocate ? to_gpu(generate_argument(x_shape)) : inputs[0];
+            auto w         = preallocate ? to_gpu(generate_argument(w_shape)) : inputs[1];
+            auto y         = preallocate ? allocate_gpu(output_shape) : inputs[2];
+            auto workspace = preallocate ? allocate_gpu(workspace_shape)
+                                         : migraphx::argument(workspace_shape);
             set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);
-            auto* miopen_stream_handle = ctx.get_stream().get_miopen();
+            const miopenTensorArgument_t tensor_args[3] = {
+                {miopenTensorConvolutionX, nullptr, x.implicit()},
+                {miopenTensorConvolutionW, nullptr, w.implicit()},
+                {miopenTensorConvolutionY, nullptr, y.implicit()},
+            };
-            solution_ptr = find_solution(miopen_stream_handle, conv_problem.get(), ctx.get_exhaustive_tune_flag());
+            solution_ptr = find_solution(miopen_stream_handle,
+                                         3,
+                                         tensor_args,
+                                         workspace.implicit(),
+                                         workspace_size,
+                                         conv_problem.get(),
+                                         ctx.get_exhaustive_tune_flag());
-            auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
+            status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
             if(status != miopenStatusSuccess)
                 MIGRAPHX_THROW("MIOpen" + op.name() + " : failed to get solution's workspace size");
@@ -195,29 +238,10 @@ struct miopen_convolution
             return shape{shape::int8_type, {workspace_size}};
         }
 #else
-        auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
-                                                               w_desc.get(),
-                                                               x_desc.get(),
-                                                               cd.get(),
-                                                               y_desc.get(),
-                                                               &workspace_size);
-        if(status != miopenStatusSuccess)
-            MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
-        workspace_shape = shape{shape::int8_type, {workspace_size}};
-        auto x_shape = inputs[0];
-        auto w_shape = inputs[1];
-        if(int8_x4_format)
-        {
-            x_shape = pack_int8_shape(x_shape);
-            w_shape = pack_int8_shape(w_shape);
-        }
         auto x         = to_gpu(generate_argument(x_shape));
         auto w         = to_gpu(generate_argument(w_shape));
         auto y         = allocate_gpu(output_shape);
         auto workspace = allocate_gpu(workspace_shape);
         int algo_count = 1;
         miopenConvAlgoPerf_t perf;
         status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
@@ -337,6 +361,7 @@ struct miopen_convolution
         return {s.type(), lens, strides};
     }
 };
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
...
...
src/targets/gpu/include/migraphx/gpu/device/argmax.hpp

@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMAX_HPP
 #include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
+#include <migraphx/gpu/device/config.hpp>
 #include <hip/hip_runtime_api.h>
 namespace migraphx {
@@ -33,7 +33,10 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
-void argmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis);
+void MIGRAPHX_DEVICE_EXPORT argmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis);
 } // namespace device
 } // namespace gpu
...
...