Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
23cb7917
Unverified
Commit
23cb7917
authored
Aug 16, 2023
by
Brian Pickrell
Committed by
GitHub
Aug 16, 2023
Browse files
Merge branch 'develop' into blas_tuning
parents
b5fcc0bc
ea32ca70
Changes
458
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
313 additions
and
49 deletions
+313
-49
src/sqlite.cpp
src/sqlite.cpp
+1
-0
src/target.cpp
src/target.cpp
+37
-0
src/targets/cpu/CMakeLists.txt
src/targets/cpu/CMakeLists.txt
+2
-2
src/targets/cpu/deconvolution.cpp
src/targets/cpu/deconvolution.cpp
+2
-2
src/targets/cpu/gemm.cpp
src/targets/cpu/gemm.cpp
+5
-1
src/targets/cpu/include/migraphx/cpu/context.hpp
src/targets/cpu/include/migraphx/cpu/context.hpp
+1
-0
src/targets/cpu/include/migraphx/cpu/dnnl.hpp
src/targets/cpu/include/migraphx/cpu/dnnl.hpp
+5
-1
src/targets/cpu/include/migraphx/cpu/lowering.hpp
src/targets/cpu/include/migraphx/cpu/lowering.hpp
+2
-3
src/targets/cpu/include/migraphx/cpu/target.hpp
src/targets/cpu/include/migraphx/cpu/target.hpp
+1
-2
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+2
-2
src/targets/cpu/target.cpp
src/targets/cpu/target.cpp
+1
-1
src/targets/fpga/include/migraphx/fpga/vitis_ai_adapter.hpp
src/targets/fpga/include/migraphx/fpga/vitis_ai_adapter.hpp
+1
-1
src/targets/fpga/subgraph.cpp
src/targets/fpga/subgraph.cpp
+1
-2
src/targets/fpga/vitis_ai_adapter.cpp
src/targets/fpga/vitis_ai_adapter.cpp
+1
-1
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+35
-7
src/targets/gpu/compile_gen.cpp
src/targets/gpu/compile_gen.cpp
+14
-2
src/targets/gpu/compile_hip.cpp
src/targets/gpu/compile_hip.cpp
+10
-3
src/targets/gpu/compile_hip_code_object.cpp
src/targets/gpu/compile_hip_code_object.cpp
+10
-6
src/targets/gpu/compile_miopen.cpp
src/targets/gpu/compile_miopen.cpp
+1
-1
src/targets/gpu/compile_ops.cpp
src/targets/gpu/compile_ops.cpp
+181
-12
No files found.
src/sqlite.cpp
View file @
23cb7917
...
...
@@ -48,6 +48,7 @@ struct sqlite_impl
template
<
class
F
>
void
exec
(
const
char
*
sql
,
F
f
)
{
// cppcheck-suppress constParameterPointer
auto
callback
=
[](
void
*
obj
,
auto
...
xs
)
->
int
{
try
{
...
...
src/target.cpp
0 → 100644
View file @
23cb7917
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/target.hpp>
#include <migraphx/register_target.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
// Serializes a target into `v`.
// The only entry written is "name", which receives whatever t.name() returns.
void migraphx_to_value(value& v, const target& t)
{
    const auto target_name = t.name();
    v["name"] = target_name;
}
// Rebuilds a target from a serialized value.
// Reads the "name" entry of `v` as a std::string, asks make_target for the
// target with that name, and assigns the result to `t`.
// NOTE(review): a missing "name" key is handled by value::at — presumably it
// throws; confirm against the value class.
void migraphx_from_value(const value& v, target& t)
{
    const auto target_name = v.at("name").to<std::string>();
    t = make_target(target_name);
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/cpu/CMakeLists.txt
View file @
23cb7917
...
...
@@ -78,6 +78,8 @@ else()
endif
()
target_link_libraries
(
migraphx_cpu PRIVATE migraphx
)
migraphx_generate_export_header
(
migraphx_cpu
)
find_package
(
OpenMP
)
target_link_libraries
(
migraphx_cpu PUBLIC OpenMP::OpenMP_CXX
)
# Add library path to rpath to workaround issues with our broken packages
...
...
@@ -88,8 +90,6 @@ foreach(LIBRARY ${OpenMP_CXX_LIBRARIES})
endif
()
endforeach
()
target_link_libraries
(
migraphx_all_targets INTERFACE migraphx_cpu
)
rocm_install_targets
(
TARGETS migraphx_cpu
INCLUDE
...
...
src/targets/cpu/deconvolution.cpp
View file @
23cb7917
...
...
@@ -23,14 +23,14 @@
*/
#include <migraphx/config.hpp>
#include <migraphx/cpu/dnnl.hpp>
#include <migraphx/op/
de
convolution.hpp>
#include <migraphx/op/convolution
_backwards
.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
cpu
{
struct
dnnl_deconvolution
:
dnnl_extend_op
<
dnnl_deconvolution
,
dnnl
::
deconvolution_forward
,
op
::
de
convolution
>
:
dnnl_extend_op
<
dnnl_deconvolution
,
dnnl
::
deconvolution_forward
,
op
::
convolution
_backwards
>
{
std
::
vector
<
int
>
arg_map
(
int
)
const
{
...
...
src/targets/cpu/gemm.cpp
View file @
23cb7917
...
...
@@ -43,7 +43,11 @@ struct dnnl_gemm : dnnl_extend_op<dnnl_gemm, dnnl::matmul, op::dot>
MIGRAPHX_DNNL_PREFIX
(
ARG_BIAS
)};
}
void
required
(
const
check_shapes
&
cs
)
const
{
cs
.
not_broadcasted
();
}
template
<
class
T
>
void
required
(
const
check_shapes
<
T
>&
cs
)
const
{
cs
.
not_broadcasted
();
}
dnnl
::
matmul
::
desc
get_desc
(
const
std
::
unordered_map
<
int
,
dnnl
::
memory
::
desc
>&
m
)
const
{
...
...
src/targets/cpu/include/migraphx/cpu/context.hpp
View file @
23cb7917
...
...
@@ -28,6 +28,7 @@
#include <migraphx/cpu/dnnl.hpp>
#include <migraphx/cpu/parallel.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/cpu/export.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
src/targets/cpu/include/migraphx/cpu/dnnl.hpp
View file @
23cb7917
...
...
@@ -400,7 +400,11 @@ struct dnnl_extend_op : dnnl_op<Derived, Primitive>
}
// dnnl has some issues with non-packed inputs
void
required
(
const
check_shapes
&
cs
)
const
{
cs
.
packed_or_broadcasted
();
}
template
<
class
T
>
void
required
(
const
check_shapes
<
T
>&
cs
)
const
{
cs
.
packed_or_broadcasted
();
}
std
::
string
name
()
const
{
return
"dnnl::"
+
op
.
name
();
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
...
...
src/targets/cpu/include/migraphx/cpu/lowering.hpp
View file @
23cb7917
...
...
@@ -24,8 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
#define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
#include <string>
#include <migraphx/config.hpp>
#include <migraphx/cpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -34,7 +33,7 @@ struct module;
namespace
cpu
{
struct
lowering
struct
MIGRAPHX_CPU_EXPORT
lowering
{
std
::
string
name
()
const
{
return
"cpu::lowering"
;
}
void
apply
(
module
&
m
)
const
;
...
...
src/targets/cpu/include/migraphx/cpu/target.hpp
View file @
23cb7917
...
...
@@ -28,14 +28,13 @@
#include <migraphx/register_target.hpp>
#include <migraphx/compile_options.hpp>
#include <migraphx/cpu/context.hpp>
#include <migraphx/config.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
struct
pass
;
namespace
cpu
{
struct
target
struct
MIGRAPHX_CPU_EXPORT
target
{
std
::
string
name
()
const
;
std
::
vector
<
pass
>
get_passes
(
migraphx
::
context
&
gctx
,
const
compile_options
&
)
const
;
...
...
src/targets/cpu/lowering.cpp
View file @
23cb7917
...
...
@@ -27,7 +27,7 @@
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/
de
convolution.hpp>
#include <migraphx/op/convolution
_backwards
.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
...
...
@@ -345,7 +345,7 @@ struct cpu_apply
extend_op
(
"contiguous"
,
"dnnl::reorder"
);
extend_op
(
"convolution"
,
"dnnl::convolution"
);
#ifndef MIGRAPHX_ENABLE_ZENDNN
extend_op
(
"
de
convolution"
,
"dnnl::
de
convolution"
);
extend_op
(
"convolution
_backwards
"
,
"dnnl::convolution
_backwards
"
);
extend_op
(
"dot"
,
"dnnl::dot"
);
#endif
extend_op
(
"erf"
,
"cpu::erf"
);
...
...
src/targets/cpu/target.cpp
View file @
23cb7917
...
...
@@ -61,7 +61,7 @@ namespace cpu {
std
::
string
target
::
name
()
const
{
return
"cpu"
;
}
// cppcheck-suppress constParameter
// cppcheck-suppress constParameter
Reference
std
::
vector
<
pass
>
target
::
get_passes
(
migraphx
::
context
&
gctx
,
const
compile_options
&
)
const
{
auto
&
ctx
=
any_cast
<
context
>
(
gctx
);
...
...
src/targets/fpga/include/migraphx/fpga/vitis_ai_adapter.hpp
View file @
23cb7917
...
...
@@ -41,7 +41,7 @@ class x_model
void
set_shape
(
migraphx
::
shape
);
};
x_model
create_xmodel
(
migraphx
::
module_ref
mod
);
x_model
create_xmodel
(
migraphx
::
const_
module_ref
mod
);
migraphx
::
argument
execute
(
const
x_model
&
xmodel
,
const
migraphx
::
shape
&
output_shape
,
...
...
src/targets/fpga/subgraph.cpp
View file @
23cb7917
...
...
@@ -113,8 +113,7 @@ void subgraph::apply(module_pass_manager& mpm) const
// TODO(varunsh): this code may be replaceable by code in the fuse_pointwise pass
// assuming all FPGA instructions are in one contiguous range
pm
->
insert_instructions
(
pm
->
end
(),
first
,
last
,
{});
pm
->
insert_instructions
(
pm
->
end
(),
first
,
std
::
next
(
last
),
{});
migraphx
::
instruction_ref
placeholder_ins
;
for
(
auto
it
:
iterator_for
(
mod
))
{
...
...
src/targets/fpga/vitis_ai_adapter.cpp
View file @
23cb7917
...
...
@@ -33,7 +33,7 @@ migraphx::shape x_model::get_shape() const { return shape; };
void
x_model
::
set_shape
(
migraphx
::
shape
s
)
{
shape
=
s
;
}
x_model
create_xmodel
(
const
migraphx
::
module_ref
mod
)
x_model
create_xmodel
(
migraphx
::
const_
module_ref
mod
)
{
std
::
cout
<<
"Calling an external function: create_xmodel!
\n
"
;
x_model
xmodel
;
...
...
src/targets/gpu/CMakeLists.txt
View file @
23cb7917
...
...
@@ -33,6 +33,11 @@ if(NOT TARGET MIOpen)
message
(
SEND_ERROR
"Cant find miopen"
)
endif
()
if
(
NOT WIN32
)
# TODO: re-enable when CK is ported to Windows
find_package
(
composable_kernel 1.0.0 REQUIRED COMPONENTS jit_library
)
endif
()
if
(
BUILD_DEV
)
set
(
MIGRAPHX_USE_HIPRTC OFF CACHE BOOL
"Use hipRTC APIs"
)
else
()
...
...
@@ -40,12 +45,12 @@ else()
endif
()
include
(
Embed
)
file
(
GLOB KERNEL_FILES
${
CONFIGURE_DEPENDS
}
file
(
GLOB KERNEL_FILES CONFIGURE_DEPENDS
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/*.hpp
)
message
(
STATUS
"KERNEL_FILES:
${
KERNEL_FILES
}
"
)
add_embed_library
(
migraphx_kernels
${
KERNEL_FILES
}
)
add_embed_library
(
migraphx_kernels
${
KERNEL_FILES
}
RELATIVE
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/
)
file
(
GLOB DEVICE_GPU_SRCS
${
CONFIGURE_DEPENDS
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/*.cpp
)
file
(
GLOB DEVICE_GPU_SRCS CONFIGURE_DEPENDS
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/*.cpp
)
add_library
(
migraphx_device
${
DEVICE_GPU_SRCS
}
)
add_library
(
compile_for_gpu INTERFACE
)
...
...
@@ -65,6 +70,8 @@ target_link_libraries(migraphx_device PUBLIC migraphx)
target_link_libraries
(
migraphx_device PRIVATE compile_for_gpu
)
target_include_directories
(
migraphx_device PUBLIC $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/include>
)
target_include_directories
(
migraphx_device PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/include>
)
target_compile_options
(
migraphx_device PRIVATE -Wno-ignored-attributes
)
migraphx_generate_export_header
(
migraphx_device DIRECTORY migraphx/gpu/device
)
add_library
(
kernel_file_check EXCLUDE_FROM_ALL
)
...
...
@@ -80,7 +87,13 @@ target_link_libraries(kernel_file_check compile_for_gpu)
rocm_clang_tidy_check
(
kernel_file_check
)
file
(
GLOB JIT_GPU_SRCS
${
CONFIGURE_DEPENDS
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/jit/*.cpp
)
file
(
GLOB JIT_GPU_SRCS CONFIGURE_DEPENDS
${
CMAKE_CURRENT_SOURCE_DIR
}
/jit/*.cpp
)
if
(
WIN32
)
# TODO: re-enable when CK is ported to Windows
list
(
REMOVE_ITEM JIT_GPU_SRCS
${
CMAKE_CURRENT_SOURCE_DIR
}
/jit/ck_gemm.cpp
)
endif
()
add_library
(
migraphx_gpu
abs.cpp
analyze_streams.cpp
...
...
@@ -95,6 +108,7 @@ add_library(migraphx_gpu
compile_miopen.cpp
compiler.cpp
device_name.cpp
fuse_ck.cpp
fuse_mlir.cpp
fuse_ops.cpp
gather.cpp
...
...
@@ -123,11 +137,14 @@ add_library(migraphx_gpu
schedule_model.cpp
sync_device.cpp
target.cpp
time_op.cpp
topk.cpp
write_literals.cpp
${
JIT_GPU_SRCS
}
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
migraphx_generate_export_header
(
migraphx_gpu
)
function
(
register_migraphx_gpu_ops PREFIX
)
foreach
(
OP
${
ARGN
}
)
...
...
@@ -169,7 +186,7 @@ register_op(migraphx_gpu
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu HEADER migraphx/gpu/convolution.hpp
OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::
de
convolution> gpu::miopen_convolution<op::quant_convolution>
OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::convolution
_backwards
> gpu::miopen_convolution<op::quant_convolution>
INCLUDES migraphx/gpu/context.hpp
)
rocm_set_soversion
(
migraphx_gpu
${
MIGRAPHX_SO_VERSION
}
)
rocm_clang_tidy_check
(
migraphx_gpu
)
...
...
@@ -181,7 +198,9 @@ if(MIGRAPHX_ENABLE_MLIR)
find_package
(
rocMLIR 1.0.0 CONFIG REQUIRED
)
message
(
STATUS
"Build with rocMLIR::rockCompiler
${
rocMLIR_VERSION
}
"
)
target_compile_definitions
(
migraphx_gpu PRIVATE
"-DMIGRAPHX_MLIR"
)
target_link_libraries
(
migraphx_gpu PUBLIC rocMLIR::rockCompiler
)
# Make this private to avoid multiple inclusions of LLVM symbols.
# TODO: Fix rocMLIR's library to hide LLVM internals.
target_link_libraries
(
migraphx_gpu PRIVATE rocMLIR::rockCompiler
)
endif
()
if
(
MIGRAPHX_USE_HIPRTC
)
...
...
@@ -231,7 +250,12 @@ check_library_exists(roc::rocblas "rocblas_gemm_ex_get_solutions" "${ROCBLAS_LOC
set
(
MIGRAPHX_USE_FIND_2_API
"
${
HAS_FIND_2_API
}
"
CACHE BOOL
""
)
if
(
MIGRAPHX_USE_FIND_2_API
)
target_compile_definitions
(
migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API
)
check_library_exists
(
MIOpen
"miopenSetFindOptionPreallocatedTensor"
"
${
MIOPEN_LOCATION
}
"
HAS_PREALLOCATION_API
)
if
(
HAS_PREALLOCATION_API
)
target_compile_definitions
(
migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API -DMIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
)
else
()
target_compile_definitions
(
migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API
)
endif
()
message
(
STATUS
"MIGraphx is using Find-2.0 API of MIOpen"
)
else
()
message
(
STATUS
"MIGraphx is using legacy Find API in MIOpen"
)
...
...
@@ -253,6 +277,10 @@ endif()
target_link_libraries
(
migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas
)
target_link_libraries
(
migraphx_gpu PRIVATE migraphx_device migraphx_kernels
)
if
(
NOT WIN32
)
# TODO: re-enable when CK is ported to Windows
target_link_libraries
(
migraphx_gpu PRIVATE composable_kernel::jit_library
)
endif
()
add_subdirectory
(
driver
)
add_subdirectory
(
hiprtc
)
...
...
src/targets/gpu/compile_gen.cpp
View file @
23cb7917
...
...
@@ -29,6 +29,7 @@
#include <migraphx/module.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/rewrite_quantization.hpp>
#include <migraphx/cpp_generator.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/instruction.hpp>
...
...
@@ -171,7 +172,8 @@ std::string make_transformer_args(std::vector<std::string> transformers)
void
generate_pointwise
(
cpp_generator
&
gg
,
const
module
&
pm
,
const
std
::
string
&
name
)
{
module
m
=
pm
;
run_passes
(
m
,
{
eliminate_common_subexpression
{},
dead_code_elimination
{}});
run_passes
(
m
,
{
rewrite_quantization
{},
eliminate_common_subexpression
{},
dead_code_elimination
{}});
cpp_generator
g
;
g
.
fmap
([](
const
std
::
string
&
fname
)
{
return
"migraphx::"
+
fname
;
});
g
.
add_point_op
(
"where"
,
"${function:where}(${0}, ${1}, ${2})"
);
...
...
@@ -280,6 +282,14 @@ std::string generate_reduce(const module& m, const std::string& name)
not
input
->
get_shape
().
broadcasted
();
});
auto
inner_names
=
names
;
for
(
auto
input
:
ins
->
inputs
())
{
if
(
input
->
name
()
!=
"@param"
)
continue
;
if
(
contains
(
tensors
,
input
))
continue
;
inner_names
[
input
]
+=
"[out_idx]"
;
}
for
(
auto
input
:
tensors
)
inner_names
[
input
]
+=
"_lambda_param"
;
auto
call_function
=
...
...
@@ -308,6 +318,8 @@ std::string generate_reduce(const module& m, const std::string& name)
});
f
.
set_attributes
({
"__device__"
,
"__attribute__((const))"
}).
set_generic_types
(
m
).
set_name
(
name
);
f
.
add_generic_param
(
"r"
);
f
.
add_generic_param
(
"out_idx"
);
f
.
unused_param
(
"out_idx"
);
g
.
create_function
(
f
);
return
g
.
str
();
}
...
...
@@ -319,7 +331,7 @@ static std::vector<std::string> get_op_names(const module& m)
{
if
(
starts_with
(
ins
.
name
(),
"@"
))
continue
;
if
(
ins
.
name
()
==
"multibroadcast"
)
if
(
contains
({
"multibroadcast"
,
"contiguous"
},
ins
.
name
())
)
continue
;
if
(
ins
.
name
()
==
"pointwise"
)
{
...
...
src/targets/gpu/compile_hip.cpp
View file @
23cb7917
...
...
@@ -56,9 +56,6 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_SRC);
#ifdef MIGRAPHX_USE_HIPRTC
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_TRACE_HIPRTC
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS
);
std
::
string
hiprtc_error
(
hiprtcResult
err
,
const
std
::
string
&
msg
)
{
return
"hiprtc: "
+
(
hiprtcGetErrorString
(
err
)
+
(
": "
+
msg
));
...
...
@@ -194,6 +191,7 @@ std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_sr
options
.
push_back
(
"-DMIGRAPHX_HAS_DPP=0"
);
options
.
push_back
(
"-DMIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=1"
);
options
.
push_back
(
"-Wno-reserved-identifier"
);
options
.
push_back
(
"-Wno-unused-parameter"
);
options
.
push_back
(
"-Wno-gnu-line-marker"
);
options
.
push_back
(
"-Wno-old-style-cast"
);
}
...
...
@@ -216,6 +214,15 @@ std::vector<std::vector<char>>
compile_hip_src
(
const
std
::
vector
<
src_file
>&
srcs
,
std
::
string
params
,
const
std
::
string
&
arch
)
{
std
::
vector
<
hiprtc_src_file
>
hsrcs
{
srcs
.
begin
(),
srcs
.
end
()};
if
(
enabled
(
MIGRAPHX_GPU_DUMP_SRC
{}))
{
for
(
const
auto
&
src
:
srcs
)
{
if
(
src
.
path
.
extension
()
!=
".cpp"
)
continue
;
std
::
cout
<<
std
::
string
(
src
.
content
.
first
,
src
.
len
())
<<
std
::
endl
;
}
}
auto
p
=
dynamic_loader
::
path
(
&
compile_hip_src_with_hiprtc
);
auto
driver
=
p
.
parent_path
().
parent_path
()
/
"bin"
/
"migraphx-hiprtc-driver"
;
...
...
src/targets/gpu/compile_hip_code_object.cpp
View file @
23cb7917
...
...
@@ -135,10 +135,14 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
std
::
size_t
max_global
=
ctx
.
get_current_device
().
get_cu_count
()
*
ctx
.
get_current_device
().
get_max_workitems_per_cu
();
return
[
n
,
over
,
max_global
](
std
::
size_t
local
)
{
std
::
size_t
groups
=
(
n
+
local
-
1
)
/
local
;
std
::
size_t
max_blocks
=
max_global
/
local
;
std
::
size_t
nglobal
=
std
::
min
(
max_blocks
*
over
,
groups
)
*
local
;
return
std
::
min
(
nglobal
,
n
);
// HIP requires the number of global workitems to be a multiple of the local workitems. This may degrade performance.
// [TODO]: consider adding "fno-hip-uniform-block" flag when it becomes available.
// https://reviews.llvm.org/D155213
std
::
size_t
num_elements
=
((
n
+
local
-
1
)
/
local
)
*
local
;
std
::
size_t
groups
=
(
num_elements
+
local
-
1
)
/
local
;
std
::
size_t
max_blocks
=
max_global
/
local
;
std
::
size_t
nglobal
=
std
::
min
(
max_blocks
*
over
,
groups
)
*
local
;
return
std
::
min
(
nglobal
,
num_elements
);
};
}
...
...
@@ -156,14 +160,14 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
assert
(
not
options
.
inputs
.
empty
());
assert
(
options
.
inputs
.
size
()
==
options
.
virtual_inputs
.
size
()
or
options
.
virtual_inputs
.
empty
());
std
::
vector
<
src_file
>
srcs
;
std
::
vector
<
src_file
>
srcs
=
options
.
additional_src_files
;
std
::
transform
(
migraphx_kernels
().
begin
(),
migraphx_kernels
().
end
(),
std
::
back_inserter
(
srcs
),
[](
auto
&&
p
)
{
auto
&&
name
=
p
.
first
;
auto
&&
c
=
p
.
second
;
auto
path
=
fs
::
path
{
"migraphx"
}
/
"kernels"
/
name
;
auto
path
=
name
;
return
src_file
{
path
,
c
};
});
srcs
.
push_back
(
src_file
{
fs
::
path
{
"main.cpp"
},
...
...
src/targets/gpu/compile_miopen.cpp
View file @
23cb7917
...
...
@@ -79,7 +79,7 @@ void compile_miopen::apply(module& m) const
std
::
size_t
ws
=
0
;
try
{
// for the regular convolution and
de
convolution, this try would always succeed
// for the regular convolution and convolution
_backwards
, this try would always succeed
ws
=
compile
(
op
,
ins
,
int8_x4_format
);
}
catch
(
migraphx
::
exception
&
)
...
...
src/targets/gpu/compile_ops.cpp
View file @
23cb7917
...
...
@@ -30,6 +30,7 @@
#include <migraphx/register_op.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/time_op.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -76,33 +77,201 @@ struct compiled_result
instruction_ref
ins
;
};
struct
problem_cache
{
bool
has
(
const
std
::
string
&
name
,
const
value
&
problem
)
const
{
return
contains
(
cache
,
create_key
(
name
,
problem
));
}
void
insert
(
const
std
::
string
&
name
,
const
value
&
problem
,
const
value
&
solution
)
{
assert
(
not
solution
.
is_null
());
cache
[
create_key
(
name
,
problem
)]
=
solution
;
}
void
mark
(
const
std
::
string
&
name
,
const
value
&
problem
)
{
cache
.
insert
(
std
::
make_pair
(
create_key
(
name
,
problem
),
value
{}));
}
optional
<
value
>
get
(
const
std
::
string
&
name
,
const
value
&
problem
)
const
{
auto
it
=
cache
.
find
(
create_key
(
name
,
problem
));
if
(
it
==
cache
.
end
())
return
nullopt
;
return
it
->
second
;
}
static
value
create_key
(
const
std
::
string
&
name
,
const
value
&
problem
)
{
return
{{
"name"
,
name
},
{
"problem"
,
problem
}};
}
std
::
unordered_map
<
value
,
value
>
cache
;
};
// A single operation waiting to be compiled, together with its optional
// tuning configuration and one result slot per candidate solution.
//
// This struct is brace-initialized from (ctx, preop, ins), so it must remain
// an aggregate and the order of the data members must not change.
struct compile_plan
{
    context* ctx;
    operation preop;
    instruction_ref ins;
    optional<tuning_config> config                 = nullopt;
    std::vector<optional<compiled_result>> results = {};

    // Refreshes `config` with whatever get_tuning_config reports for this op.
    void update_config(bool exhaustive)
    {
        config = get_tuning_config(*ctx, ins, preop, exhaustive);
    }

    // Appends a deferred job to `compiles` that compiles this op with
    // `solution` and stores the outcome in results[i]. Any exception thrown
    // while compiling is swallowed and recorded as nullopt in that slot.
    // The job keeps a pointer to this plan, so the plan must stay alive and
    // at the same address until the job has run.
    template <class Vector>
    void insert_compiles(Vector& compiles, const value& solution, std::size_t i)
    {
        compiles.emplace_back([this, solution, i] {
            try
            {
                results[i] = compiled_result{compile(*ctx, ins, preop, solution), ins};
            }
            catch(...)
            {
                results[i] = nullopt;
            }
        });
    }

    // Queues the compile jobs this plan needs and sizes `results` to match.
    //  - no tuning config: one job, with a default-constructed solution;
    //  - cached entry for the problem: one job using it, unless the entry is
    //    still a null placeholder, in which case nothing is queued;
    //  - no cached entry: the problem is marked in `pc` and one job is queued
    //    per candidate solution.
    template <class Vector>
    void add_compiles(Vector& compiles, problem_cache& pc)
    {
        if(not config.has_value())
        {
            results.resize(1);
            insert_compiles(compiles, value{}, 0);
            return;
        }

        const auto& problem = config->problem;
        auto cached         = pc.get(preop.name(), problem);
        if(cached.has_value())
        {
            auto solution = cached.value();
            // The problem is marked but not benchmarked yet, so there is no
            // solution to compile; skip it for now.
            if(solution.is_null())
                return;
            results.resize(1);
            insert_compiles(compiles, solution, 0);
            return;
        }

        pc.mark(preop.name(), problem);
        const auto& solutions = config->solutions;
        results.resize(solutions.size());
        for(std::size_t i = 0; i < solutions.size(); ++i)
            insert_compiles(compiles, solutions[i], i);
    }

    // Picks the compiled result to use.
    // With exactly one result it is returned directly. With several, each
    // successful result is timed with time_op (failed ones count as the
    // largest double), the fastest solution is printed and stored in `pc`,
    // and the matching result is returned.
    // Throws via MIGRAPHX_THROW when there are no results, when the chosen
    // result is empty, or when several results exist without a config.
    const compiled_result& benchmark(problem_cache& pc) const
    {
        const auto count = results.size();
        if(count == 0)
            MIGRAPHX_THROW("No configs to tune");
        if(count == 1)
        {
            const auto& only = results.front();
            if(not only.has_value())
                MIGRAPHX_THROW("No configs to tune");
            return *only;
        }
        if(not config)
            MIGRAPHX_THROW("Multiple kernels without config");

        std::cout << "Benchmarking " << preop.name() << ": " << count << " configs" << std::endl;

        std::vector<double> times;
        times.reserve(count);
        for(const auto& cr : results)
        {
            if(cr.has_value())
            {
                // NOTE(review): 20 is presumably the number of timed runs and
                // .first the measured time; confirm against time_op.
                times.push_back(
                    time_op(*ctx, cr->replace.code_object, to_shapes(cr->ins->inputs()), 20)
                        .first);
            }
            else
            {
                times.push_back(std::numeric_limits<double>::max());
            }
        }

        const auto fastest = std::min_element(times.begin(), times.end());
        const auto i       = std::distance(times.begin(), fastest);
        const auto& winner = config->solutions.at(i);
        std::cout << "Fastest solution: " << winner << std::endl;
        pc.insert(preop.name(), config->problem, winner);
        if(not results[i].has_value())
            MIGRAPHX_THROW("No valid tuned compilation.");
        return *results[i];
    }

    // Applies the result chosen by benchmark() to module `m`.
    void replace(module& m, problem_cache& pc) const
    {
        const auto& best = benchmark(pc);
        best.replace.replace(m, best.ins);
    }
};
template
<
class
F
>
void
par_compile
(
std
::
size_t
n
,
F
f
)
{
if
(
n
==
0
)
return
;
par_for
(
n
,
n
/
value_of
(
MIGRAPHX_GPU_COMPILE_PARALLEL
{},
n
),
f
);
auto
d
=
value_of
(
MIGRAPHX_GPU_COMPILE_PARALLEL
{});
if
(
d
==
0
)
d
=
n
;
par_for
(
n
,
n
/
d
,
f
);
}
void
compile_
ops
::
apply
(
module
&
m
)
const
struct
compile_
manager
{
std
::
vector
<
std
::
function
<
compiled_result
()
>>
compiles
;
problem_cache
pc
;
std
::
vector
<
compile_plan
>
cps
;
bool
exhaustive
=
false
;
template
<
class
...
Ts
>
void
add_plan
(
Ts
&&
...
xs
)
{
cps
.
push_back
({
std
::
forward
<
Ts
>
(
xs
)...});
}
void
update_configs
()
{
par_compile
(
cps
.
size
(),
[
&
](
auto
i
)
{
cps
[
i
].
update_config
(
exhaustive
);
});
}
void
compile
(
module
&
m
)
{
std
::
vector
<
std
::
function
<
void
()
>>
compiles
;
for
(
auto
&
cp
:
cps
)
{
cp
.
add_compiles
(
compiles
,
pc
);
}
par_compile
(
compiles
.
size
(),
[
&
](
auto
i
)
{
compiles
[
i
]();
});
// Replace and/or benchmark
for
(
const
auto
&
cp
:
cps
)
{
if
(
cp
.
results
.
empty
())
continue
;
cp
.
replace
(
m
,
pc
);
}
// Remove compile_plan already executed
cps
.
erase
(
std
::
remove_if
(
cps
.
begin
(),
cps
.
end
(),
[](
const
auto
&
cp
)
{
return
not
cp
.
results
.
empty
();
}),
cps
.
end
());
}
};
void
compile_ops
::
apply
(
module
&
m
)
const
{
compile_manager
cm
;
cm
.
exhaustive
=
exhaustive_tune
;
// Find all precompile ops
for
(
auto
ins
:
iterator_for
(
m
))
{
if
(
ins
->
name
()
!=
"gpu::precompile_op"
)
continue
;
operation
preop
=
any_cast
<
precompile_op
>
(
ins
->
get_operator
()).
op
;
compiles
.
emplace_back
([
=
]()
->
compiled_result
{
return
{
compile
(
*
ctx
,
ins
,
preop
),
ins
};
});
}
std
::
vector
<
compiled_result
>
results
(
compiles
.
size
());
par_compile
(
compiles
.
size
(),
[
&
](
auto
i
)
{
results
[
i
]
=
compiles
[
i
]();
});
for
(
const
auto
&
cr
:
results
)
{
cr
.
replace
(
m
,
cr
.
ins
);
cm
.
add_plan
(
ctx
,
preop
,
ins
);
}
cm
.
update_configs
();
cm
.
compile
(
m
);
// Compile already tuned configs
cm
.
compile
(
m
);
assert
(
cm
.
cps
.
empty
());
}
}
// namespace gpu
...
...
Prev
1
…
8
9
10
11
12
13
14
15
16
…
23
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment