Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
dcb98a60
Commit
dcb98a60
authored
Aug 30, 2023
by
Paul
Browse files
Merge branch 'develop' into ubuntu-22.04-default
parents
d05768a4
d2486dcd
Changes
114
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
224 additions
and
146 deletions
+224
-146
src/targets/gpu/compile_gen.cpp
src/targets/gpu/compile_gen.cpp
+1
-1
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp
...targets/gpu/device/include/migraphx/gpu/device/launch.hpp
+7
-1
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
+2
-2
src/targets/gpu/device/topk.cpp
src/targets/gpu/device/topk.cpp
+2
-2
src/targets/gpu/device_name.cpp
src/targets/gpu/device_name.cpp
+1
-15
src/targets/gpu/fuse_mlir.cpp
src/targets/gpu/fuse_mlir.cpp
+35
-30
src/targets/gpu/include/migraphx/gpu/context.hpp
src/targets/gpu/include/migraphx/gpu/context.hpp
+2
-8
src/targets/gpu/include/migraphx/gpu/device_name.hpp
src/targets/gpu/include/migraphx/gpu/device_name.hpp
+0
-2
src/targets/gpu/include/migraphx/gpu/hip.hpp
src/targets/gpu/include/migraphx/gpu/hip.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/mlir.hpp
src/targets/gpu/include/migraphx/gpu/mlir.hpp
+3
-2
src/targets/gpu/jit/ck_gemm.cpp
src/targets/gpu/jit/ck_gemm.cpp
+2
-1
src/targets/gpu/jit/mlir.cpp
src/targets/gpu/jit/mlir.cpp
+6
-4
src/targets/gpu/mlir.cpp
src/targets/gpu/mlir.cpp
+84
-58
src/targets/gpu/no_device.cpp
src/targets/gpu/no_device.cpp
+28
-0
src/targets/gpu/time_op.cpp
src/targets/gpu/time_op.cpp
+1
-1
src/tf/tf_parser.cpp
src/tf/tf_parser.cpp
+1
-1
src/value.cpp
src/value.cpp
+1
-1
test/CMakeLists.txt
test/CMakeLists.txt
+10
-11
test/api/test_cpu.cpp
test/api/test_cpu.cpp
+3
-3
test/eliminate_contiguous_test.cpp
test/eliminate_contiguous_test.cpp
+34
-2
No files found.
src/targets/gpu/compile_gen.cpp
View file @
dcb98a60
...
...
@@ -331,7 +331,7 @@ static std::vector<std::string> get_op_names(const module& m)
{
if
(
starts_with
(
ins
.
name
(),
"@"
))
continue
;
if
(
ins
.
name
()
==
"multibroadcast"
)
if
(
contains
({
"multibroadcast"
,
"contiguous"
},
ins
.
name
())
)
continue
;
if
(
ins
.
name
()
==
"pointwise"
)
{
...
...
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp
View file @
dcb98a60
...
...
@@ -41,7 +41,7 @@ struct index
__device__
index_int
nglobal
()
const
{
return
blockDim
.
x
*
gridDim
.
x
;
}
// NOLINT
__device__
index_int
nlocal
()
const
{
return
blockDim
.
x
;
}
// NOLINT
__device__
index_int
nlocal
()
const
{
return
blockDim
.
x
;
}
// NOLINT
template
<
class
F
>
__device__
void
global_stride
(
index_int
n
,
F
f
)
const
...
...
@@ -81,6 +81,12 @@ inline auto launch(hipStream_t stream, index_int global, index_int local)
dim3
nthreads
(
local
);
// cppcheck-suppress UseDeviceLaunch
hipLaunchKernelGGL
((
launcher
<
f_type
>
),
nblocks
,
nthreads
,
0
,
stream
,
f
);
hipError_t
kernel_launch_status
=
hipGetLastError
();
if
(
kernel_launch_status
!=
hipSuccess
)
{
MIGRAPHX_THROW
(
"MIGraphX device kernel failed to launch with error: "
+
std
::
string
(
hipGetErrorString
(
kernel_launch_status
)));
}
};
}
...
...
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
View file @
dcb98a60
...
...
@@ -124,7 +124,7 @@ void nary_broadcast_vec_impl(
buffer
[
i
]
=
binput
.
data
()[
i
];
}
__syncthreads
();
auto
*
bp
=
as_pointer
(
buffer
);
const
auto
*
bp
=
as_pointer
(
buffer
);
// Process the data
for
(
size_t
i
=
idx
.
global
;
i
<
nelements
;
i
+=
nglobal
)
{
...
...
@@ -219,7 +219,7 @@ void nary_double_broadcast_vec_impl(
buffer
[
i
+
bdim_vec_len
]
=
binput2
.
data
()[
i
];
}
__syncthreads
();
auto
*
bp
=
as_pointer
(
buffer
);
const
auto
*
bp
=
as_pointer
(
buffer
);
// Process the data
for
(
size_t
i
=
idx
.
global
;
i
<
nelements
;
i
+=
nglobal
)
{
...
...
src/targets/gpu/device/topk.cpp
View file @
dcb98a60
...
...
@@ -72,12 +72,12 @@ struct hip_heap_vector
index_int
l
=
2
*
index
+
1
;
index_int
r
=
2
*
index
+
2
;
if
(
l
<
n
&&
compare
(
data
[
data_index
(
l
)],
data
[
data_index
(
index
)]))
if
(
l
<
n
and
compare
(
data
[
data_index
(
l
)],
data
[
data_index
(
index
)]))
{
index
=
l
;
}
if
(
r
<
n
&&
compare
(
data
[
data_index
(
r
)],
data
[
data_index
(
index
)]))
if
(
r
<
n
and
compare
(
data
[
data_index
(
r
)],
data
[
data_index
(
index
)]))
{
index
=
r
;
if
(
compare
(
data
[
data_index
(
l
)],
data
[
data_index
(
r
)]))
...
...
src/targets/gpu/device_name.cpp
View file @
dcb98a60
...
...
@@ -31,20 +31,6 @@ namespace migraphx {
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
template
<
class
HipDeviceProp
>
std
::
string
get_arch_name
(
rank
<
0
>
,
const
HipDeviceProp
&
props
)
{
return
"gfx"
+
std
::
to_string
(
props
.
gcnArch
);
}
template
<
class
HipDeviceProp
>
auto
get_arch_name
(
rank
<
1
>
,
const
HipDeviceProp
&
props
)
->
decltype
(
std
::
string
(
props
.
gcnArchName
))
{
return
std
::
string
(
props
.
gcnArchName
);
}
std
::
string
get_arch_name
(
const
hipDeviceProp_t
&
props
)
{
return
get_arch_name
(
rank
<
1
>
{},
props
);
}
int
get_device_id
()
{
int
device
;
...
...
@@ -60,7 +46,7 @@ std::string get_device_name()
auto
status
=
hipGetDeviceProperties
(
&
props
,
get_device_id
());
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed to get device properties"
);
return
get_a
rch
_n
ame
(
props
)
;
return
props
.
gcnA
rch
N
ame
;
}
}
// namespace gpu
...
...
src/targets/gpu/fuse_mlir.cpp
View file @
dcb98a60
...
...
@@ -86,7 +86,7 @@ struct mlir_op
size_t
param_cnt
=
0
;
std
::
vector
<
std
::
string
>
names
=
mod
->
get_parameter_names
();
std
::
sort
(
names
.
begin
(),
names
.
end
());
for
(
std
::
string
param_name
:
names
)
for
(
const
std
::
string
&
param_name
:
names
)
{
ins_shapes
[
mod
->
get_parameter
(
param_name
)]
=
inputs
[
param_cnt
++
];
}
...
...
@@ -210,42 +210,47 @@ struct find_mlir_op
return
false
;
}
const
std
::
initializer_list
<
std
::
string
>
any_type_ops
=
{
"@literal"
,
"@param"
,
"@return"
};
const
std
::
initializer_list
<
std
::
string
>
no_bool_ops
=
{
"convolution"
,
"quant_convolution"
,
"dot"
,
"quant_dot"
,
"add"
,
"clip"
,
"relu"
,
"sub"
,
"mul"
,
"div"
,
"pow"
,
"where"
,
"quantizelinear"
,
"dequantizelinear"
,
"abs"
,
"neg"
};
const
std
::
initializer_list
<
std
::
string
>
fp_only_ops
=
{
"ceil"
,
"erf"
,
"exp"
,
"floor"
,
"log"
,
"recip"
,
"rsqrt"
,
"sigmoid"
"softmax"
,
"tanh"
};
const
std
::
initializer_list
<
std
::
string
>
no_bool_ops
=
{
"convolution"
,
"quant_convolution"
,
"dot"
,
"quant_dot"
,
"add"
,
"clip"
,
"relu"
,
"sub"
,
"mul"
,
"div"
,
"pow"
,
"where"
,
"quantizelinear"
,
"dequantizelinear"
,
"abs"
,
"neg"
,
};
const
std
::
initializer_list
<
std
::
string
>
fp_only_ops
=
{
"ceil"
,
"erf"
,
"exp"
,
"floor"
,
"log"
,
"recip"
,
"rsqrt"
,
// There are bugs in MLIR right now for models using sigmoid so disable it for now
// "sigmoid",
"softmax"
,
"tanh"
,
};
bool
is_float
=
contains
({
type_t
::
float_type
,
type_t
::
half_type
},
result_type
);
if
(
contains
(
any_type_ops
,
name
))
return
true
;
if
(
result_type
!=
type_t
::
bool_type
&&
contains
(
no_bool_ops
,
name
))
if
(
result_type
!=
type_t
::
bool_type
and
contains
(
no_bool_ops
,
name
))
return
true
;
if
(
is_float
&&
contains
(
fp_only_ops
,
name
))
if
(
is_float
and
contains
(
fp_only_ops
,
name
))
return
true
;
// Only conversions between floating types are known to be unambigiously
// supported.
if
(
is_float
&&
name
==
"convert"
)
if
(
is_float
and
name
==
"convert"
)
{
return
std
::
all_of
(
i
.
inputs
().
begin
(),
i
.
inputs
().
end
(),
[](
const
auto
&
arg
)
{
return
contains
({
type_t
::
float_type
,
type_t
::
half_type
},
arg
->
get_shape
().
type
());
...
...
src/targets/gpu/include/migraphx/gpu/context.hpp
View file @
dcb98a60
...
...
@@ -46,13 +46,7 @@ using hip_event_ptr = MIGRAPHX_MANAGE_PTR(hipEvent_t, hipEventDestroy);
struct
hip_device
{
hip_device
()
{
device_props
.
gcnArchName
[
0
]
=
'\0'
;
device_props
.
gcnArch
=
0
;
device_props
.
multiProcessorCount
=
0
;
add_stream
();
}
hip_device
()
:
device_props
{}
{
add_stream
();
}
hip_device
(
std
::
size_t
id
,
std
::
size_t
n
)
:
device_id
(
id
)
{
...
...
@@ -171,7 +165,7 @@ struct hip_device
std
::
size_t
stream_id
()
const
{
return
current_stream
;
}
std
::
string
get_device_name
()
const
{
return
get_arch_name
(
device_props
)
;
}
std
::
string
get_device_name
()
const
{
return
device_props
.
gcnArchName
;
}
std
::
string
get_gfx_name
()
const
{
return
trim
(
split_string
(
get_device_name
(),
':'
).
front
());
}
...
...
src/targets/gpu/include/migraphx/gpu/device_name.hpp
View file @
dcb98a60
...
...
@@ -33,8 +33,6 @@ namespace migraphx {
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
MIGRAPHX_GPU_EXPORT
std
::
string
get_arch_name
(
const
hipDeviceProp_t
&
props
);
MIGRAPHX_GPU_EXPORT
std
::
string
get_device_name
();
MIGRAPHX_GPU_EXPORT
int
get_device_id
();
...
...
src/targets/gpu/include/migraphx/gpu/hip.hpp
View file @
dcb98a60
...
...
@@ -92,7 +92,7 @@ struct hip_sync_stream
return
inputs
.
front
();
}
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
argument
compute
(
const
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
gpu_sync
(
ctx
);
if
(
args
.
empty
())
...
...
src/targets/gpu/include/migraphx/gpu/mlir.hpp
View file @
dcb98a60
...
...
@@ -37,7 +37,7 @@ struct module;
namespace
gpu
{
MIGRAPHX_GPU_EXPORT
std
::
string
dump_mlir
(
const
module
&
m
);
MIGRAPHX_GPU_EXPORT
code_object_op
compile_mlir
(
const
context
&
ctx
,
MIGRAPHX_GPU_EXPORT
code_object_op
compile_mlir
(
const
context
&
migraphx_
ctx
,
module
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
,
const
value
&
solution
);
...
...
@@ -47,7 +47,8 @@ MIGRAPHX_GPU_EXPORT instruction_ref insert_mlir(module& m,
code_object_op
co
,
const
std
::
vector
<
instruction_ref
>&
inputs
);
MIGRAPHX_GPU_EXPORT
tuning_config
get_tuning_config_mlir
(
module
m
,
MIGRAPHX_GPU_EXPORT
tuning_config
get_tuning_config_mlir
(
const
context
&
migraphx_ctx
,
module
m
,
const
std
::
vector
<
shape
>&
inputs
);
}
// namespace gpu
...
...
src/targets/gpu/jit/ck_gemm.cpp
View file @
dcb98a60
...
...
@@ -300,7 +300,8 @@ struct ck_gemm_compiler : compiler<ck_gemm_compiler>
const
auto
&
b_shape
=
inputs
[
1
];
const
auto
&
c_shape
=
inputs
.
back
();
auto
rank
=
a_shape
.
lens
().
size
();
// cppcheck-suppress unreadVariable
auto
rank
=
a_shape
.
ndim
();
auto
batch_count
=
get_batch_count
(
c_shape
);
auto
m
=
c_shape
.
lens
()[
rank
-
2
];
...
...
src/targets/gpu/jit/mlir.cpp
View file @
dcb98a60
...
...
@@ -37,7 +37,7 @@ struct mlir_compiler : compiler<mlir_compiler>
operation
compile_op
(
context
&
,
const
std
::
vector
<
shape
>&
,
const
value
&
)
const
{
return
{};
}
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
,
const
value
&
solution
)
const
compile
(
const
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
,
const
value
&
solution
)
const
{
auto
*
smod
=
ins
->
module_inputs
().
front
();
assert
(
smod
->
get_parameter_names
().
size
()
==
ins
->
inputs
().
size
()
-
1
);
...
...
@@ -52,14 +52,16 @@ struct mlir_compiler : compiler<mlir_compiler>
}};
}
optional
<
tuning_config
>
get_tuning_config
(
context
&
,
instruction_ref
ins
,
const
operation
&
,
bool
exhaustive
)
const
optional
<
tuning_config
>
get_tuning_config
(
const
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
,
bool
exhaustive
)
const
{
if
(
not
exhaustive
)
return
nullopt
;
auto
shapes
=
to_shapes
(
ins
->
inputs
());
auto
*
smod
=
ins
->
module_inputs
().
front
();
return
get_tuning_config_mlir
(
*
smod
,
shapes
);
return
get_tuning_config_mlir
(
ctx
,
*
smod
,
shapes
);
}
};
...
...
src/targets/gpu/mlir.cpp
View file @
dcb98a60
...
...
@@ -36,7 +36,10 @@
#include <mutex>
#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3
#warning "Incompatible version of rocMLIR library used, disabling"
// Only undefine when not using cppcheck
#ifndef CPPCHECK
#undef MIGRAPHX_MLIR
#endif
#else
#include <mlir-c/RegisterRocMLIR.h>
#endif
...
...
@@ -50,6 +53,7 @@
#include <migraphx/ranges.hpp>
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/perfdb.hpp>
#include <migraphx/gpu/tuning_config.hpp>
...
...
@@ -172,12 +176,6 @@ std::string mlir_print(F f, T x)
return
ss
.
str
();
}
bool
has_xdlops
(
const
std
::
string
&
target_arch
)
{
const
auto
device_name
=
trim
(
split_string
(
target_arch
,
':'
).
front
());
return
(
starts_with
(
device_name
,
"gfx9"
)
and
device_name
>=
"gfx908"
);
}
struct
mlir_program
{
mlir_program
()
...
...
@@ -512,7 +510,8 @@ struct mlir_program
ops
.
add_attributes
({{
"function_type"
,
make_function_type
(
inputs
,
outputs
)},
{
"sym_name"
,
sym_name
},
{
"kernel"
,
std
::
string
(
"mixr"
)},
{
"arch"
,
target_arch
}});
{
"arch"
,
target_arch
},
{
"num_cu"
,
num_cu
}});
ops
.
add_region
(
std
::
move
(
region
));
insert
(
body
,
std
::
move
(
ops
));
...
...
@@ -559,14 +558,7 @@ struct mlir_program
static
std
::
string
get_symbol_name
(
const
module
&
m
)
{
for
(
auto
ins
:
iterator_for
(
m
))
{
if
(
ins
->
name
()
==
"convolution"
or
ins
->
name
()
==
"dot"
)
{
return
"mlir_"
+
ins
->
name
();
}
}
return
"main"
;
return
"mlir_"
+
gen
::
generate_name_from_ops
(
m
);
}
void
parse
(
const
module
&
m
)
...
...
@@ -602,9 +594,6 @@ struct mlir_program
{
pp
=
problem_params
{
ins
->
get_operator
(),
to_shapes
(
ins
->
inputs
()),
ins
->
get_shape
()};
// check if HW supports xdlops
if
(
has_xdlops
(
target_arch
))
ops
.
add_attributes
({{
"xdlopsV2"
,
true
}});
}
std
::
vector
<
MlirValue
>
inputs
;
...
...
@@ -653,7 +642,12 @@ struct mlir_program
return
op
;
}
void
find_target
()
{
target_arch
=
get_device_name
();
}
void
set_gpu_properties
(
const
context
&
migraphx_ctx
)
{
const
auto
&
device
=
migraphx_ctx
.
get_current_device
();
target_arch
=
device
.
get_device_name
();
num_cu
=
device
.
get_cu_count
();
}
std
::
pair
<
std
::
size_t
,
std
::
size_t
>
get_launch_params
()
const
{
...
...
@@ -667,7 +661,7 @@ struct mlir_program
value
::
binary
get_binary
()
const
{
in
t
size
=
0
;
size_
t
size
=
0
;
mlirGetBinary
(
mmodule
.
get
(),
&
size
,
nullptr
);
value
::
binary
result
(
size
);
if
(
mlirGetBinary
(
mmodule
.
get
(),
&
size
,
reinterpret_cast
<
char
*>
(
result
.
data
())))
...
...
@@ -675,30 +669,41 @@ struct mlir_program
MIGRAPHX_THROW
(
"Failed to compile mlir program"
);
}
void
set_tuning
(
const
value
&
v
)
void
set_tuning
(
const
value
&
v
)
MIGRAPHX_TIDY_CONST
{
auto
str
=
v
.
to
<
std
::
string
>
();
// We need to make a copy of the buffer since mlirRockTuningSetFromStr may modify the string
std
::
vector
<
char
>
buffer
(
str
.
begin
(),
str
.
end
());
buffer
.
push_back
(
0
);
if
(
not
mlirRockTuningSetFromStr
(
mmodule
.
get
(),
buffer
.
data
()))
MIGRAPHX_THROW
(
"Failed setting tuning key: "
+
str
);
const
auto
*
str
=
v
.
if_string
();
if
(
str
==
nullptr
)
MIGRAPHX_THROW
(
"mlir tuning solutions must be strings"
);
if
(
not
mlirRockTuningSetFromStr
(
mmodule
.
get
(),
make_mlir_string_ref
(
*
str
)))
MIGRAPHX_THROW
(
"Failed setting tuning key: "
+
*
str
);
}
tuning_config
get_tuning_config
()
MIGRAPHX_TIDY_CONST
{
tuning_config
tc
;
run_high_level_pipeline
();
mlir_tuning_space
params
{
mlirRockTuningSpaceCreate
(
mmodule
.
get
())};
for
(
auto
i
:
range
(
mlirRockTuningGetNumParamsFull
(
params
.
get
())))
mlir_tuning_space
params
{
mlirRockTuningSpaceCreate
(
mmodule
.
get
(),
RocmlirTuningParamSetKindFull
)};
for
(
auto
i
:
range
(
mlirRockTuningGetNumParams
(
params
.
get
())))
{
mlir_tuning_param
param
{
mlirRockTuningParamCreate
()};
if
(
not
mlirRockTuningParamGet
(
params
.
get
(),
i
,
param
.
get
()))
MIGRAPHX_THROW
(
"Incorrect mlir tuning parameter: "
+
std
::
to_string
(
i
));
tc
.
solutions
.
push_back
(
std
::
string
{
mlirRockTuningGetParamStr
(
param
.
get
())});
std
::
array
<
char
,
ROCMLIR_TUNING_KEY_BUFSZ
>
perf_key
;
size_t
perf_key_bytes
=
mlirRockTuningParamToString
(
param
.
get
(),
perf_key
.
data
(),
perf_key
.
size
());
if
(
perf_key_bytes
>
perf_key
.
size
())
MIGRAPHX_THROW
(
"Tuning perf key was "
+
std
::
to_string
(
perf_key_bytes
)
+
" bytes and thus too long"
);
tc
.
solutions
.
emplace_back
(
perf_key
.
begin
(),
perf_key
.
begin
()
+
perf_key_bytes
);
}
mlir_tuning_table
tuning_table
{
mlirRockTuningTableCreate
()};
tc
.
problem
=
std
::
string
{
mlirRockTuningGetKey
(
tuning_table
.
get
(),
mmodule
.
get
())};
std
::
array
<
char
,
ROCMLIR_TUNING_KEY_BUFSZ
>
tuning_key
;
size_t
tuning_key_bytes
=
mlirRockTuningGetKey
(
mmodule
.
get
(),
tuning_key
.
data
(),
tuning_key
.
size
());
if
(
tuning_key_bytes
>
tuning_key
.
size
())
MIGRAPHX_THROW
(
"Tuning table key was "
+
std
::
to_string
(
tuning_key_bytes
)
+
" bytes and thus too long"
);
tc
.
problem
=
std
::
string
(
tuning_key
.
begin
(),
tuning_key
.
begin
()
+
tuning_key_bytes
);
return
tc
;
}
...
...
@@ -706,10 +711,10 @@ struct mlir_program
// This function appends to tuning cfg file that could be
// used with rocMLIR tuning scripts.
void
dump_tuning_cfg
(
const
char
*
prob_config
)
const
void
dump_tuning_cfg
(
const
std
::
string
&
prob_config
)
const
{
std
::
string
tuning_cfg_path
=
string_value_of
(
MIGRAPHX_MLIR_TUNING_CFG
{});
if
(
!
tuning_cfg_path
.
empty
())
if
(
not
tuning_cfg_path
.
empty
())
{
std
::
vector
<
std
::
string
>
tokens
=
split_string
(
prob_config
,
'\t'
);
std
::
string
prob
=
tokens
[
1
];
...
...
@@ -726,51 +731,66 @@ struct mlir_program
}
}
static
mlir_tuning_table
create
_tuning_table
()
static
std
::
pair
<
mlir_tuning_table
,
bool
>
load
_tuning_table
()
{
mlir_tuning_table
tuning_table
{
mlirRockTuningTableCreate
()};
bool
found_table
=
false
;
std
::
string
tuning_db_path
=
string_value_of
(
MIGRAPHX_MLIR_TUNING_DB
{});
if
(
!
tuning_db_path
.
empty
())
if
(
not
tuning_db_path
.
empty
())
{
std
::
ifstream
tuning_db_tsv
(
tuning_db_path
);
if
(
tuning_db_tsv
)
{
found_table
=
true
;
std
::
string
line
;
while
(
std
::
getline
(
tuning_db_tsv
,
line
))
{
std
::
vector
<
std
::
string
>
tokens
=
split_string
(
line
,
'\t'
);
std
::
string
arch
=
tokens
[
0
];
std
::
string
prob
=
tokens
[
1
];
std
::
string
perf
=
tokens
[
2
];
std
::
string
key
=
arch
.
append
(
"
\t
"
).
append
(
prob
);
mlirRockTuningUpdateTable
(
tuning_table
.
get
(),
key
.
c_str
(),
perf
.
c_str
(),
1.0
);
std
::
string
num_cu
=
tokens
[
1
];
std
::
string
prob
=
tokens
[
2
];
std
::
string
perf
=
tokens
[
3
];
std
::
string
key
=
arch
.
append
(
"
\t
"
).
append
(
num_cu
).
append
(
"
\t
"
).
append
(
prob
);
mlirRockTuningUpdateTable
(
tuning_table
.
get
(),
make_mlir_string_ref
(
key
),
make_mlir_string_ref
(
perf
),
1.0
);
}
}
}
else
{
found_table
=
false
;
std
::
cerr
<<
"WARNING: MLIR tuning db not found. Please set MIGRAPHX_MLIR_TUNING_DB for "
"optimal performance."
<<
std
::
endl
;
}
return
tuning_table
;
return
std
::
make_pair
(
std
::
move
(
tuning_table
),
found_table
)
;
}
bool
get_module_tuned
()
const
{
static
mlir_tuning_table
tuning_table
=
create_tuning_table
();
// The tuning table as currently implemented is currently not
// thread safe. This will be fixed in the future. For now,
// stick a mutex around all tuning table interaction.
static
std
::
mutex
lock
;
std
::
lock_guard
<
std
::
mutex
>
guard
(
lock
);
if
(
!
mlirRockTuningSetFromTable
(
tuning_table
.
get
(),
mmodule
.
get
()))
static
std
::
pair
<
mlir_tuning_table
,
bool
>
tuning_table
=
load_tuning_table
();
if
(
not
mlirRockTuningSetFromTable
(
tuning_table
.
first
.
get
(),
mmodule
.
get
()))
{
const
char
*
prob_config
=
mlirRockTuningGetKey
(
tuning_table
.
get
(),
mmodule
.
get
());
std
::
stringstream
key
(
prob_config
);
std
::
cerr
<<
"fails to set param on"
<<
prob_config
<<
std
::
endl
;
dump_tuning_cfg
(
prob_config
);
std
::
array
<
char
,
ROCMLIR_TUNING_KEY_BUFSZ
>
prob_config
;
size_t
prob_config_bytes
=
mlirRockTuningGetKey
(
mmodule
.
get
(),
prob_config
.
data
(),
prob_config
.
size
());
if
(
prob_config_bytes
>=
prob_config
.
size
())
{
std
::
cerr
<<
"MLIR tuning key overflowed buffer, needed "
<<
prob_config_bytes
<<
" bytes"
<<
std
::
endl
;
return
false
;
}
std
::
string
prob_config_str
(
prob_config
.
begin
(),
prob_config
.
begin
()
+
prob_config_bytes
);
if
(
tuning_table
.
second
)
{
std
::
cerr
<<
"NOTE: MLIR tuning table did not include a key for "
<<
prob_config_str
<<
std
::
endl
;
}
dump_tuning_cfg
(
prob_config_str
);
return
false
;
}
return
true
;
...
...
@@ -781,7 +801,8 @@ struct mlir_program
mlir_module
mmodule
;
problem_params
pp
;
std
::
deque
<
std
::
string
>
strings
{};
std
::
string
target_arch
;
std
::
string
target_arch
=
""
;
std
::
size_t
num_cu
=
0
;
std
::
string
sym_name
;
};
...
...
@@ -838,7 +859,7 @@ void adjust_param_shapes(module& m, const std::vector<shape>& inputs)
}
}
code_object_op
compile_mlir
(
const
context
&
,
code_object_op
compile_mlir
(
const
context
&
migraphx_ctx
,
module
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
,
const
value
&
solution
)
...
...
@@ -850,7 +871,7 @@ code_object_op compile_mlir(const context&,
std
::
cout
<<
m
<<
std
::
endl
;
mlir_program
mp
;
mp
.
find_target
(
);
mp
.
set_gpu_properties
(
migraphx_ctx
);
mp
.
parse
(
m
);
auto
mod_op
=
mlirModuleGetOperation
(
mp
.
mmodule
.
get
());
if
(
trace
)
...
...
@@ -877,12 +898,13 @@ instruction_ref insert_mlir(module& m,
return
m
.
insert_instruction
(
ins
,
co
,
refs
);
}
tuning_config
get_tuning_config_mlir
(
module
m
,
const
std
::
vector
<
shape
>&
inputs
)
tuning_config
get_tuning_config_mlir
(
const
context
&
migraphx_ctx
,
module
m
,
const
std
::
vector
<
shape
>&
inputs
)
{
adjust_param_shapes
(
m
,
inputs
);
mlir_program
mp
;
mp
.
find_target
(
);
mp
.
set_gpu_properties
(
migraphx_ctx
);
mp
.
parse
(
m
);
return
mp
.
get_tuning_config
();
}
...
...
@@ -909,10 +931,14 @@ instruction_ref
insert_mlir
(
module
&
m
,
instruction_ref
,
code_object_op
co
,
const
std
::
vector
<
instruction_ref
>&
)
{
use
(
co
);
use
(
m
);
return
m
.
end
();
}
tuning_config
get_tuning_config_mlir
(
module
,
const
std
::
vector
<
shape
>&
)
{
return
{};
}
tuning_config
get_tuning_config_mlir
(
const
context
&
,
module
,
const
std
::
vector
<
shape
>&
)
{
return
{};
}
// NOLINTEND(performance-unnecessary-value-param)
#endif
...
...
src/targets/gpu/no_device.cpp
0 → 100644
View file @
dcb98a60
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifdef __HIP_DEVICE_COMPILE__
#error \
"Device compilation not allowed for migraphx_gpu. Do not link with hip::device. Device code should go into migraphx_device or migraphx_kernels"
#endif
src/targets/gpu/time_op.cpp
View file @
dcb98a60
...
...
@@ -34,7 +34,7 @@ namespace gpu {
std
::
vector
<
argument
>
generate_arguments
(
const
std
::
vector
<
shape
>&
shapes
,
unsigned
long
seed
=
0
)
{
std
::
vector
<
argument
>
args
;
std
::
transform
(
shapes
.
begin
(),
shapes
.
end
(),
std
::
back_inserter
(
args
),
[
&
](
auto
&
s
)
{
std
::
transform
(
shapes
.
begin
(),
shapes
.
end
(),
std
::
back_inserter
(
args
),
[
&
](
const
auto
&
s
)
{
return
to_gpu
(
generate_argument
(
s
,
seed
++
));
});
return
args
;
...
...
src/tf/tf_parser.cpp
View file @
dcb98a60
...
...
@@ -338,7 +338,7 @@ void tf_parser::parse_node(const std::string& name)
std
::
string
input_name
=
input
;
// if input has trailing `:0` index then remove it
auto
multi_out_idx
=
input
.
find
(
':'
);
if
(
multi_out_idx
!=
std
::
string
::
npos
&&
input
.
substr
(
multi_out_idx
+
1
)
==
"0"
)
if
(
multi_out_idx
!=
std
::
string
::
npos
and
input
.
substr
(
multi_out_idx
+
1
)
==
"0"
)
{
input_name
=
input
.
substr
(
0
,
multi_out_idx
);
}
...
...
src/value.cpp
View file @
dcb98a60
...
...
@@ -285,7 +285,7 @@ bool value::contains(const std::string& pkey) const
}
std
::
size_t
value
::
size
()
const
{
auto
*
a
=
if_array_impl
(
x
);
const
auto
*
a
=
if_array_impl
(
x
);
if
(
a
==
nullptr
)
return
0
;
return
a
->
size
();
...
...
test/CMakeLists.txt
View file @
dcb98a60
...
...
@@ -98,17 +98,11 @@ endfunction()
function
(
add_test_executable TEST_NAME
)
add_executable
(
${
TEST_NAME
}
EXCLUDE_FROM_ALL
${
ARGN
}
)
target_link_libraries
(
${
TEST_NAME
}
${
CMAKE_THREAD_LIBS_INIT
}
)
# Cmake does not add flags correctly for gcc
if
(
CMAKE_CXX_COMPILER_ID MATCHES
"GNU"
)
set_target_properties
(
${
TEST_NAME
}
PROPERTIES COMPILE_FLAGS -pthread LINK_FLAGS -pthread
)
endif
()
set
(
TEST_COMMAND
${
TEST_NAME
}
)
add_test_command
(
${
TEST_NAME
}
${
TEST_COMMAND
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
target_link_libraries
(
${
TEST_NAME
}
migraphx migraphx_onnx migraphx_ref
)
target_link_libraries
(
${
TEST_NAME
}
Threads::Threads
migraphx migraphx_onnx migraphx_ref
)
target_include_directories
(
${
TEST_NAME
}
PUBLIC include
)
endfunction
(
add_test_executable
)
...
...
@@ -208,11 +202,16 @@ endif()
function
(
test_header NAME HEADER
)
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
"#include <
${
HEADER
}
>
\n
int main() {}
\n
"
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
"
#include <
${
HEADER
}
>
int main() {}
\n
"
)
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-static-include-
${
NAME
}
.cpp
"#include <
${
HEADER
}
>
\n
"
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-static-include-
${
NAME
}
.cpp
"
#include <
${
HEADER
}
>
#if defined(min) || defined(max) || defined(near) || defined(far)
#error
\"
Do not include windows.h in header files
\"
#endif
\n
"
)
add_test_executable
(
${
NAME
}
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
...
...
test/api/test_cpu.cpp
View file @
dcb98a60
...
...
@@ -145,15 +145,15 @@ TEST_CASE(zero_parameter)
TEST_CASE
(
set_scalar_parameter
)
{
auto
p1
=
migraphx
::
parse_onnx
(
"add_bcast_test.onnx"
);
migraphx
::
shape
s1
(
migraphx_shape_float_type
,
{
3
,
4
});
auto
p1
=
migraphx
::
parse_onnx
(
"
implicit_
add_bcast_test.onnx"
);
migraphx
::
shape
s1
(
migraphx_shape_float_type
,
{
3
,
4
,
1
});
auto
param_shapes
=
p1
.
get_parameter_shapes
();
auto
s1_orig
=
param_shapes
[
"1"
];
CHECK
(
bool
{
s1
==
s1_orig
});
migraphx
::
onnx_options
option
;
option
.
set_input_parameter_shape
(
"1"
,
{});
auto
p2
=
migraphx
::
parse_onnx
(
"add_bcast_test.onnx"
,
option
);
auto
p2
=
migraphx
::
parse_onnx
(
"
implicit_
add_bcast_test.onnx"
,
option
);
migraphx
::
shape
s_scalar
(
migraphx_shape_float_type
);
auto
param_shapes_1
=
p2
.
get_parameter_shapes
();
auto
s_scalar_after
=
param_shapes_1
[
"1"
];
...
...
test/eliminate_contiguous_test.cpp
View file @
dcb98a60
...
...
@@ -196,15 +196,47 @@ TEST_CASE(contiguous_pointwise)
migraphx
::
make_op
(
"broadcast"
,
{{
"axis"
,
1
},
{
"out_lens"
,
{
2
,
3
,
8
,
8
}}}),
y
);
auto
yc
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"contiguous"
),
yb
);
auto
add
=
add_pointwise
(
p
,
"main:pointwise0"
,
{
x
,
yc
},
single_pointwise
(
"add"
));
mm
->
add_instruction
(
pass_op
{},
add
);
auto
cadd
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"contiguous"
),
add
);
mm
->
add_instruction
(
pass_op
{},
cadd
);
}
auto
count
=
std
::
distance
(
mm
->
begin
(),
mm
->
end
());
run_pass
(
*
mm
);
EXPECT
(
std
::
distance
(
mm
->
begin
(),
mm
->
end
())
==
(
count
-
1
));
EXPECT
(
std
::
distance
(
mm
->
begin
(),
mm
->
end
())
==
(
count
-
2
));
EXPECT
(
std
::
none_of
(
mm
->
begin
(),
mm
->
end
(),
[](
auto
&&
ins
)
{
return
ins
.
name
()
==
"contiguous"
;
}));
}
TEST_CASE
(
contiguous_nhwc_pointwise
)
{
auto
s
=
migraphx
::
shape
::
from_permutation
(
migraphx
::
shape
::
float_type
,
{
2
,
3
,
8
,
8
},
{
0
,
2
,
3
,
1
});
migraphx
::
program
p1
;
{
auto
*
mm
=
p1
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
y
=
mm
->
add_parameter
(
"y"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
3
}});
auto
yb
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"broadcast"
,
{{
"axis"
,
1
},
{
"out_lens"
,
{
2
,
3
,
8
,
8
}}}),
y
);
auto
yc
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"contiguous"
),
yb
);
auto
add
=
add_pointwise
(
p1
,
"main:pointwise0"
,
{
x
,
yc
},
single_pointwise
(
"add"
));
auto
cadd
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"contiguous"
),
add
);
mm
->
add_instruction
(
pass_op
{},
cadd
);
}
run_pass
(
*
p1
.
get_main_module
());
migraphx
::
program
p2
;
{
auto
*
mm
=
p2
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
y
=
mm
->
add_parameter
(
"y"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
3
}});
auto
yb
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"broadcast"
,
{{
"axis"
,
1
},
{
"out_lens"
,
{
2
,
3
,
8
,
8
}}}),
y
);
auto
add
=
add_pointwise
(
p2
,
"main:pointwise0"
,
{
x
,
yb
},
single_pointwise
(
"add"
));
auto
cadd
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"contiguous"
),
add
);
mm
->
add_instruction
(
pass_op
{},
cadd
);
}
EXPECT
(
p1
==
p2
);
}
TEST_CASE
(
slice_contiguous
)
{
migraphx
::
module
m
;
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment