gaoqiong / MIGraphX / Commits

Commit 63952fb9, authored Jul 06, 2023 by Brian Pickrell

    Merge branch 'develop' into multinomial_parse

Parents: 61f3895c, e7471141

Changes: 50 files in the full diff; this page shows 20 changed files with 458 additions and 87 deletions (+458 −87).
Files on this page:

    src/targets/cpu/CMakeLists.txt                                  +0   -2
    src/targets/gpu/compile_ops.cpp                                 +42  -19
    src/targets/gpu/compiler.cpp                                    +3   -2
    src/targets/gpu/fuse_ck.cpp                                     +14  -1
    src/targets/gpu/fuse_mlir.cpp                                   +68  -24
    src/targets/gpu/include/migraphx/gpu/compiler.hpp               +5   -3
    src/targets/gpu/include/migraphx/gpu/lowering.hpp               +2   -2
    src/targets/gpu/jit/ck_gemm.cpp                                 +6   -5
    src/targets/gpu/kernels/include/migraphx/kernels/ck_gemm.hpp    +1   -1
    src/targets/gpu/lowering.cpp                                    +17  -12
    src/targets/gpu/mlir.cpp                                        +32  -9
    src/targets/gpu/target.cpp                                      +1   -2
    test/CMakeLists.txt                                             +0   -2
    test/dead_code_elimination_test.cpp                             +13  -1
    test/gpu/fuse_mlir.cpp (new)                                    +157 -0
    test/gpu/mlir.cpp                                               +81  -1
    test/include/basic_ops.hpp                                      +15  -0
    test/onnx/.onnxrt-commit                                        +1   -1
    test/onnx/clip_dyn_min_max_test.onnx (new, binary)              +0   -0
    test/onnx/clip_dyn_min_only_test.onnx (new, binary)             +0   -0
src/targets/cpu/CMakeLists.txt

@@ -88,8 +88,6 @@ foreach(LIBRARY ${OpenMP_CXX_LIBRARIES})
     endif()
 endforeach()
 target_link_libraries(migraphx_all_targets INTERFACE migraphx_cpu)
 rocm_install_targets(
     TARGETS migraphx_cpu
-    INCLUDE
 ...
src/targets/gpu/compile_ops.cpp

@@ -111,9 +111,27 @@ struct compile_plan
     context* ctx;
     operation preop;
     instruction_ref ins;
     optional<tuning_config> config = nullopt;
-    std::vector<compiled_result> results = {};
-    void update_config() { config = get_tuning_config(*ctx, ins, preop); }
+    std::vector<optional<compiled_result>> results = {};
+    void update_config(bool exhaustive)
+    {
+        config = get_tuning_config(*ctx, ins, preop, exhaustive);
+    }
+    template <class Vector>
+    void insert_compiles(Vector& compiles, const value& solution, std::size_t i)
+    {
+        compiles.emplace_back([=] {
+            try
+            {
+                results[i] = compiled_result{compile(*ctx, ins, preop, solution), ins};
+            }
+            catch(...)
+            {
+                results[i] = nullopt;
+            }
+        });
+    }
     template <class Vector>
     void add_compiles(Vector& compiles, problem_cache& pc)
     {
 ...
@@ -127,9 +145,7 @@ struct compile_plan
             if(solution.is_null())
                 return;
-            results.resize(1);
-            compiles.emplace_back(
-                [=] { results[0] = compiled_result{compile(*ctx, ins, preop, solution), ins}; });
+            insert_compiles(compiles, solution, 0);
         }
         else
         {
 ...
@@ -139,18 +155,14 @@ struct compile_plan
             for(auto i : range(solutions.size()))
             {
                 auto solution = solutions[i];
-                compiles.emplace_back([=] {
-                    results[i] = compiled_result{compile(*ctx, ins, preop, solution), ins};
-                });
+                insert_compiles(compiles, solution, i);
             }
         }
     }
     else
     {
-        results.resize(1);
-        compiles.emplace_back(
-            [=] { results[0] = compiled_result{compile(*ctx, ins, preop, value{}), ins}; });
+        insert_compiles(compiles, value{}, 0);
     }
 }

 const compiled_result& benchmark(problem_cache& pc) const
 ...
@@ -158,7 +170,11 @@ struct compile_plan
     if(results.empty())
         MIGRAPHX_THROW("No configs to tune");
     if(results.size() == 1)
-        return results.front();
+    {
+        if(not results.front().has_value())
+            MIGRAPHX_THROW("No configs to tune");
+        return *results.front();
+    }
     if(not config)
         MIGRAPHX_THROW("Multiple kernels without config");
     std::cout << "Benchmarking " << preop.name() << ": " << results.size() << " configs"
 ...
@@ -167,11 +183,17 @@ struct compile_plan
     times.reserve(results.size());
     std::transform(
         results.begin(), results.end(), std::back_inserter(times), [&](const auto& cr) {
-            return time_op(*ctx, cr.replace.code_object, to_shapes(cr.ins->inputs()), 20).first;
+            if(not cr.has_value())
+                return std::numeric_limits<double>::max();
+            return time_op(*ctx, cr->replace.code_object, to_shapes(cr->ins->inputs()), 20)
+                .first;
         });
     auto i = std::distance(times.begin(), std::min_element(times.begin(), times.end()));
     std::cout << "Fastest solution: " << config->solutions.at(i) << std::endl;
     pc.insert(preop.name(), config->problem, config->solutions.at(i));
-    return results[i];
+    if(not results[i].has_value())
+        MIGRAPHX_THROW("No valid tuned compilation.");
+    return *results[i];
 }

 void replace(module& m, problem_cache& pc) const
 {
 ...
@@ -185,7 +207,10 @@ void par_compile(std::size_t n, F f)
 {
     if(n == 0)
         return;
-    par_for(n, n / value_of(MIGRAPHX_GPU_COMPILE_PARALLEL{}, n), f);
+    auto d = value_of(MIGRAPHX_GPU_COMPILE_PARALLEL{});
+    if(d == 0)
+        d = n;
+    par_for(n, n / d, f);
 }

 struct compile_manager
 ...
@@ -202,9 +227,7 @@ struct compile_manager
     void update_configs()
     {
-        if(not exhaustive)
-            return;
-        par_compile(cps.size(), [&](auto i) { cps[i].update_config(); });
+        par_compile(cps.size(), [&](auto i) { cps[i].update_config(exhaustive); });
     }

     void compile(module& m)
 ...
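The pattern introduced above, where each candidate's compile result is stored as an optional so a failed compile is recorded instead of aborting, and failures simply score the worst possible benchmark time, works the same outside MIGraphX. A minimal self-contained C++ sketch (candidate and try_compile are hypothetical stand-ins, not MIGraphX's compiled_result or compile()):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <limits>
    #include <optional>
    #include <stdexcept>
    #include <vector>

    struct candidate
    {
        double runtime;
    };

    // Hypothetical stand-in for compile(): even-numbered solutions "fail".
    std::optional<candidate> try_compile(std::size_t i)
    {
        if(i % 2 == 0)
            return std::nullopt; // a failed compile is recorded, not fatal
        return candidate{10.0 / static_cast<double>(i)};
    }

    int main()
    {
        std::vector<std::optional<candidate>> results;
        for(std::size_t i = 0; i < 8; i++)
            results.push_back(try_compile(i));

        // Failed compiles get the worst possible time, so min_element skips them.
        std::vector<double> times;
        std::transform(results.begin(), results.end(), std::back_inserter(times),
                       [](const std::optional<candidate>& cr) {
                           if(not cr.has_value())
                               return std::numeric_limits<double>::max();
                           return cr->runtime;
                       });
        auto best = std::distance(times.begin(), std::min_element(times.begin(), times.end()));
        if(not results[best].has_value())
            throw std::runtime_error("No valid tuned compilation.");
        std::cout << "Fastest solution: " << best << "\n"; // prints 7 (runtime 10/7)
    }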
src/targets/gpu/compiler.cpp

@@ -63,9 +63,10 @@ compile_op(const std::string& name, context& ctx, const std::vector<shape>& inpu
     return compiler_map().at(name).compile_op(ctx, inputs, v);
 }

-optional<tuning_config> get_tuning_config(context& ctx, instruction_ref ins, const operation& op)
+optional<tuning_config>
+get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive)
 {
-    return compiler_map().at(op.name()).get_tuning_config(ctx, ins, op);
+    return compiler_map().at(op.name()).get_tuning_config(ctx, ins, op, exhaustive);
 }

 } // namespace gpu
 ...
src/targets/gpu/fuse_ck.cpp

@@ -83,10 +83,23 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
         return false;
     auto a = ins->inputs().front()->get_shape();
     auto b = ins->inputs().back()->get_shape();
+    auto m = a.lens()[a.lens().size() - 2];
+    auto n = b.lens().back();
+    auto k = a.lens().back();
+    // Integer gemms must be divisible by 4 in ck
+    if(contains({shape::int8_type, shape::int32_type}, ins->get_shape().type()))
+    {
+        if(m % 4 != 0)
+            return false;
+        if(n % 4 != 0)
+            return false;
+        if(k % 4 != 0)
+            return false;
+    }
     // Skipping GEMMs with a K dimension greater than 2048 is a coarse-grained strategy
     // to avoid poor-performing GEMM kernels from CK
     // To-do: Investigate a more precise strategy
-    return a.lens().back() <= 2048;
+    return k <= 2048;
 }

 struct find_ck_gemm_pointwise
 ...
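The new is_ck_gemm constraints are simple arithmetic and can be checked in isolation. A standalone sketch of the same size rules (plain integers instead of MIGraphX shapes; the divisible-by-4 and K <= 2048 limits come from the diff above):

    #include <cstdint>
    #include <iostream>

    // Mirrors the is_ck_gemm checks: integer GEMMs need M, N and K divisible
    // by 4, and any K above 2048 is rejected as a coarse-grained guard
    // against poor-performing CK kernels.
    bool ck_gemm_size_ok(std::int64_t m, std::int64_t n, std::int64_t k, bool integer_gemm)
    {
        if(integer_gemm and (m % 4 != 0 or n % 4 != 0 or k % 4 != 0))
            return false;
        return k <= 2048;
    }

    int main()
    {
        std::cout << ck_gemm_size_ok(64, 64, 128, true) << "\n";   // 1: aligned, small K
        std::cout << ck_gemm_size_ok(63, 64, 128, true) << "\n";   // 0: M not divisible by 4
        std::cout << ck_gemm_size_ok(64, 64, 4096, false) << "\n"; // 0: K too large
    }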
src/targets/gpu/fuse_mlir.cpp

@@ -139,7 +139,8 @@ struct find_mlir_op
     auto matcher() const
     {
         auto dot_or_conv = match::skip(match::name("contiguous"))(
-            match::any_of(match::name("dot"), is_mlir_conv()).bind("gemm_based_op"));
+            match::any_of(match::name("dot"), match::name("quant_dot"), is_mlir_conv())
+                .bind("gemm_based_op"));
         return match::name("pointwise")(match::any_of[match::inputs()](dot_or_conv.bind("x")));
     }
 ...
@@ -190,6 +191,68 @@ struct find_mlir_op
         return {new_gemm_based_op, top_inputs};
     }

+    // Whitelist supported fusion options, including imposing type constraints
+    // for cases where MLIR only supports an operation (usually a pointwise function)
+    // on particular types.
+    bool is_pointwise_op_supported_by_mlir(const instruction& i) const
+    {
+        using type_t                                      = shape::type_t;
+        const auto& name                                  = i.name();
+        const auto result_type                            = i.get_shape().type();
+        const std::initializer_list<type_t> allowed_types = {type_t::float_type,
+                                                             type_t::half_type,
+                                                             type_t::int8_type,
+                                                             type_t::int32_type,
+                                                             type_t::bool_type};
+        // Preliminary type check.
+        if(not contains(allowed_types, result_type))
+        {
+            return false;
+        }
+        const std::initializer_list<std::string> any_type_ops = {"@literal", "@param", "@return"};
+        const std::initializer_list<std::string> no_bool_ops  = {"convolution",
+                                                                 "quant_convolution",
+                                                                 "dot",
+                                                                 "quant_dot",
+                                                                 "add",
+                                                                 "clip",
+                                                                 "sub",
+                                                                 "mul",
+                                                                 "div",
+                                                                 "pow",
+                                                                 "where",
+                                                                 "quantizelinear",
+                                                                 "dequantizelinear",
+                                                                 "abs",
+                                                                 "neg"};
+        const std::initializer_list<std::string> fp_only_ops = {"ceil",
+                                                                "erf",
+                                                                "exp",
+                                                                "floor",
+                                                                "log",
+                                                                "recip",
+                                                                "rsqrt",
+                                                                "sigmoid",
+                                                                "softmax",
+                                                                "tanh"};
+        bool is_float = contains({type_t::float_type, type_t::half_type}, result_type);
+        if(contains(any_type_ops, name))
+            return true;
+        if(result_type != type_t::bool_type && contains(no_bool_ops, name))
+            return true;
+        if(is_float && contains(fp_only_ops, name))
+            return true;
+        // Only conversions between floating types are known to be unambiguously
+        // supported.
+        if(is_float && name == "convert")
+        {
+            return std::all_of(i.inputs().begin(), i.inputs().end(), [](const auto& arg) {
+                return contains({type_t::float_type, type_t::half_type}, arg->get_shape().type());
+            });
+        }
+        return false;
+    }
+
     void apply(module_pass_manager& mpm, const match::matcher_result& r) const
     {
         auto ins = r.result;
 ...
@@ -197,31 +260,12 @@ struct find_mlir_op
         auto x_ins = r.instructions["x"]; // input after contiguous
         auto* pm   = ins->module_inputs().front();
         auto names = pm->get_parameter_names();
-        // Whitelist pointwise operators
-        if(std::any_of(pm->begin(), pm->end(), [](const auto& i) {
-               return not contains({"@literal",
-                                    "@param",
-                                    "@return",
-                                    "convolution",
-                                    "quant_convolution",
-                                    "dot",
-                                    "add",
-                                    "relu",
-                                    "dequantizelinear",
-                                    "quantizelinear",
-                                    "mul"},
-                                   i.name());
-           }))
-            return;
-        // Only fuse with fp32/fp16/int8/int32
-        if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) {
-               return not contains({shape::type_t::float_type,
-                                    shape::type_t::half_type,
-                                    shape::type_t::int8_type,
-                                    shape::type_t::int32_type},
-                                   i->get_shape().type());
-           }))
-            return;
+        // Whitelist pointwise operators.
+        if(std::any_of(pm->begin(), pm->end(), [&](const auto& i) {
+               return not is_pointwise_op_supported_by_mlir(i);
+           }))
+            return;
         std::sort(names.begin(), names.end());
         module_ref mm = mpm.create_module("mlir_" + pm->name());
         mm->set_bypass();
 ...
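The replacement whitelist boils down to bucketing op names by the types MLIR handles for them. A self-contained sketch of that dispatch shape (a plain enum and a hypothetical pointwise_supported helper stand in for shape::type_t and is_pointwise_op_supported_by_mlir, with the buckets trimmed for brevity):

    #include <algorithm>
    #include <initializer_list>
    #include <iostream>
    #include <string>

    enum class dtype { f32, f16, i8, i32, boolean };

    // Minimal analogue of MIGraphX's contains() helper.
    template <class T>
    bool contains(std::initializer_list<T> r, const T& x)
    {
        return std::find(r.begin(), r.end(), x) != r.end();
    }

    // Same dispatch shape as is_pointwise_op_supported_by_mlir: op-name buckets
    // checked against the result type.
    bool pointwise_supported(const std::string& name, dtype result_type)
    {
        const std::initializer_list<std::string> no_bool_ops = {"add", "sub", "mul", "div", "where"};
        const std::initializer_list<std::string> fp_only_ops = {"exp", "log", "tanh", "sigmoid"};
        const bool is_float = contains({dtype::f32, dtype::f16}, result_type);
        if(result_type != dtype::boolean and contains(no_bool_ops, name))
            return true;
        if(is_float and contains(fp_only_ops, name))
            return true;
        return false;
    }

    int main()
    {
        std::cout << pointwise_supported("tanh", dtype::f16) << "\n"; // 1
        std::cout << pointwise_supported("tanh", dtype::i32) << "\n"; // 0: int tanh rejected
        std::cout << pointwise_supported("add", dtype::i32) << "\n";  // 1
    }

The int tanh rejection is exactly what the new int_quant_dot_tanh_fails test in test/gpu/fuse_mlir.cpp relies on.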
src/targets/gpu/include/migraphx/gpu/compiler.hpp

@@ -79,7 +79,7 @@ using compiler_compile =
 using compiler_compile_op =
     std::function<operation(context&, const std::vector<shape>& inputs, const value&)>;
 using compiler_tuning_config =
-    std::function<optional<tuning_config>(context&, instruction_ref, const operation&)>;
+    std::function<optional<tuning_config>(context&, instruction_ref, const operation&, bool)>;

 void register_compiler(const std::string& name,
                        compiler_compile c,
 ...
@@ -91,7 +91,8 @@ compiler_replace
 compile(context& ctx, instruction_ref ins, const operation& op, const value& solution);
 operation
 compile_op(const std::string& name, context& ctx, const std::vector<shape>& inputs, const value& v);
-optional<tuning_config> get_tuning_config(context& ctx, instruction_ref ins, const operation& op);
+optional<tuning_config>
+get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive);

 template <class T>
 void register_compiler()
 ...
@@ -125,7 +126,8 @@ template <class Derived>
 struct compiler : auto_register_compiler<Derived>
 {
     const Derived& derived() const { return static_cast<const Derived&>(*this); }
-    optional<tuning_config> get_tuning_config(context&, instruction_ref, const operation&) const
+    optional<tuning_config>
+    get_tuning_config(context&, instruction_ref, const operation&, bool) const
     {
         return nullopt;
     }
 ...
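The extra trailing bool on compiler_tuning_config threads an "exhaustive" request through a callback registry. A toy sketch of that registry idea (tuning_callback and registry() are simplified illustrations, not MIGraphX's actual API):

    #include <functional>
    #include <iostream>
    #include <map>
    #include <optional>
    #include <string>

    // Each backend registers a callback; the trailing bool lets callers
    // request exhaustive tuning explicitly.
    using tuning_callback = std::function<std::optional<std::string>(bool exhaustive)>;

    std::map<std::string, tuning_callback>& registry()
    {
        static std::map<std::string, tuning_callback> r;
        return r;
    }

    int main()
    {
        registry()["ck_gemm"] = [](bool exhaustive) -> std::optional<std::string> {
            if(not exhaustive)
                return std::nullopt; // mirrors the MIGRAPHX_TUNE_CK gate in jit/ck_gemm.cpp
            return "tuning config for ck_gemm";
        };
        std::cout << registry()["ck_gemm"](true).value_or("<none>") << "\n";
        std::cout << registry()["ck_gemm"](false).value_or("<none>") << "\n";
    }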
src/targets/gpu/include/migraphx/gpu/lowering.hpp

@@ -30,7 +30,7 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-struct module;
+struct module_pass_manager;

 namespace gpu {
 ...
@@ -45,7 +45,7 @@ struct lowering
     context* ctx;
     bool offload_copy;
     std::string name() const { return "gpu::lowering"; }
-    void apply(module& m) const;
+    void apply(module_pass_manager& mpm) const;
 };

 } // namespace gpu
 ...
src/targets/gpu/jit/ck_gemm.cpp

@@ -50,6 +50,7 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_LOG_CK_GEMM);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_TUNING);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_TUNING_VALUE);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_DEBUG);
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TUNE_CK);

 // NOLINTNEXTLINE
 static const char* const ck_gemm_kernel = R"__migraphx__(
 ...
@@ -265,7 +266,7 @@ struct ck_gemm_compiler : compiler<ck_gemm_compiler>
         s = shape{s.type(), {m1, m2}};
     }

-    std::vector<std::string> names() const { return {"gpu::ck_gemm"}; }
+    std::vector<std::string> names() const { return {"ck_gemm", "gpu::ck_gemm"}; }

     static bool standard_batch(const shape& s)
     {
 ...
@@ -418,9 +419,7 @@ struct ck_gemm_compiler : compiler<ck_gemm_compiler>
     {
         auto shapes = to_shapes(ins->inputs());
         auto v      = create_settings(ins, op);
-        if(solution.is_null())
-            v["tuning_value"] = 4;
-        else
+        if(not solution.is_null())
             v["tuning_value"] = solution;
         return {compile_op(ctx, shapes, v),
                 [=](module& m, instruction_ref ins2, const operation& code_object) {
 ...
@@ -436,8 +435,10 @@ struct ck_gemm_compiler : compiler<ck_gemm_compiler>
     }

     optional<tuning_config>
-    get_tuning_config(context& ctx, instruction_ref ins, const operation& op) const
+    get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive) const
     {
+        if(not exhaustive and not enabled(MIGRAPHX_TUNE_CK{}))
+            return nullopt;
         tuning_config tc;
         auto shapes  = to_shapes(ins->inputs());
         auto problem = create_problem(shapes, create_settings(ins, op));
 ...
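get_tuning_config now returns nullopt unless tuning was requested, either via the exhaustive flag or the MIGRAPHX_TUNE_CK environment variable. A small sketch of that opt-in gate (std::getenv stands in for MIGraphX's enabled()/MIGRAPHX_DECLARE_ENV_VAR machinery):

    #include <cstdlib>
    #include <iostream>
    #include <string>

    // Returns true only when the named env var is set to "1".
    bool env_enabled(const char* name)
    {
        const char* v = std::getenv(name);
        return v != nullptr and std::string{v} == "1";
    }

    // Tuning configs are only produced when the caller asked for exhaustive
    // tuning or the environment opts in, mirroring the gate in the diff.
    bool should_tune(bool exhaustive)
    {
        return exhaustive or env_enabled("MIGRAPHX_TUNE_CK");
    }

    int main()
    {
        std::cout << "tune: " << should_tune(false) << "\n";
        std::cout << "tune (exhaustive): " << should_tune(true) << "\n";
    }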
src/targets/gpu/kernels/include/migraphx/kernels/ck_gemm.hpp

@@ -52,7 +52,7 @@ __device__ void ck_gemm_matrix(E e, A a, B b, Ds... ds)
         ck::make_tuple(to_ck_tensor<Ds>()...),
         to_ck_tensor<E>());
-    static_assert(desc.is_valid, "Invalid ck gemm.");
+    static_assert(desc.IsValid(), "Invalid ck gemm.");
     G::Run(desc,
            to_ck_const_pointer(a.data()),
 ...
src/targets/gpu/lowering.cpp

@@ -22,12 +22,19 @@
  * THE SOFTWARE.
  */
+#include <iterator>
 #include <migraphx/gpu/lowering.hpp>
+#include <utility>
+#include <functional>
+#include <algorithm>
+#include <map>
 #include <migraphx/manage_ptr.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/make_op.hpp>
+#include <migraphx/instruction_ref.hpp>
 #include <migraphx/stringutils.hpp>
+#include <migraphx/pass_manager.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/program.hpp>
 #include <migraphx/op/dot.hpp>
 #include <migraphx/op/if_op.hpp>
 ...
@@ -35,17 +42,12 @@
 #include <migraphx/op/quant_dot.hpp>
 #include <migraphx/gpu/context.hpp>
-#include <migraphx/gpu/lowering.hpp>
 #include <migraphx/gpu/device_name.hpp>
 #include <migraphx/gpu/gemm.hpp>
 #include <migraphx/gpu/miopen.hpp>
 #include <migraphx/gpu/rocblas.hpp>
 #include <migraphx/gpu/compiler.hpp>
-#include <migraphx/iterator_for.hpp>
-#include <migraphx/program.hpp>
-#include <utility>
-#include <functional>
-#include <algorithm>
-#include <map>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 ...
@@ -53,8 +55,9 @@ namespace gpu {
 struct miopen_apply
 {
-    module* mod            = nullptr;
-    const lowering* pass   = nullptr;
+    module* mod              = nullptr;
+    module_pass_manager* mpm = nullptr;
+    const lowering* pass     = nullptr;
     std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
     instruction_ref last{};
     bool offload_copy = false;
 ...
@@ -83,8 +86,7 @@ struct miopen_apply
         auto& ctx      = get_context();
         int8_x4_format = get_int8_x4_format(ctx);
        compute_fp32   = get_compute_fp32_flag();
-        // TODO: Set Offload copy based on root modules' compile options
-        offload_copy = (mod->name() == "main") ? pass->offload_copy : false;
+        offload_copy = (mod == mpm->get_root_module()) ? pass->offload_copy : false;

         add_generic_op("contiguous");
 ...
@@ -376,7 +378,10 @@ struct miopen_apply
     }
 };

-void lowering::apply(module& m) const { miopen_apply{&m, this}.apply(); }
+void lowering::apply(module_pass_manager& mpm) const
+{
+    miopen_apply{&mpm.get_module(), &mpm, this}.apply();
+}

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 ...
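The signature change from apply(module&) to apply(module_pass_manager&) exists so the pass can ask the manager which module is the root, rather than matching on the name "main". A toy model with stand-in types (none of these are MIGraphX's real classes) shows why the pointer comparison is sturdier:

    #include <iostream>
    #include <string>

    // Stand-ins for module / module_pass_manager, just enough to show why
    // lowering::apply switched signatures: the root-module test needs the manager.
    struct module
    {
        std::string name;
    };

    struct module_pass_manager
    {
        module* root;
        module* current;
        module& get_module() { return *current; }
        module* get_root_module() { return root; }
    };

    void apply(module_pass_manager& mpm, bool pass_offload_copy)
    {
        module* mod = &mpm.get_module();
        // Comparing against the root pointer works even if a submodule
        // happened to be named "main".
        bool offload_copy = (mod == mpm.get_root_module()) ? pass_offload_copy : false;
        std::cout << mod->name << ": offload_copy=" << offload_copy << "\n";
    }

    int main()
    {
        module main_mod{"main"}, sub_mod{"if_then"};
        module_pass_manager at_root{&main_mod, &main_mod};
        module_pass_manager at_sub{&main_mod, &sub_mod};
        apply(at_root, true); // main: offload_copy=1
        apply(at_sub, true);  // if_then: offload_copy=0
    }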
src/targets/gpu/mlir.cpp

@@ -121,7 +121,10 @@ struct mlir_handle
 #define MIGRAPHX_MANAGE_MLIR_HANDLE(T, F) migraphx::gpu::mlir_handle<T, decltype(&F), &F> // NOLINT

-using mlir_context = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirContext, mlirContextDestroy);
+using mlir_context          = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirContext, mlirContextDestroy);
+using mlir_thread_pool      = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirLlvmThreadPool, mlirLlvmThreadPoolDestroy);
+using mlir_dialect_registry = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirDialectRegistry, mlirDialectRegistryDestroy);
 using mlir_module    = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirModule, mlirModuleDestroy);
 using mlir_operation = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirOperation, mlirOperationDestroy);
 using mlir_op_printing_flags = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirOpPrintingFlags,
 ...
@@ -173,16 +176,38 @@ bool has_xdlops(const std::string& target_arch)
 struct mlir_program
 {
     mlir_program()
-        : ctx(mlirContextCreate()),
+        : ctx(mlirContextCreateWithRegistry(get_dialect_registry().get(),
+                                            /*threadingEnable=*/false)),
           location(mlirLocationUnknownGet(ctx.get())),
           mmodule(mlirModuleCreateEmpty(location))
     {
-        MlirDialectRegistry registry = mlirDialectRegistryCreate();
-        mlirRegisterRocMLIRDialects(registry);
-        mlirContextAppendDialectRegistry(ctx.get(), registry);
+        mlirContextSetThreadPool(ctx.get(), get_thread_pool().get());
         mlirContextLoadAllAvailableDialects(ctx.get());
-        mlirDialectRegistryDestroy(registry);
         mlirContextSetAllowUnregisteredDialects(ctx.get(), true /*allow*/);
     }

+    static mlir_dialect_registry& get_dialect_registry()
+    {
+        static std::once_flag init_guard;
+        static mlir_dialect_registry the_registry;
+        // The MLIR registration functions (for dialects and passes) are not
+        // necessarily thread-safe and need to be executed exactly once
+        // (especially since they eventually call non-thread-safe LLVM
+        // initializations).
+        std::call_once(init_guard, [&]() {
+            the_registry = mlirDialectRegistryCreate();
+            mlirRegisterRocMLIRDialects(the_registry.get());
+            mlirRegisterRocMLIRPasses();
+        });
+        return the_registry;
+    }
+
+    static mlir_thread_pool& get_thread_pool()
+    {
+        // To save on overhead, we create one LLVM thread pool and reuse it
+        // across all MLIR contexts as recommended by MLIR upstream.
+        // Note that this is thread-safe as of C++11.
+        static mlir_thread_pool the_pool = mlirLlvmThreadPoolCreate();
+        return the_pool;
+    }
+
     MlirType make_type(shape::type_t t) const
 ...
@@ -244,8 +269,6 @@ struct mlir_program
     MlirAttribute attribute(std::int64_t i) const
     {
-        if(i < 0)
-            MIGRAPHX_THROW("MLIR cant handle negative values since they are ambiguous");
         return mlirIntegerAttrGet(mlirIntegerTypeGet(ctx.get(), 64), i);
     }
     MlirAttribute attribute(std::uint64_t i) const
 ...
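The two static helpers above use a standard C++ idiom: a function-local static for a shared instance, plus std::call_once for non-thread-safe one-time registration. A minimal self-contained sketch of that idiom (registry here is a dummy type; the real code registers rocMLIR dialects and passes):

    #include <iostream>
    #include <mutex>

    struct registry
    {
        bool dialects_registered = false;
    };

    registry& get_registry()
    {
        static std::once_flag init_guard;
        static registry the_registry;
        // std::call_once guarantees the setup lambda runs exactly once even
        // under concurrent first calls; the static gives all callers the
        // same instance (initialization is thread-safe since C++11).
        std::call_once(init_guard, [&]() {
            // Non-thread-safe one-time setup would go here
            // (mlirRegisterRocMLIRDialects / mlirRegisterRocMLIRPasses in the diff).
            the_registry.dialects_registered = true;
        });
        return the_registry;
    }

    int main()
    {
        std::cout << get_registry().dialects_registered << "\n";   // 1, initialized once
        std::cout << (&get_registry() == &get_registry()) << "\n"; // 1, same instance
    }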
src/targets/gpu/target.cpp

@@ -76,7 +76,6 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_SCHEDULE_PASS)
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_REDUCE_FUSION)
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_NHWC)
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_CK)
-MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_FAST_GELU)

 struct id_pass
 {
 ...
@@ -125,7 +124,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         inline_module{},
         rewrite_pooling{},
         dead_code_elimination{},
-        enable_pass(not enabled(MIGRAPHX_DISABLE_FAST_GELU{}), rewrite_gelu{}),
+        enable_pass(options.fast_math, rewrite_gelu{}),
         optimize_module{},
         enable_pass(enabled(MIGRAPHX_ENABLE_NHWC{}), layout_nhwc{}),
         dead_code_elimination{},
 ...
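enable_pass appears to wrap a pass so it only runs when its flag is true; the diff swaps the opt-out MIGRAPHX_DISABLE_FAST_GELU env var for the fast_math compile option. A minimal sketch of that kind of conditionally enabled pass list (enable_pass here is a hypothetical reimplementation, not MIGraphX's):

    #include <functional>
    #include <iostream>
    #include <vector>

    using pass = std::function<void()>;

    // Hypothetical helper: returns the pass if enabled, otherwise a no-op.
    pass enable_pass(bool enabled, pass p)
    {
        if(enabled)
            return p;
        return [] {};
    }

    int main()
    {
        bool fast_math = true; // stands in for options.fast_math in the diff
        std::vector<pass> passes = {
            [] { std::cout << "rewrite_pooling\n"; },
            enable_pass(fast_math, [] { std::cout << "rewrite_gelu\n"; }),
            [] { std::cout << "dead_code_elimination\n"; },
        };
        for(const auto& p : passes)
            p();
    }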
test/CMakeLists.txt

@@ -24,8 +24,6 @@
 cmake_policy(SET CMP0057 NEW)
 include(CTest)
 find_package(Threads REQUIRED)
-include(ProcessorCount)
-ProcessorCount(N)
 ...
test/dead_code_elimination_test.cpp

@@ -232,7 +232,6 @@ TEST_CASE(reused_twice)
     auto count = std::distance(mm->begin(), mm->end());
     run_pass(p);
-    p.debug_print();
     EXPECT(std::distance(mm->begin(), mm->end()) != count);
     EXPECT(std::distance(mm->begin(), mm->end()) == 4);
 }
 ...
@@ -274,4 +273,17 @@ TEST_CASE(param_not_eliminated)
     EXPECT(p == create_program());
 }

+TEST_CASE(tuple_test)
+{
+    migraphx::program p;
+    auto* mm   = p.get_main_module();
+    auto one   = mm->add_literal(1);
+    auto two   = mm->add_literal(2);
+    mm->add_instruction(tuple_op{}, one, two);
+    mm->add_return({one, two});
+    auto count = std::distance(mm->begin(), mm->end());
+    run_pass(p);
+    EXPECT(std::distance(mm->begin(), mm->end()) == (count - 1));
+}
+
 int main(int argc, const char* argv[]) { test::run(argc, argv); }
test/gpu/fuse_mlir.cpp (new file, mode 100644)
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/gpu/fuse_mlir.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/program.hpp>
#include <basic_ops.hpp>
#include <migraphx/make_op.hpp>
#include <test.hpp>
#include <pointwise.hpp>

void run_pass(migraphx::program& p)
{
    migraphx::run_passes(p, {migraphx::gpu::fuse_mlir{}, migraphx::dead_code_elimination{}});
}

template <class F>
migraphx::instruction_ref add_mlir(migraphx::program& p,
                                   const std::string& name,
                                   std::vector<migraphx::instruction_ref> inputs,
                                   std::vector<std::string> arg_names,
                                   F f)
{
    assert(inputs.size() == arg_names.size() && "One interior parameter name given per input.");
    auto* mm = p.get_main_module();
    auto* pm = p.create_module(name);
    pm->set_bypass();
    std::vector<migraphx::instruction_ref> params;
    for(size_t i = 0, e = inputs.size(); i < e; ++i)
    {
        params.push_back(pm->add_parameter(arg_names[i], inputs[i]->get_shape()));
    }
    auto values = f(pm, params);
    auto root   = std::get<0>(values);
    auto r      = std::get<1>(values);
    pm->add_return({r});
    return mm->add_instruction(
        migraphx::make_op("gpu::mlir_op", {{"op", migraphx::to_value(root->get_operator())}}),
        inputs,
        {pm});
}

TEST_CASE(dot_add)
{
    migraphx::shape s{migraphx::shape::float_type, {1, 3, 3}};
    migraphx::program p1;
    {
        auto* mm = p1.get_main_module();
        auto a   = mm->add_parameter("a", s);
        auto b   = mm->add_parameter("b", s);
        auto x   = mm->add_parameter("x", s);
        auto dot = mm->add_instruction(migraphx::make_op("dot"), a, b);
        auto add = add_pointwise(p1, "main:pointwise0", {dot, x}, single_pointwise("add"));
        mm->add_return({add});
    }
    run_pass(p1);

    migraphx::program p2;
    {
        auto* mm   = p2.get_main_module();
        auto a     = mm->add_parameter("a", s);
        auto b     = mm->add_parameter("b", s);
        auto x     = mm->add_parameter("x", s);
        auto fused = add_mlir(p2,
                              "mlir_main:pointwise0",
                              {x, a, b},
                              {"x1", "y0", "y1"},
                              [=](auto* pm, const auto& inputs) {
                                  auto dot = pm->add_instruction(
                                      migraphx::make_op("dot"), inputs[1], inputs[2]);
                                  auto add = pm->add_instruction(
                                      migraphx::make_op("add"), dot, inputs[0]);
                                  return std::make_tuple(dot, add);
                              });
        mm->add_return({fused});
    }
    EXPECT(p1.sort() == p2.sort());
}

TEST_CASE(int_quant_dot_abs)
{
    migraphx::shape s_a{migraphx::shape::int8_type, {5, 4}};
    migraphx::shape s_b{migraphx::shape::int8_type, {4, 3}};
    migraphx::program p1;
    {
        auto* mm = p1.get_main_module();
        auto a   = mm->add_parameter("a", s_a);
        auto b   = mm->add_parameter("b", s_b);
        auto dot = mm->add_instruction(migraphx::make_op("quant_dot"), a, b);
        auto abs = add_pointwise(p1, "main:pointwise0", {dot}, single_pointwise("abs"));
        mm->add_return({abs});
    }
    run_pass(p1);

    migraphx::program p2;
    {
        auto* mm   = p2.get_main_module();
        auto a     = mm->add_parameter("a", s_a);
        auto b     = mm->add_parameter("b", s_b);
        auto fused = add_mlir(p2,
                              "mlir_main:pointwise0",
                              {a, b},
                              {"y0", "y1"},
                              [=](auto* pm, const auto& inputs) {
                                  auto dot = pm->add_instruction(
                                      migraphx::make_op("quant_dot"), inputs[0], inputs[1]);
                                  auto abs = pm->add_instruction(migraphx::make_op("abs"), dot);
                                  return std::make_tuple(dot, abs);
                              });
        mm->add_return({fused});
    }
    EXPECT(p1.sort() == p2.sort());
}

TEST_CASE(int_quant_dot_tanh_fails)
{
    migraphx::shape s_a{migraphx::shape::int8_type, {5, 4}};
    migraphx::shape s_b{migraphx::shape::int8_type, {4, 3}};
    migraphx::program p1;
    {
        auto* mm  = p1.get_main_module();
        auto a    = mm->add_parameter("a", s_a);
        auto b    = mm->add_parameter("b", s_b);
        auto dot  = mm->add_instruction(migraphx::make_op("quant_dot"), a, b);
        auto tanh = add_pointwise(p1, "main:pointwise0", {dot}, single_pointwise("tanh"));
        mm->add_return({tanh});
    }
    migraphx::program p2(p1);
    // This pass should do nothing as int32_t tanh isn't supported.
    run_pass(p1);
    EXPECT(p1 == p2);
}

int main(int argc, const char* argv[])
{
    if(migraphx::gpu::mlir_enabled())
        test::run(argc, argv);
    return 0;
}
test/gpu/mlir.cpp

@@ -187,12 +187,39 @@ module {
     EXPECT(verify_mlir(m));
 }

+TEST_CASE(quant_dot_add)
+{
+    const std::string mlir_output = R"__migraphx__(
+module {
+  func.func @main(%arg0: tensor<1x5x4xi8>, %arg1: tensor<1x4x3xi8>, %arg2: tensor<1x5x3xi32>) -> tensor<1x5x3xi32> attributes {arch = "", kernel = "mixr"} {
+    %0 = migraphx.quant_dot(%arg0, %arg1) : (tensor<1x5x4xi8>, tensor<1x4x3xi8>) -> tensor<1x5x3xi32>
+    %1 = migraphx.add(%0, %arg2) : (tensor<1x5x3xi32>, tensor<1x5x3xi32>) -> tensor<1x5x3xi32>
+    return %1 : tensor<1x5x3xi32>
+  }
+}
+)__migraphx__";
+    migraphx::module m;
+    auto arg0 = m.add_parameter("arg0", {migraphx::shape::int8_type, {1, 5, 4}});
+    auto arg1 = m.add_parameter("arg1", {migraphx::shape::int8_type, {1, 4, 3}});
+    auto arg2 = m.add_parameter("arg2", {migraphx::shape::int32_type, {1, 5, 3}});
+    auto conv = m.add_instruction(migraphx::make_op("quant_dot"), arg0, arg1);
+    auto add  = m.add_instruction(migraphx::make_op("add"), conv, arg2);
+    m.add_return({add});
+    auto s = migraphx::gpu::dump_mlir(m);
+    // Skip test if MLIR is not enabled
+    if(s.empty())
+        return;
+    CHECK(encode(s) == encode(mlir_output));
+    EXPECT(verify_mlir(m));
+}
+
 TEST_CASE(dot_add)
 {
     const std::string mlir_output = R"__migraphx__(
 module {
   func.func @mlir_dot(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>, %arg2: tensor<1x5x3xf32>) -> tensor<1x5x3xf32> attributes {arch = "", kernel = "mixr"} {
-    %0 = migraphx.dot(%arg0, %arg1) : tensor<1x5x4xf32>, tensor<1x4x3xf32> -> tensor<1x5x3xf32>
+    %0 = migraphx.dot(%arg0, %arg1) : (tensor<1x5x4xf32>, tensor<1x4x3xf32>) -> tensor<1x5x3xf32>
     %1 = migraphx.add(%0, %arg2) : (tensor<1x5x3xf32>, tensor<1x5x3xf32>) -> tensor<1x5x3xf32>
     return %1 : tensor<1x5x3xf32>
   }
 ...
@@ -246,4 +273,57 @@ module {
     EXPECT(verify_mlir(m));
 }

+TEST_CASE(dot_convert)
+{
+    const std::string mlir_output = R"__migraphx__(
+module {
+  func.func @mlir_dot(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>) -> tensor<1x5x3xf16> attributes {arch = "", kernel = "mixr"} {
+    %0 = migraphx.dot(%arg0, %arg1) : (tensor<1x5x4xf32>, tensor<1x4x3xf32>) -> tensor<1x5x3xf32>
+    %1 = migraphx.convert(%0) {target_type = 1 : i64} : (tensor<1x5x3xf32>) -> tensor<1x5x3xf16>
+    return %1 : tensor<1x5x3xf16>
+  }
+}
+)__migraphx__";
+    migraphx::module m;
+    auto arg0  = m.add_parameter("arg0", {migraphx::shape::float_type, {1, 5, 4}});
+    auto arg1  = m.add_parameter("arg1", {migraphx::shape::float_type, {1, 4, 3}});
+    auto dot   = m.add_instruction(migraphx::make_op("dot"), arg0, arg1);
+    auto trunc = m.add_instruction(
+        migraphx::make_op("convert", {{"target_type", migraphx::shape::half_type}}), dot);
+    m.add_return({trunc});
+    auto s = migraphx::gpu::dump_mlir(m);
+    // Skip test if MLIR is not enabled
+    if(s.empty())
+        return;
+    CHECK(encode(s) == encode(mlir_output));
+    EXPECT(verify_mlir(m));
+}
+
+TEST_CASE(dot_where)
+{
+    const std::string mlir_output = R"__migraphx__(
+module {
+  func.func @mlir_dot(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>, %arg2: tensor<1x5x3xi8>, %arg3: tensor<1x5x3xf32>) -> tensor<1x5x3xf32> attributes {arch = "", kernel = "mixr"} {
+    %0 = migraphx.dot(%arg0, %arg1) : (tensor<1x5x4xf32>, tensor<1x4x3xf32>) -> tensor<1x5x3xf32>
+    %1 = migraphx.where(%arg2, %0, %arg3) : (tensor<1x5x3xi8>, tensor<1x5x3xf32>, tensor<1x5x3xf32>) -> tensor<1x5x3xf32>
+    return %1 : tensor<1x5x3xf32>
+  }
+}
+)__migraphx__";
+    migraphx::module m;
+    auto arg0  = m.add_parameter("arg0", {migraphx::shape::float_type, {1, 5, 4}});
+    auto arg1  = m.add_parameter("arg1", {migraphx::shape::float_type, {1, 4, 3}});
+    auto arg2  = m.add_parameter("arg2", {migraphx::shape::bool_type, {1, 5, 3}});
+    auto arg3  = m.add_parameter("arg3", {migraphx::shape::float_type, {1, 5, 3}});
+    auto dot   = m.add_instruction(migraphx::make_op("dot"), arg0, arg1);
+    auto where = m.add_instruction(migraphx::make_op("where"), arg2, dot, arg3);
+    m.add_return({where});
+    auto s = migraphx::gpu::dump_mlir(m);
+    // Skip test if MLIR is not enabled
+    if(s.empty())
+        return;
+    CHECK(encode(s) == encode(mlir_output));
+    EXPECT(verify_mlir(m));
+}
+
 int main(int argc, const char* argv[]) { test::run(argc, argv); }
test/include/basic_ops.hpp

@@ -186,6 +186,21 @@ struct nop
     migraphx::shape compute_shape(const std::vector<migraphx::shape>&) const { return {}; }
 };

+struct tuple_op
+{
+    std::string name() const { return "tuple_op"; }
+    migraphx::shape compute_shape(const std::vector<migraphx::shape>& inputs) const
+    {
+        return {inputs};
+    }
+    migraphx::argument compute(migraphx::context&,
+                               const migraphx::shape&,
+                               const std::vector<migraphx::argument>& input_args) const
+    {
+        return input_args;
+    }
+};
+
 inline migraphx::literal get_2x2(int base = 0)
 {
     return migraphx::literal{{migraphx::shape::float_type, {2, 2}},
 ...
test/onnx/.onnxrt-commit

-5a43828b3d73028bfd33b3856f82698d9ab02cb1
+fbf08c4b4dce5da245189203d9f6cfc41f6663a2
test/onnx/clip_dyn_min_max_test.onnx (new file, mode 100644)

    Binary file added.

test/onnx/clip_dyn_min_only_test.onnx (new file, mode 100644)

    Binary file added.