Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
9747cc44
Commit
9747cc44
authored
Jul 06, 2022
by
Paul
Browse files
Merge branch 'bert-opt2' into bert-opt3
parents
48dbbd11
db0301d7
Changes
38
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
351 additions
and
112 deletions
+351
-112
Dockerfile
Dockerfile
+1
-1
src/include/migraphx/matcher.hpp
src/include/migraphx/matcher.hpp
+13
-11
src/include/migraphx/module.hpp
src/include/migraphx/module.hpp
+8
-2
src/include/migraphx/module_ref.hpp
src/include/migraphx/module_ref.hpp
+2
-1
src/include/migraphx/ranges.hpp
src/include/migraphx/ranges.hpp
+6
-0
src/include/migraphx/shape.hpp
src/include/migraphx/shape.hpp
+4
-0
src/module.cpp
src/module.cpp
+22
-12
src/program.cpp
src/program.cpp
+19
-14
src/serialize.cpp
src/serialize.cpp
+2
-2
src/targets/cpu/write_literals.cpp
src/targets/cpu/write_literals.cpp
+2
-0
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+18
-7
src/targets/gpu/code_object_op.cpp
src/targets/gpu/code_object_op.cpp
+3
-2
src/targets/gpu/deconvolution.cpp
src/targets/gpu/deconvolution.cpp
+70
-29
src/targets/gpu/driver/compile_op.cpp
src/targets/gpu/driver/compile_op.cpp
+5
-2
src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp
src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp
+2
-1
src/targets/gpu/driver/perf.cpp
src/targets/gpu/driver/perf.cpp
+19
-10
src/targets/gpu/driver/run_op.cpp
src/targets/gpu/driver/run_op.cpp
+2
-2
src/targets/gpu/fuse_mlir.cpp
src/targets/gpu/fuse_mlir.cpp
+139
-0
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+2
-9
src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
+12
-7
No files found.
Dockerfile
View file @
9747cc44
...
...
@@ -86,7 +86,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
ADD
tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
RUN
PATH
=
/opt/cmake/bin:
$PATH
cget
-p
/usr/local
install
ROCmSoftwarePlatform/llvm-project-mlir@
02078ce236ad90e3aec04c0c770ef5bfc99e49c2
RUN
cget
-p
/usr/local
install
ROCmSoftwarePlatform/llvm-project-mlir@
26a4b3cfc0a1a15181490f24ae461608fef1b04e
-DBUILD_MIXR_TARGET
=
On
ENV
MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
ENV
MIOPEN_USER_DB_PATH=/tmp/miopen/user-db
...
...
src/include/migraphx/matcher.hpp
View file @
9747cc44
...
...
@@ -349,25 +349,27 @@ match::matcher_result find_match(module& modl, M&& m)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_TRACE_MATCHES
)
/// Find matches for an instruction in the module
template
<
class
...
Ms
>
void
find_matches
(
m
od
ule
&
mod
,
instruction_ref
ins
,
Ms
&&
...
ms
)
template
<
class
Mod
,
class
...
Ms
>
void
find_matches
(
M
od
&
mod
,
instruction_ref
ins
,
Ms
&&
...
ms
)
{
#if !defined(__GNUC__) || defined(__clang__) || __GNUC__ > 5
const
#endif
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MATCHES
{});
bool
match
=
false
;
int
trace
=
value_of
(
MIGRAPHX_TRACE_MATCHES
{});
bool
match
=
false
;
each_args
(
[
&
](
auto
&&
m
)
{
if
(
match
)
return
;
auto
r
=
match_instruction
(
mod
,
ins
,
m
.
matcher
());
if
(
r
.
result
==
mod
.
end
())
if
(
trace
>
1
)
std
::
cout
<<
"Match: "
<<
get_type_name
(
m
)
<<
std
::
endl
;
auto
r
=
match_instruction
(
get_module
(
mod
),
ins
,
m
.
matcher
());
if
(
r
.
result
==
get_module
(
mod
).
end
())
return
;
if
(
trace
)
if
(
trace
>
0
)
{
std
::
cout
<<
"Matched by "
<<
get_type_name
(
m
)
<<
std
::
endl
;
mod
.
debug_print
(
ins
);
get_module
(
mod
)
.
debug_print
(
ins
);
}
m
.
apply
(
mod
,
r
);
match
=
true
;
...
...
@@ -376,10 +378,10 @@ void find_matches(module& mod, instruction_ref ins, Ms&&... ms)
}
/// Find matches in a module
template
<
class
...
Ms
>
void
find_matches
(
m
od
ule
&
mod
,
Ms
&&
...
ms
)
template
<
class
Mod
,
class
...
Ms
>
void
find_matches
(
M
od
&
mod
,
Ms
&&
...
ms
)
{
for
(
auto
ins
:
iterator_for
(
mod
))
for
(
auto
ins
:
iterator_for
(
get_module
(
mod
)
))
{
find_matches
(
mod
,
ins
,
ms
...);
}
...
...
src/include/migraphx/module.hpp
View file @
9747cc44
...
...
@@ -124,7 +124,7 @@ struct module
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_ins
=
{});
std
::
vector
<
instruction_ref
>
add_instructions
(
module_ref
m
,
add_instructions
(
const_
module_ref
m
,
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_ins
=
{});
std
::
vector
<
instruction_ref
>
...
...
@@ -139,7 +139,7 @@ struct module
std
::
vector
<
instruction_ref
>
insert_instructions
(
instruction_ref
ins
,
module_ref
m
,
const_
module_ref
m
,
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_ins
=
{});
std
::
vector
<
instruction_ref
>
...
...
@@ -164,6 +164,10 @@ struct module
instruction_ref
replace_return
(
std
::
vector
<
instruction_ref
>
args
);
instruction_ref
insert_literal
(
instruction_ref
ins
,
literal
l
);
instruction_ref
insert_parameter
(
instruction_ref
ins
,
std
::
string
name
,
shape
s
);
std
::
vector
<
std
::
string
>
get_parameter_names
()
const
;
shape
get_parameter_shape
(
std
::
string
name
)
const
;
...
...
@@ -227,6 +231,8 @@ struct module
std
::
unique_ptr
<
module_impl
>
impl
;
};
inline
module
&
get_module
(
module
&
m
)
{
return
m
;
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
...
...
src/include/migraphx/module_ref.hpp
View file @
9747cc44
...
...
@@ -32,7 +32,8 @@ namespace migraphx {
inline
namespace
MIGRAPHX_INLINE_NS
{
struct
module
;
using
module_ref
=
module
*
;
using
module_ref
=
module
*
;
using
const_module_ref
=
const
module
*
;
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
...
...
src/include/migraphx/ranges.hpp
View file @
9747cc44
...
...
@@ -198,6 +198,12 @@ void transform(Range&& r, Iterator it, F f)
std
::
transform
(
r
.
begin
(),
r
.
end
(),
it
,
f
);
}
template
<
class
Range1
,
class
Range2
,
class
Iterator
,
class
F
>
void
transform
(
Range1
&&
r1
,
Range2
&&
r2
,
Iterator
it
,
F
f
)
{
std
::
transform
(
r1
.
begin
(),
r1
.
end
(),
r2
.
begin
(),
it
,
f
);
}
template
<
class
Range
>
auto
reverse
(
Range
&
r
)
{
...
...
src/include/migraphx/shape.hpp
View file @
9747cc44
...
...
@@ -191,6 +191,10 @@ struct shape
std
::
size_t
size
(
std
::
size_t
n
=
1
)
const
{
return
sizeof
(
type
)
*
n
;
}
auto
is_integral
()
const
{
return
std
::
is_integral
<
type
>
{};
}
auto
is_signed
()
const
{
return
std
::
is_signed
<
type
>
{};
}
auto
is_unsigned
()
const
{
return
std
::
is_unsigned
<
type
>
{};
}
template
<
class
U
>
type
*
from
(
U
*
buffer
,
std
::
size_t
n
=
0
)
const
{
...
...
src/module.cpp
View file @
9747cc44
...
...
@@ -399,7 +399,8 @@ module::add_instructions(const std::vector<instruction_ref>& instructions,
}
std
::
vector
<
instruction_ref
>
module
::
add_instructions
(
module_ref
m
,
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_ins
)
module
::
add_instructions
(
const_module_ref
m
,
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_ins
)
{
return
this
->
insert_instructions
(
this
->
end
(),
m
,
std
::
move
(
map_ins
));
}
...
...
@@ -420,8 +421,10 @@ module::insert_instructions(instruction_ref ins,
return
insert_generic_instructions
(
*
this
,
ins
,
instructions
,
std
::
move
(
map_ins
));
}
std
::
vector
<
instruction_ref
>
module
::
insert_instructions
(
instruction_ref
ins
,
module_ref
m
,
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_ins
)
std
::
vector
<
instruction_ref
>
module
::
insert_instructions
(
instruction_ref
ins
,
const_module_ref
m
,
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_ins
)
{
return
insert_generic_instructions
(
*
this
,
ins
,
iterator_for
(
*
m
),
std
::
move
(
map_ins
));
}
...
...
@@ -436,11 +439,7 @@ module::insert_instructions(instruction_ref ins,
return
insert_generic_instructions
(
*
this
,
ins
,
iterator_for
(
r
),
std
::
move
(
map_ins
));
}
instruction_ref
module
::
add_literal
(
literal
l
)
{
impl
->
emplace_front
(
std
::
move
(
l
));
return
impl
->
instructions
.
begin
();
}
instruction_ref
module
::
add_literal
(
literal
l
)
{
return
insert_literal
(
begin
(),
std
::
move
(
l
));
}
instruction_ref
module
::
add_outline
(
const
shape
&
s
)
{
...
...
@@ -450,10 +449,7 @@ instruction_ref module::add_outline(const shape& s)
instruction_ref
module
::
add_parameter
(
std
::
string
name
,
shape
s
)
{
assert
(
get_parameter_shape
(
name
)
==
shape
{});
impl
->
push_front
({
builtin
::
param
{
std
::
move
(
name
),
impl
->
nparams
},
std
::
move
(
s
),
{}});
impl
->
nparams
++
;
return
impl
->
instructions
.
begin
();
return
insert_parameter
(
begin
(),
std
::
move
(
name
),
std
::
move
(
s
));
}
instruction_ref
module
::
add_return
(
std
::
vector
<
instruction_ref
>
args
)
...
...
@@ -466,6 +462,20 @@ instruction_ref module::add_return(std::vector<instruction_ref> args)
return
result
;
}
instruction_ref
module
::
insert_literal
(
instruction_ref
ins
,
literal
l
)
{
impl
->
emplace
(
ins
,
std
::
move
(
l
));
return
std
::
prev
(
ins
);
}
instruction_ref
module
::
insert_parameter
(
instruction_ref
ins
,
std
::
string
name
,
shape
s
)
{
assert
(
get_parameter_shape
(
name
)
==
shape
{});
impl
->
insert
(
ins
,
{
builtin
::
param
{
std
::
move
(
name
),
impl
->
nparams
},
std
::
move
(
s
),
{}});
impl
->
nparams
++
;
return
std
::
prev
(
ins
);
}
instruction_ref
module
::
replace_return
(
std
::
vector
<
instruction_ref
>
args
)
{
auto
last
=
std
::
prev
(
this
->
end
());
...
...
src/program.cpp
View file @
9747cc44
...
...
@@ -504,12 +504,14 @@ static void mod_from_val(module_ref mod,
if
(
name
==
"@param"
)
{
output
=
mod
->
add_parameter
(
fields
[
"parameter"
].
to
<
std
::
string
>
(),
migraphx
::
from_value
<
shape
>
(
node
.
at
(
"shape"
)));
output
=
mod
->
insert_parameter
(
mod
->
end
(),
fields
[
"parameter"
].
to
<
std
::
string
>
(),
migraphx
::
from_value
<
shape
>
(
node
.
at
(
"shape"
)));
}
else
if
(
name
==
"@literal"
)
{
output
=
mod
->
add_literal
(
migraphx
::
from_value
<
literal
>
(
node
.
at
(
"literal"
)));
output
=
mod
->
insert_literal
(
mod
->
end
(),
migraphx
::
from_value
<
literal
>
(
node
.
at
(
"literal"
)));
}
else
{
...
...
@@ -544,11 +546,11 @@ static void mod_from_val(module_ref mod,
}
else
if
(
module_inputs
.
empty
())
{
output
=
mod
->
add
_instruction
(
op
,
inputs
);
output
=
mod
->
insert
_instruction
(
mod
->
end
(),
op
,
inputs
);
}
else
{
output
=
mod
->
add
_instruction
(
op
,
inputs
,
module_inputs
);
output
=
mod
->
insert
_instruction
(
mod
->
end
(),
op
,
inputs
,
module_inputs
);
}
}
output
->
set_normalized
(
normalized
);
...
...
@@ -681,11 +683,13 @@ void program::perf_report(std::ostream& os,
double
overhead_percent
=
overhead_time
*
100.0
/
total_time
;
double
total_instruction_time
=
0.0
;
std
::
unordered_map
<
std
::
string
,
double
>
op_times
;
std
::
unordered_map
<
std
::
string
,
std
::
size_t
>
op_n
;
for
(
auto
&&
p
:
ins_vec
)
{
double
avg
=
common_average
(
p
.
second
);
op_times
[
perf_group
(
p
.
first
->
get_operator
())]
+=
avg
;
total_instruction_time
+=
avg
;
op_n
[
perf_group
(
p
.
first
->
get_operator
())]
++
;
}
double
calculate_overhead_time
=
total_time
-
total_instruction_time
;
double
calculate_overhead_percent
=
calculate_overhead_time
*
100.0
/
total_time
;
...
...
@@ -706,18 +710,19 @@ void program::perf_report(std::ostream& os,
os
<<
std
::
endl
;
os
<<
"Summary:"
<<
std
::
endl
;
std
::
vector
<
std
::
pair
<
double
,
std
::
string
>>
op_times_sorted
;
std
::
transform
(
op_times
.
begin
(),
op_times
.
end
(),
std
::
back_inserter
(
op_times_sorted
),
[](
auto
p
)
{
return
std
::
make_pair
(
p
.
second
,
p
.
first
);
});
std
::
vector
<
std
::
tuple
<
double
,
std
::
size_t
,
std
::
string
>>
op_times_sorted
;
std
::
transform
(
op_times
.
begin
(),
op_times
.
end
(),
std
::
back_inserter
(
op_times_sorted
),
[
&
](
auto
p
)
{
auto
&&
name
=
p
.
first
;
return
std
::
make_tuple
(
p
.
second
,
op_n
.
at
(
name
),
name
);
});
std
::
sort
(
op_times_sorted
.
begin
(),
op_times_sorted
.
end
(),
std
::
greater
<>
{});
for
(
auto
&&
p
:
op_times_sorted
)
for
(
auto
&&
[
avg
,
nn
,
name
]
:
op_times_sorted
)
{
auto
&&
name
=
p
.
second
;
double
avg
=
p
.
first
;
double
percent
=
std
::
ceil
(
100.0
*
avg
/
total_instruction_time
);
os
<<
name
<<
": "
<<
avg
<<
"ms, "
<<
percent
<<
"%"
<<
std
::
endl
;
double
per_ins
=
avg
/
nn
;
os
<<
name
<<
": "
<<
avg
<<
"ms / "
<<
nn
<<
" = "
<<
per_ins
<<
"ms, "
<<
percent
<<
"%"
<<
std
::
endl
;
}
os
<<
std
::
endl
;
...
...
src/serialize.cpp
View file @
9747cc44
...
...
@@ -36,7 +36,7 @@ void raw_data_to_value(value& v, const RawData& rd)
result
[
"shape"
]
=
migraphx
::
to_value
(
rd
.
get_shape
());
if
(
rd
.
get_shape
().
type
()
==
shape
::
tuple_type
)
result
[
"sub"
]
=
migraphx
::
to_value
(
rd
.
get_sub_objects
());
else
else
if
(
not
rd
.
empty
())
result
[
"data"
]
=
migraphx
::
value
::
binary
(
rd
.
data
(),
rd
.
get_shape
().
bytes
());
v
=
result
;
}
...
...
@@ -56,7 +56,7 @@ void migraphx_from_value(const value& v, argument& a)
literal
l
=
migraphx
::
from_value
<
literal
>
(
v
);
a
=
l
.
get_argument
();
}
else
else
if
(
v
.
contains
(
"sub"
))
{
a
=
migraphx
::
from_value
<
std
::
vector
<
argument
>>
(
v
.
at
(
"sub"
));
}
...
...
src/targets/cpu/write_literals.cpp
View file @
9747cc44
...
...
@@ -25,6 +25,7 @@
#include <migraphx/module.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/register_op.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -52,6 +53,7 @@ struct cpu_literal
return
os
;
}
};
MIGRAPHX_REGISTER_OP
(
cpu_literal
);
void
write_literals
::
apply
(
module
&
m
)
const
{
...
...
src/targets/gpu/CMakeLists.txt
View file @
9747cc44
...
...
@@ -164,6 +164,7 @@ add_library(migraphx_gpu
deconvolution.cpp
device_name.cpp
elu.cpp
fuse_mlir.cpp
fuse_ops.cpp
gather.cpp
gemm_impl.cpp
...
...
@@ -176,7 +177,7 @@ add_library(migraphx_gpu
loop.cpp
lrn.cpp
leaky_relu.cpp
mlir
_conv
.cpp
mlir.cpp
multinomial.cpp
nonzero.cpp
pack_args.cpp
...
...
@@ -320,16 +321,26 @@ message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}")
set
(
MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL
""
)
if
(
MIGRAPHX_ENABLE_MLIR
)
find_library
(
LIBMLIRMIOPEN MLIRMIOpenThin REQUIRED
)
find_library
(
MLIRAPI_LIBRARY MLIRMIOpen
PATH_SUFFIXES
# Workaournd broken mlir install
lib/ lib/lib
)
# REQUIRED is not supported before cmake 3.18
if
(
NOT
LIB
MLIR
MIOPEN
)
message
(
FATAL_ERROR
"libMLIRMIOpen
Thin
not found"
)
if
(
NOT MLIR
API_LIBRARY
)
message
(
FATAL_ERROR
"libMLIRMIOpen not found"
)
else
()
message
(
STATUS
"Build with libMLIRMIOpen
Thin
: "
${
LIB
MLIR
MIOPEN
}
)
message
(
STATUS
"Build with libMLIRMIOpen: "
${
MLIR
API_LIBRARY
}
)
endif
()
target_compile_definitions
(
migraphx_gpu PRIVATE
"-DMIGRAPHX_MLIR_MIOPEN_SUPPORT"
)
target_link_libraries
(
migraphx_gpu PUBLIC
${
LIBMLIRMIOPEN
}
)
find_path
(
MLIRAPI_HEADERS NAMES mlir-c/Dialect/MIGraphX.h
)
# Workaround MLIR broken installation
find_path
(
MLIRAPI_HEADERS2 NAMES mlir-c/Registration.h
PATH_SUFFIXES
include/external/include external/include
)
target_compile_definitions
(
migraphx_gpu PRIVATE
"-DMIGRAPHX_MLIR"
)
target_include_directories
(
migraphx_gpu SYSTEM PRIVATE
${
MLIRAPI_HEADERS
}
${
MLIRAPI_HEADERS2
}
)
target_link_libraries
(
migraphx_gpu PUBLIC
${
MLIRAPI_LIBRARY
}
)
endif
()
set
(
MIGRAPHX_USE_HIPRTC OFF CACHE BOOL
""
)
...
...
src/targets/gpu/code_object_op.cpp
View file @
9747cc44
...
...
@@ -51,8 +51,9 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>&
std
::
vector
<
void
*>
kargs
(
args
.
size
());
std
::
transform
(
args
.
begin
(),
args
.
end
(),
kargs
.
begin
(),
[](
const
argument
&
a
)
{
return
a
.
data
();
});
k
.
launch
(
ctx
.
get_stream
().
get
(),
global
,
local
,
std
::
move
(
kargs
));
return
args
.
back
();
auto
[
start
,
stop
]
=
ctx
.
get_perf_events
();
k
.
launch
(
ctx
.
get_stream
().
get
(),
global
,
local
,
std
::
move
(
kargs
),
start
,
stop
);
return
args
[
get_output_arg
(
args
.
size
())];
}
void
code_object_op
::
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
...
...
src/targets/gpu/deconvolution.cpp
View file @
9747cc44
...
...
@@ -59,31 +59,30 @@ argument miopen_deconvolution::compute(context& ctx,
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
args
[
1
].
get_shape
()));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
float
alpha
=
1
;
float
beta
=
0
;
auto
status
=
miopenConvolutionForward
(
ctx
.
get_stream
().
get_miopen
(),
&
alpha
,
x
_desc
.
get
(),
args
[
0
].
implicit
(),
w
_desc
.
get
(),
args
[
1
].
implicit
(),
cd
.
get
(),
algo
,
&
beta
,
y_desc
.
ge
t
(),
args
[
3
].
implicit
(),
args
[
2
].
implicit
(),
args
[
2
].
get_shape
().
bytes
());
if
(
solution_id
==
0
)
MIGRAPHX_THROW
(
"MIOpen Deconvolution: invalid solution ID"
)
;
auto
status
=
miopenConvolutionForwardImmediate
(
ctx
.
get_stream
().
get_miopen
()
,
w
_desc
.
get
(),
args
[
1
].
implicit
(),
x
_desc
.
get
(),
args
[
0
].
implicit
(),
cd
.
get
(),
y_desc
.
get
()
,
args
[
3
].
implicit
()
,
args
[
2
].
implici
t
(),
args
[
2
].
get_shape
().
bytes
(),
solution_id
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"
R
unning
de
convolution failed"
);
MIGRAPHX_THROW
(
"
MIOpen Deconvolution: r
unning convolution failed"
);
return
args
[
3
];
}
shape
miopen_deconvolution
::
compile
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
)
shape
miopen_deconvolution
::
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
)
{
shape
workspace_shape
{};
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]));
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
...
...
@@ -119,9 +118,35 @@ shape miopen_deconvolution::compile(context& ctx,
workspace_size
,
false
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"Find deconvolution failed"
);
handle
=
ctx
.
get_stream
().
get_miopen
();
algo
=
perf
.
fwd_algo
;
MIGRAPHX_THROW
(
"MIOpen Deconvolution: find convolution failed"
);
algo
=
perf
.
fwd_algo
;
size_t
solution_count
;
status
=
miopenConvolutionForwardGetSolutionCount
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
&
solution_count
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen Deconvolution: get solution count failed"
);
std
::
vector
<
miopenConvSolution_t
>
solutions
(
solution_count
);
status
=
miopenConvolutionForwardGetSolution
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
solution_count
,
&
solution_count
,
solutions
.
data
());
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen Deconvolution: get solution failed"
);
solution_id
=
solutions
.
front
().
solution_id
;
return
shape
{
shape
::
int8_type
,
{
perf
.
memory
}};
}
...
...
@@ -129,13 +154,29 @@ void miopen_deconvolution::finalize(context& ctx,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
)
{
if
(
handle
==
ctx
.
get_stream
().
get_miopen
())
return
;
// Check that workspace hasn't changed
auto
size
=
inputs
.
at
(
2
).
bytes
();
auto
ws
=
compile
(
ctx
,
output_shape
,
std
::
move
(
inputs
));
if
(
ws
.
bytes
()
>
size
)
MIGRAPHX_THROW
(
"Workspace has changed during finalization."
);
if
(
cd
==
nullptr
)
cd
=
make_deconv
(
op
);
if
(
solution_id
==
0
)
{
// Check that workspace hasn't changed
auto
size
=
inputs
.
at
(
2
).
bytes
();
auto
ws
=
find
(
ctx
,
output_shape
,
inputs
);
if
(
ws
.
bytes
()
>
size
)
MIGRAPHX_THROW
(
"MIOpen Deconvolution: workspace has changed during finalization."
);
}
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
0
]));
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
inputs
[
1
]));
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
status
=
miopenConvolutionForwardCompileSolution
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
solution_id
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen Deconvolution: compile solution failed"
);
}
}
// namespace gpu
...
...
src/targets/gpu/driver/compile_op.cpp
View file @
9747cc44
...
...
@@ -38,8 +38,11 @@ struct compile_op : action<compile_op>
context
ctx
;
auto
inputs
=
p
.
parse_shapes
(
v
.
at
(
"inputs"
));
auto
op
=
gpu
::
compile_op
(
v
.
at
(
"name"
).
to
<
std
::
string
>
(),
ctx
,
inputs
,
v
);
double
t
=
time_op
(
ctx
,
op
,
inputs
,
p
.
get
(
v
,
"iterations"
,
100
));
std
::
cout
<<
op
<<
": "
<<
t
<<
"ms"
<<
std
::
endl
;
auto
[
host_time
,
device_time
]
=
time_op
(
ctx
,
op
,
inputs
,
p
.
get
(
v
,
"iterations"
,
100
));
std
::
cout
<<
op
<<
": "
<<
host_time
<<
"ms"
;
if
(
device_time
>
0
)
std
::
cout
<<
", "
<<
device_time
<<
"ms"
;
std
::
cout
<<
std
::
endl
;
}
};
...
...
src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp
View file @
9747cc44
...
...
@@ -33,7 +33,8 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
driver
{
double
time_op
(
context
&
ctx
,
operation
op
,
const
std
::
vector
<
shape
>&
inputs
,
int
n
=
100
);
std
::
pair
<
double
,
double
>
time_op
(
context
&
ictx
,
operation
op
,
const
std
::
vector
<
shape
>&
inputs
,
int
n
=
100
);
}
// namespace driver
}
// namespace gpu
...
...
src/targets/gpu/driver/perf.cpp
View file @
9747cc44
...
...
@@ -42,22 +42,31 @@ std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsig
}
using
milliseconds
=
std
::
chrono
::
duration
<
double
,
std
::
milli
>
;
double
time_op
(
context
&
ctx
,
operation
op
,
const
std
::
vector
<
shape
>&
inputs
,
int
n
)
std
::
pair
<
double
,
double
>
time_op
(
context
&
ictx
,
operation
op
,
const
std
::
vector
<
shape
>&
inputs
,
int
n
)
{
// TODO: Use std::ref
migraphx
::
context
gctx
=
ctx
;
auto
output
=
op
.
compute_shape
(
inputs
);
op
.
finalize
(
gctx
,
output
,
inputs
);
migraphx
::
context
ctx
=
ictx
;
auto
&
gctx
=
any_cast
<
migraphx
::
gpu
::
context
>
(
ctx
);
auto
output
=
op
.
compute_shape
(
inputs
);
op
.
finalize
(
ctx
,
output
,
inputs
);
auto
args
=
generate_arguments
(
inputs
);
auto
run
=
[
&
]
{
op
.
compute
(
g
ctx
,
output
,
args
);
g
ctx
.
finish
();
op
.
compute
(
ctx
,
output
,
args
);
ctx
.
finish
();
};
gctx
.
enable_perf_measurement
();
run
();
auto
r
=
range
(
n
);
double
t
=
std
::
accumulate
(
r
.
begin
(),
r
.
end
(),
double
{
0.0
},
[
&
](
auto
x
,
auto
)
{
return
x
+
time
<
milliseconds
>
(
run
);
});
return
t
/
n
;
double
host_time
=
0.0
;
double
device_time
=
0.0
;
for
(
auto
i
:
range
(
n
))
{
(
void
)
i
;
host_time
+=
time
<
milliseconds
>
(
run
);
device_time
+=
gctx
.
get_elapsed_ms
();
}
return
std
::
make_pair
(
host_time
/
n
,
device_time
/
n
);
}
}
// namespace driver
...
...
src/targets/gpu/driver/run_op.cpp
View file @
9747cc44
...
...
@@ -43,8 +43,8 @@ struct run_op : action<run_op>
auto
op
=
make_op
(
name
);
if
(
v
.
contains
(
"fields"
))
op
.
from_value
(
v
.
at
(
"fields"
));
double
t
=
time_op
(
ctx
,
op
,
inputs
,
p
.
get
(
v
,
"iterations"
,
100
));
std
::
cout
<<
op
<<
": "
<<
t
<<
"ms"
<<
std
::
endl
;
auto
[
host_time
,
device_time
]
=
time_op
(
ctx
,
op
,
inputs
,
p
.
get
(
v
,
"iterations"
,
100
));
std
::
cout
<<
op
<<
": "
<<
host_time
<<
"ms"
<<
std
::
endl
;
}
};
...
...
src/targets/gpu/fuse_mlir.cpp
0 → 100644
View file @
9747cc44
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/fuse_mlir.hpp>
#include <migraphx/gpu/mlir.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_op.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
struct
module
;
namespace
gpu
{
#ifdef MIGRAPHX_MLIR
struct
mlir_conv
{
operation
op
=
make_op
(
"convolution"
);
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
pack
(
f
(
self
.
op
,
"op"
));
}
std
::
string
name
()
const
{
return
"gpu::mlir_conv"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
,
const
std
::
vector
<
module_ref
>&
mods
)
const
{
check_shapes
{
inputs
,
*
this
}.
standard
();
if
(
mods
.
size
()
!=
1
)
MIGRAPHX_THROW
(
"should have one submodule."
);
if
(
inputs
.
size
()
<
2
)
MIGRAPHX_THROW
(
"should have at least two inputs."
);
auto
n
=
inputs
.
size
();
return
op
.
compute_shape
({
inputs
[
n
-
2
],
inputs
[
n
-
1
]});
}
};
MIGRAPHX_REGISTER_OP
(
mlir_conv
);
namespace
{
struct
find_conv_pointwise
{
// Find a convolution followed by a pointwise operation.
auto
matcher
()
const
{
auto
convolution
=
match
::
skip
(
match
::
name
(
"contiguous"
))(
match
::
name
(
"convolution"
).
bind
(
"convolution"
));
return
match
::
name
(
"pointwise"
)(
match
::
any_of
[
match
::
inputs
()](
convolution
.
bind
(
"x"
)));
}
void
apply
(
module_pass_manager
&
mpm
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
conv_ins
=
r
.
instructions
[
"convolution"
];
auto
x_ins
=
r
.
instructions
[
"x"
];
// input after contiguous
auto
*
pm
=
ins
->
module_inputs
().
front
();
auto
names
=
pm
->
get_parameter_names
();
// Whitelist pointwise operators
if
(
std
::
any_of
(
pm
->
begin
(),
pm
->
end
(),
[](
const
auto
&
i
)
{
return
not
contains
({
"@literal"
,
"@param"
,
"@return"
,
"convolution"
,
"add"
,
"relu"
},
i
.
name
());
}))
return
;
// Only fuse with fp32 for now
if
(
std
::
any_of
(
ins
->
inputs
().
begin
(),
ins
->
inputs
().
end
(),
[
&
](
auto
i
)
{
return
i
->
get_shape
().
type
()
!=
shape
::
type_t
::
float_type
;
}))
return
;
std
::
sort
(
names
.
begin
(),
names
.
end
());
module_ref
mm
=
mpm
.
create_module
(
"mlir_"
+
pm
->
name
());
mm
->
set_bypass
();
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
param_map
;
auto
x
=
mm
->
add_parameter
(
"x"
+
std
::
to_string
(
names
.
size
()),
conv_ins
->
inputs
().
at
(
0
)
->
get_shape
());
auto
w
=
mm
->
add_parameter
(
"x"
+
std
::
to_string
(
names
.
size
()
+
1
),
conv_ins
->
inputs
().
at
(
1
)
->
get_shape
());
auto
conv
=
mm
->
add_instruction
(
conv_ins
->
get_operator
(),
{
x
,
w
});
std
::
transform
(
names
.
begin
(),
names
.
end
(),
ins
->
inputs
().
begin
(),
std
::
inserter
(
param_map
,
param_map
.
end
()),
[
&
](
auto
name
,
auto
input
)
{
if
(
input
==
x_ins
)
return
std
::
make_pair
(
pm
->
get_parameter
(
name
),
conv
);
return
std
::
make_pair
(
pm
->
get_parameter
(
name
),
mm
->
add_parameter
(
name
,
input
->
get_shape
()));
});
mm
->
add_return
(
mm
->
insert_instructions
(
mm
->
end
(),
pm
,
param_map
));
std
::
vector
<
instruction_ref
>
inputs
;
std
::
copy_if
(
ins
->
inputs
().
begin
(),
ins
->
inputs
().
end
(),
std
::
back_inserter
(
inputs
),
[
&
](
auto
input
)
{
return
input
!=
conv_ins
;
});
inputs
.
insert
(
inputs
.
end
(),
conv_ins
->
inputs
().
begin
(),
conv_ins
->
inputs
().
end
());
mpm
.
get_module
().
replace_instruction
(
ins
,
mlir_conv
{
conv_ins
->
get_operator
()},
inputs
,
{
mm
});
}
};
}
// namespace
#endif
void
fuse_mlir
::
apply
(
module_pass_manager
&
mpm
)
const
{
#ifdef MIGRAPHX_MLIR
match
::
find_matches
(
mpm
,
find_conv_pointwise
{});
#else
(
void
)
mpm
;
#endif
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/fuse_ops.cpp
View file @
9747cc44
...
...
@@ -342,6 +342,7 @@ void move_standard_front(std::vector<instruction_ref>& args)
auto
gpu_name
(
const
std
::
string
&
s
)
{
return
match
::
name
(
"gpu::"
+
s
);
}
namespace
{
struct
find_layernorm
{
auto
matcher
()
const
{
return
match
::
layernorm
(
&
gpu_name
);
}
...
...
@@ -843,15 +844,6 @@ inline auto precompile_name(Strings... names) // NOLINT
});
}
template
<
class
...
Ms
>
auto
conv_bias_pointwise
(
Ms
...
ms
)
{
return
precompile_name
(
"pointwise"
)(
match
::
either_arg
(
0
,
1
)(
bias_shape
(
match
::
used_once
()).
bind
(
"bias"
),
fusable_conv
(
match
::
used_once
()).
bind
(
"conv"
)),
ms
...);
}
struct
find_conv_bias
{
context
*
ctx
=
nullptr
;
...
...
@@ -1087,6 +1079,7 @@ struct find_commutative_broadcast
m
.
replace_instruction
(
ins
,
ins
->
get_operator
(),
args
);
}
};
}
// namespace
struct
find_contiguous
{
...
...
src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
View file @
9747cc44
...
...
@@ -38,12 +38,13 @@ struct context;
struct
code_object_op
{
value
::
binary
code_object
;
std
::
string
symbol_name
;
std
::
size_t
global
;
std
::
size_t
local
;
std
::
vector
<
shape
>
expected_inputs
;
shape
output
;
value
::
binary
code_object
{};
std
::
string
symbol_name
=
""
;
std
::
size_t
global
=
0
;
std
::
size_t
local
=
0
;
std
::
vector
<
shape
>
expected_inputs
{};
shape
output
{};
std
::
int64_t
output_arg
=
-
1
;
kernel
k
{};
template
<
class
Self
,
class
F
>
...
...
@@ -66,9 +67,13 @@ struct code_object_op
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
void
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
);
std
::
int64_t
get_output_arg
(
std
::
size_t
n
)
const
{
return
output_arg
<
0
?
n
+
output_arg
:
output_arg
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
return
get_output_arg
(
shapes
.
size
()
)
;
}
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
code_object_op
&
op
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment