gaoqiong / MIGraphX · Commits

Commit faefeef9 (unverified)
Authored May 25, 2022 by Charlie Lin; committed by GitHub on May 25, 2022

    Merge branch 'develop' into dyn_shape_update

Parents: 97a40ac3, bf0a4713
Changes: 94. Showing 20 changed files with 685 additions and 583 deletions (+685, -583).
src/reduce_dims.cpp  +18 -4
src/rewrite_batchnorm.cpp  +9 -9
src/rewrite_pooling.cpp  +6 -7
src/rewrite_rnn.cpp  +303 -310
src/schedule.cpp  +37 -37
src/simplify_algebra.cpp  +78 -78
src/simplify_qdq.cpp  +1 -1
src/simplify_reshapes.cpp  +37 -37
src/targets/cpu/lowering.cpp  +1 -1
src/targets/gpu/CMakeLists.txt  +1 -0
src/targets/gpu/analyze_streams.cpp  +10 -10
src/targets/gpu/compile_hip.cpp  +11 -0
src/targets/gpu/eliminate_workspace.cpp  +5 -5
src/targets/gpu/fuse_ops.cpp  +96 -50
src/targets/gpu/gemm_impl.cpp  +38 -6
src/targets/gpu/include/migraphx/gpu/analyze_streams.hpp  +1 -1
src/targets/gpu/include/migraphx/gpu/eliminate_workspace.hpp  +1 -1
src/targets/gpu/include/migraphx/gpu/fuse_ops.hpp  +1 -1
src/targets/gpu/include/migraphx/gpu/gemm.hpp  +7 -25
src/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp  +24 -0
src/reduce_dims.cpp

@@ -16,10 +16,8 @@ bool reduce_dim(std::vector<shape>& shapes, std::size_t n)
         auto bstride = s.strides()[n + 1];
         auto blen    = s.lens()[n + 1];
-        if(astride == bstride * blen)
+        if(astride == bstride * blen or alen == 1)
         {
             new_lens.push_back(alen * blen);
         }
     }
     if(new_lens.size() != shapes.size())
         return false;
@@ -37,10 +35,25 @@ bool reduce_dim(std::vector<shape>& shapes, std::size_t n)
     return true;
 }

+void reduce_dim1(std::vector<shape>& shapes)
+{
+    if(std::any_of(shapes.begin(), shapes.end(), [&](const auto& s) {
+           return s.lens().size() < 2 or s.lens().back() != 1;
+       }))
+        return;
+    for(auto& s : shapes)
+    {
+        auto lens    = s.lens();
+        auto strides = s.strides();
+        lens.pop_back();
+        strides.pop_back();
+        s = shape{s.type(), lens, strides};
+    }
+}
+
 std::size_t reduce_dim_all(std::vector<shape>& shapes, std::size_t n)
 {
     while(reduce_dim(shapes, n) and n < shapes.size())
     {}
     return n + 1;
 }

 void reduce_dim_all(std::vector<shape>& shapes)
@@ -48,6 +61,7 @@ void reduce_dim_all(std::vector<shape>& shapes)
     std::size_t n = 0;
     while(n < shapes.front().lens().size() - 1)
         n = reduce_dim_all(shapes, n);
+    reduce_dim1(shapes);
 }

 std::vector<std::size_t> base_lens(const std::vector<shape>& shapes)
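The new reduce_dim1 step drops a trailing axis of length 1 from every shape once the other dimensions have been merged. A minimal standalone sketch of the effect on a lens/strides pair (plain vectors here, not migraphx::shape):

#include <cstddef>
#include <vector>

int main()
{
    std::vector<std::size_t> lens    = {4, 8, 1};
    std::vector<std::size_t> strides = {8, 1, 1};
    // Strip the trailing length-1 axis, as reduce_dim1 does per shape.
    if(lens.size() >= 2 and lens.back() == 1)
    {
        lens.pop_back();
        strides.pop_back();
    }
    // lens == {4, 8}, strides == {8, 1}: same elements, one fewer axis,
    // which lets later passes treat the buffer as a plain 2-D tensor.
}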
src/rewrite_batchnorm.cpp

@@ -14,9 +14,9 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-void rewrite_batchnorm::apply(module& p) const
+void rewrite_batchnorm::apply(module& m) const
 {
-    for(auto ins : iterator_for(p))
+    for(auto ins : iterator_for(m))
     {
         if(ins->name() != "batch_norm_inference")
             continue;
@@ -46,13 +46,13 @@ void rewrite_batchnorm::apply(module& p) const
         });

         auto broadcast   = op::broadcast{1, ins->get_shape().lens()};
-        auto a_ins       = p.add_literal({a.get_shape(), a.data()});
-        auto a_broadcast = p.insert_instruction(ins, broadcast, a_ins);
-        auto mul = p.insert_instruction(ins, make_op("mul"), ins->inputs().front(), a_broadcast);
-        auto b_ins       = p.add_literal({b.get_shape(), b.data()});
-        auto b_broadcast = p.insert_instruction(ins, broadcast, b_ins);
-        auto add         = p.insert_instruction(ins, make_op("add"), mul, b_broadcast);
-        p.replace_instruction(ins, add);
+        auto a_ins       = m.add_literal({a.get_shape(), a.data()});
+        auto a_broadcast = m.insert_instruction(ins, broadcast, a_ins);
+        auto mul = m.insert_instruction(ins, make_op("mul"), ins->inputs().front(), a_broadcast);
+        auto b_ins       = m.add_literal({b.get_shape(), b.data()});
+        auto b_broadcast = m.insert_instruction(ins, broadcast, b_ins);
+        auto add         = m.insert_instruction(ins, make_op("add"), mul, b_broadcast);
+        m.replace_instruction(ins, add);
     }
 }
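For context on the pass itself (this hunk only renames the module parameter from p to m): at inference time the batch-norm statistics are constants, so batch_norm_inference folds into one multiply and one add. A sketch of the per-channel coefficients under the usual definition y = gamma * (x - mean) / sqrt(var + eps) + beta; in the pass above, the literals a and b play this role:

#include <cmath>

struct folded_bn
{
    float a; // multiplied into x
    float b; // added afterwards
};

// Fold to y = a * x + b (a sketch of the algebra, not MIGraphX code).
folded_bn fold(float gamma, float beta, float mean, float var, float eps)
{
    float a = gamma / std::sqrt(var + eps);
    return {a, beta - a * mean};
}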
src/rewrite_pooling.cpp

@@ -12,9 +12,9 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-void rewrite_pooling::apply(module& prog) const
+void rewrite_pooling::apply(module& m) const
 {
-    for(auto ins : iterator_for(prog))
+    for(auto ins : iterator_for(m))
     {
         if(ins->name() != "pooling")
             continue;
@@ -33,26 +33,25 @@ void rewrite_pooling::apply(module& prog) const
             continue;
         std::int64_t n = s.lens()[0];
         std::int64_t c = s.lens()[1];
-        auto reshape   = prog.insert_instruction(
+        auto reshape   = m.insert_instruction(
             ins, make_op("reshape", {{"dims", {n * c, -1}}}), ins->inputs().front());
         instruction_ref pooling{};

         // average pooling
         if(op.mode == op::pooling_mode::average)
         {
-            pooling =
-                prog.insert_instruction(ins, make_op("reduce_mean", {{"axes", {1}}}), reshape);
+            pooling = m.insert_instruction(ins, make_op("reduce_mean", {{"axes", {1}}}), reshape);
         }
         // max pooling
         else
         {
-            pooling = prog.insert_instruction(ins, make_op("reduce_max", {{"axes", {1}}}), reshape);
+            pooling = m.insert_instruction(ins, make_op("reduce_max", {{"axes", {1}}}), reshape);
         }

         std::vector<int64_t> rsp_lens(lens.size(), 1);
         rsp_lens[0] = n;
         rsp_lens[1] = c;
-        prog.replace_instruction(ins, make_op("reshape", {{"dims", rsp_lens}}), pooling);
+        m.replace_instruction(ins, make_op("reshape", {{"dims", rsp_lens}}), pooling);
     }
 }
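The rewrite this file implements turns a pooling that spans the whole spatial extent of an NCHW tensor into reshape(N*C, -1), a reduction over axis 1, and a reshape back, as the hunk above shows. A flat-buffer sketch of the average case:

#include <vector>

// Global average pooling as a row-wise mean over the flattened spatial
// dims (a sketch of the rewrite's semantics, not MIGraphX instructions).
std::vector<float> global_avg_pool(const std::vector<float>& x, int n, int c, int hw)
{
    std::vector<float> out(n * c, 0.0f); // logical shape (N, C, 1, 1, ...)
    for(int row = 0; row < n * c; row++)
    {
        for(int k = 0; k < hw; k++)
            out[row] += x[row * hw + k];
        out[row] /= hw;
    }
    return out;
}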
src/rewrite_rnn.cpp

This diff is collapsed.
src/schedule.cpp

@@ -42,7 +42,7 @@ struct stream_info
     std::unordered_map<instruction_ref, std::size_t> iweights;
     ins_dep_map mod_implicit_deps;

-    void calc_implicit_deps(const module& p) { mod_implicit_deps = p.calc_implicit_deps(); }
+    void calc_implicit_deps(const module& m) { mod_implicit_deps = m.calc_implicit_deps(); }

     void accumulate_weights(instruction_ref last, const schedule_model& model)
     {
@@ -116,15 +116,15 @@ struct stream_info
         }
     };

-    std::size_t assign_streams(module& p, std::size_t n)
+    std::size_t assign_streams(module& m, std::size_t n)
     {
         assert(n > 0);
         partition critical;
         std::unordered_map<instruction_ref, std::deque<partition>> partitions;
         partitions.reserve(weights.size());
         fix([&](auto self, auto ins, auto& part) {
-            assert(not is_end(ins, p.end()));
-            if(not p.has_instruction(ins))
+            assert(not is_end(ins, m.end()));
+            if(not m.has_instruction(ins))
                 return;
             if(contains(partitions, ins))
                 return;
@@ -151,8 +151,8 @@ struct stream_info
             }
         }
             // Sort instructions
-            p.move_instruction(ins, p.end());
-        })(std::prev(p.end()), critical);
+            m.move_instruction(ins, m.end());
+        })(std::prev(m.end()), critical);

         // Set the critical partition to stream 0
         set_stream(critical, 0);
@@ -197,13 +197,13 @@ struct stream_info
         }
     };

-    void sort(module& p, std::size_t)
+    void sort(module& m, std::size_t)
     {
         std::set<weight_ins, compare_weight_ins> children;
         std::unordered_map<instruction_ref, std::size_t> visited;
-        auto last = std::prev(p.end());
+        auto last = std::prev(m.end());
         auto mw   = this->weights.at(last);
-        auto nw   = mw / (p.size() + 1);
+        auto nw   = mw / (m.size() + 1);
         auto add_child = [&](auto ins) {
             auto x = 1 + (mw - this->weights.at(ins)) / (nw + 1);
             auto w = x * this->iweights.at(ins);
@@ -222,10 +222,10 @@ struct stream_info
             // Pop the first element
             auto top = children.begin()->second;
             children.erase(children.begin());
-            p.move_instruction(top, p.begin());
+            m.move_instruction(top, m.begin());
             for(auto ins : top->inputs())
             {
-                if(not p.has_instruction(ins))
+                if(not m.has_instruction(ins))
                     continue;
                 add_child(ins);
             }
@@ -234,7 +234,7 @@ struct stream_info
             {
                 for(auto ins : mod_implicit_deps.at(top))
                 {
-                    assert(p.has_instruction(ins));
+                    assert(m.has_instruction(ins));
                     add_child(ins);
                 }
             }
@@ -242,12 +242,12 @@ struct stream_info
         // move dangling parameter to the front so as not be removed
         auto ins = std::next(last);
-        while(ins != p.end())
+        while(ins != m.end())
         {
             auto next = std::next(ins);
             if(ins->name() == "@param")
             {
-                p.move_instruction(ins, p.begin());
+                m.move_instruction(ins, m.begin());
             }
             ins = next;
         }
@@ -364,18 +364,18 @@ struct stream_info
     }

     std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>>
-    find_concurrent_instructions(module& p) const
+    find_concurrent_instructions(module& m) const
     {
         std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>> result;
         std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>> merge_from;
-        dominator_info di = compute_dominator(p);
-        result.reserve(p.size());
-        merge_from.reserve(p.size());
-        for(auto ins : reverse_iterator_for(p))
+        dominator_info di = compute_dominator(m);
+        result.reserve(m.size());
+        merge_from.reserve(m.size());
+        for(auto ins : reverse_iterator_for(m))
         {
             for(auto&& arg : ins->outputs())
             {
-                if(not p.has_instruction(arg))
+                if(not m.has_instruction(arg))
                     continue;
                 if(is_merge_point(arg))
                     merge_from[ins].insert(arg);
@@ -415,18 +415,18 @@ struct stream_info
     }

     std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>>
-    get_conflicts(module& p)
+    get_conflicts(module& m)
     {
         using conflict_table_type =
             std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>>;
         conflict_table_type conflict_table;
-        auto concur_ins = this->find_concurrent_instructions(p);
+        auto concur_ins = this->find_concurrent_instructions(m);

         // Compute an index for each instruction
         std::unordered_map<instruction_ref, std::size_t> ins2index;
         std::size_t index_total = 0;
-        for(auto ins : iterator_for(p))
+        for(auto ins : iterator_for(m))
             ins2index[ins] = index_total++;

         std::vector<conflict_table_type> thread_conflict_tables(
@@ -507,21 +507,21 @@ struct stream_info
     }
 };

-void schedule::apply(module& p) const
+void schedule::apply(module& m) const
 {
     if(not enable)
         return;

     stream_info si;
-    si.calc_implicit_deps(p);
-    auto last = std::prev(p.end());
+    si.calc_implicit_deps(m);
+    auto last = std::prev(m.end());
     si.accumulate_weights(last, model);
-    auto nstreams = si.assign_streams(p, model.concurrency());
-    si.sort(p, model.concurrency());
+    auto nstreams = si.assign_streams(m, model.concurrency());
+    si.sort(m, model.concurrency());

     if(enabled(MIGRAPHX_TRACE_COMPILE{}) or enabled(MIGRAPHX_TRACE_SCHEDULE{}))
     {
-        p.annotate(std::cout, [&](auto ins) {
+        m.annotate(std::cout, [&](auto ins) {
             if(ins->name() == "@param" and not contains(si.weights, ins))
                 return;
@@ -548,9 +548,9 @@ void schedule::apply(module& p) const
     std::unordered_map<instruction_ref, std::size_t> ins2wait;
     std::unordered_map<std::size_t, std::unordered_set<std::size_t>> waited_for;
     std::unordered_map<instruction_ref, std::unordered_set<std::size_t>> ins2waited;
-    ins2wait.reserve(p.size());
-    ins2waited.reserve(p.size());
-    for(auto ins : iterator_for(p))
+    ins2wait.reserve(m.size());
+    ins2waited.reserve(m.size());
+    for(auto ins : iterator_for(m))
     {
         // Only schedule instructions that have a stream
         if(not si.has_stream(ins))
@@ -559,7 +559,7 @@ void schedule::apply(module& p) const
         // Schedule instruction on the stream
         auto stream = si.get_stream(ins);
         assert(stream < model.concurrency());
-        model.sched(p, ins, stream);
+        model.sched(m, ins, stream);
         // Insert wait instructions
         if(si.is_merge_point(ins, stream))
         {
@@ -572,14 +572,14 @@ void schedule::apply(module& p) const
                 if(not contains(ins2wait, i))
                 {
                     ins2wait[i] = wait_id;
-                    model.record(p, i, wait_id);
+                    model.record(m, i, wait_id);
                     wait_id++;
                 }
                 auto w = ins2wait.at(i);
                 // If we already waited for the event on this stream then dont
                 // insert another wait event
                 if(not contains(waited_for[stream], w))
-                    model.wait(p, ins, w);
+                    model.wait(m, ins, w);
                 // Store the event as waited
                 waited_for[stream].insert(w);
                 // Store all wait events that have been waited on prior to the recorded instruction
@@ -594,7 +594,7 @@ void schedule::apply(module& p) const
     }

     // Add memory conflicts
-    auto conflict_table = si.get_conflicts(p);
+    auto conflict_table = si.get_conflicts(m);
     for(auto&& ip : conflict_table)
     {
         if(ip.second.empty())
@@ -602,7 +602,7 @@ void schedule::apply(module& p) const
         std::vector<instruction_ref> args;
         args.push_back(ip.first);
         args.insert(args.end(), ip.second.begin(), ip.second.end());
-        p.insert_instruction(std::next(ip.first), make_op("identity"), args);
+        m.insert_instruction(std::next(ip.first), make_op("identity"), args);
     }
 }
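assign_streams walks the instruction graph with the fix(...) helper seen above, which lets a lambda recurse by receiving itself as its first argument. A sketch of such a fixpoint combinator (the shape is assumed from the call sites here, not taken from the MIGraphX source):

#include <iostream>
#include <utility>

template <class F>
struct fix_t
{
    F f;
    template <class... Ts>
    auto operator()(Ts&&... xs) const
    {
        // Hand the callable itself back in as `self` so the body can recurse.
        return f(*this, std::forward<Ts>(xs)...);
    }
};

template <class F>
fix_t<F> fix(F f)
{
    return {std::move(f)};
}

int main()
{
    // self is callable directly, mirroring fix([&](auto self, auto ins, auto& part) {...}).
    auto factorial = fix([](auto self, int n) -> int { return n <= 1 ? 1 : n * self(n - 1); });
    std::cout << factorial(5) << "\n"; // prints 120
}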
src/simplify_algebra.cpp

This diff is collapsed.
src/simplify_qdq.cpp

@@ -53,7 +53,7 @@ struct match_find_quantizable_ops
                          match::arg(1)(dequantizelinear_op("x2", "scale2")));
     }

-    void apply(module& m, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto qop = r.result;
         auto q1  = r.instructions["x1"];
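This one-line change, repeated across most passes in this commit, switches matcher results from pass-by-value to pass-by-const-reference. Plausibly the motivation is that the result owns a map of bound instructions, so by-value copies that map on every successful match; a stand-in illustration (simplified type, not the real match::matcher_result):

#include <cstddef>
#include <string>
#include <unordered_map>

struct result_like
{
    std::unordered_map<std::string, int> instructions; // instruction_ref in MIGraphX
};

std::size_t by_value(result_like r) { return r.instructions.size(); }       // copies the map
std::size_t by_cref(const result_like& r) { return r.instructions.size(); } // no copy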
src/simplify_reshapes.cpp

@@ -70,19 +70,19 @@ struct find_reshaper
                    match::any_of[match::outputs()](match::name(reshaper_names())));
     }

-    void apply(module& p, const match::matcher_result& mr) const
+    void apply(module& m, const match::matcher_result& mr) const
     {
         auto ins = mr.result;
         std::vector<instruction_ref> reshapes{ins};
         while(is_reshaper(reshapes.back()))
         {
             assert(!reshapes.back()->inputs().empty());
-            assert(p.has_instruction(reshapes.back()->inputs().front()));
+            assert(m.has_instruction(reshapes.back()->inputs().front()));
             auto input = reshapes.back()->inputs().front();
             reshapes.push_back(input);
         }

-        std::pair<instruction_ref, instruction_ref> r{p.end(), p.end()};
+        std::pair<instruction_ref, instruction_ref> r{m.end(), m.end()};
         for(auto start : iterator_for(reshapes))
         {
             auto last = std::find_if(reshapes.rbegin(), reshapes.rend(), [&](auto&& i) {
@@ -96,7 +96,7 @@ struct find_reshaper
         }
         if(r.first != r.second)
         {
-            p.replace_instruction(r.first, r.second);
+            m.replace_instruction(r.first, r.second);
         }
     }
 };
@@ -117,10 +117,10 @@ struct find_nop_reshapes
         return match::name(reshapes)(match::same_shape(match::arg(0)));
     }

-    void apply(module& p, const match::matcher_result& mr) const
+    void apply(module& m, const match::matcher_result& mr) const
     {
         auto ins = mr.result;
-        p.replace_instruction(ins, ins->inputs().front());
+        m.replace_instruction(ins, ins->inputs().front());
     }
 };
@@ -132,7 +132,7 @@ struct find_transpose
             match::skip_output(match::name("contiguous"))(match::name("transpose"))));
     }

-    void apply(module& p, const match::matcher_result& mr) const
+    void apply(module& m, const match::matcher_result& mr) const
     {
         auto ins = mr.result;
         auto x   = ins;
@@ -149,11 +149,11 @@ struct find_transpose
             return;
         if(is_no_transpose(dims))
         {
-            p.replace_instruction(ins, t->inputs().front());
+            m.replace_instruction(ins, t->inputs().front());
         }
         else
         {
-            p.replace_instruction(
+            m.replace_instruction(
                 ins, make_op("transpose", {{"permutation", dims}}), t->inputs().front());
         }
     }
@@ -223,7 +223,7 @@ struct find_nested_slice
         return result;
     }

-    void apply(module& p, const match::matcher_result& mr) const
+    void apply(module& m, const match::matcher_result& mr) const
     {
         auto ins   = mr.result;
         auto slice = ins->inputs().front();
@@ -241,7 +241,7 @@ struct find_nested_slice
             op.starts.push_back(pp.second.first);
             op.ends.push_back(pp.second.second);
         }
-        p.replace_instruction(ins, op, input);
+        m.replace_instruction(ins, op, input);
     }
 };
@@ -252,7 +252,7 @@ struct find_concat_transpose
         return match::name("concat")(match::all_of[match::inputs()](match::transpose_shape()));
     }

-    void apply(module& p, const match::matcher_result& mr) const
+    void apply(module& m, const match::matcher_result& mr) const
     {
         auto ins          = mr.result;
         auto trans_inputs = ins->inputs();
@@ -279,14 +279,14 @@ struct find_concat_transpose
         std::vector<instruction_ref> inputs;
         std::transform(
             ins->inputs().begin(), ins->inputs().end(), std::back_inserter(inputs), [&](auto i) {
-                return p.insert_instruction(
+                return m.insert_instruction(
                     ins, make_op("transpose", {{"permutation", permutation}}), i);
             });
-        auto concat = p.insert_instruction(ins, op, inputs);
-        auto t      = p.insert_instruction(
+        auto concat = m.insert_instruction(ins, op, inputs);
+        auto t      = m.insert_instruction(
             ins, make_op("transpose", {{"permutation", ipermutation}}), concat);
         assert(ins->get_shape().lens() == t->get_shape().lens());
-        p.replace_instruction(ins, t);
+        m.replace_instruction(ins, t);
     }
 };
@@ -303,7 +303,7 @@ struct find_nested_concat
         return op.axis;
     }

-    void apply(module& p, const match::matcher_result& mr) const
+    void apply(module& m, const match::matcher_result& mr) const
     {
         auto ins  = mr.result;
         auto axis = get_axis(ins);
@@ -317,7 +317,7 @@ struct find_nested_concat
                 args.push_back(i);
             }
         })(ins->inputs());
-        p.replace_instruction(ins, ins->get_operator(), args);
+        m.replace_instruction(ins, ins->get_operator(), args);
     }
 };
@@ -329,7 +329,7 @@ struct find_resize
             match::args(match::name("reshape").bind("data"), match::is_constant().bind("ind")));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto ins     = r.result;
         auto ins_rsp = r.instructions["data"];
@@ -417,13 +417,13 @@ struct find_resize
         }
         auto in_rsp   = ins_rsp->inputs().front();
-        auto rsp_data = p.insert_instruction(
+        auto rsp_data = m.insert_instruction(
             ins_rsp, migraphx::make_op("reshape", {{"dims", in_dims}}), in_rsp);
-        auto mb_rsp = p.insert_instruction(
+        auto mb_rsp = m.insert_instruction(
             ins_rsp, migraphx::make_op("multibroadcast", {{"out_lens", out_dims}}), rsp_data);
-        auto std_mb = p.insert_instruction(ins, migraphx::make_op("contiguous"), mb_rsp);
+        auto std_mb = m.insert_instruction(ins, migraphx::make_op("contiguous"), mb_rsp);
         std::vector<int64_t> rsp_dims(out_lens.begin(), out_lens.end());
-        p.replace_instruction(ins, migraphx::make_op("reshape", {{"dims", rsp_dims}}), std_mb);
+        m.replace_instruction(ins, migraphx::make_op("reshape", {{"dims", rsp_dims}}), std_mb);
     }
 };
@@ -436,7 +436,7 @@ struct find_where_op
                         match::is_constant().bind("ind")));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto ins    = r.result;
         auto concat = r.instructions["data"];
@@ -475,11 +475,11 @@ struct find_where_op
         if(val)
         {
-            p.replace_instruction(ins, inputs.at(0));
+            m.replace_instruction(ins, inputs.at(0));
         }
         else
         {
-            p.replace_instruction(ins, inputs.at(1));
+            m.replace_instruction(ins, inputs.at(1));
         }
     }
 };
@@ -496,7 +496,7 @@ struct find_reshape_cont
             match::any()));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto ins      = r.result;
         auto ins_cont = r.instructions["cont"];
@@ -530,11 +530,11 @@ struct find_reshape_cont
             else
             {
                 inputs.push_back(
-                    p.insert_instruction(ins, make_op("reshape", {{"dims", dims}}), in));
+                    m.insert_instruction(ins, make_op("reshape", {{"dims", dims}}), in));
             }
         }
-        auto out = p.insert_instruction(ins, ins->get_operator(), inputs);
-        p.replace_instruction(ins, make_op("reshape", {{"dims", out_dims}}), out);
+        auto out = m.insert_instruction(ins, ins->get_operator(), inputs);
+        m.replace_instruction(ins, make_op("reshape", {{"dims", out_dims}}), out);
     }
 };
@@ -564,25 +564,25 @@ struct find_transpose_contiguous_reshaper_unary
                                match::args(match_transpose_contiguous_reshaper()));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto ins          = r.result;
         auto reshaper_ins = r.instructions["reshaper_ins"];
         auto trans_ins    = r.instructions["trans_ins"];
         auto cont_ins     = r.instructions["cont_ins"];
         auto unary_op_name = ins->get_operator().name();
-        auto unary_ins     = p.insert_instruction(cont_ins, make_op(unary_op_name), trans_ins);
-        auto new_cont_ins  = p.insert_instruction(cont_ins, make_op("contiguous"), unary_ins);
+        auto unary_ins     = m.insert_instruction(cont_ins, make_op(unary_op_name), trans_ins);
+        auto new_cont_ins  = m.insert_instruction(cont_ins, make_op("contiguous"), unary_ins);
         // older cont and reshape are removed by deadcode elimination
-        p.replace_instruction(ins, reshaper_ins->get_operator(), new_cont_ins);
+        m.replace_instruction(ins, reshaper_ins->get_operator(), new_cont_ins);
     }
 };

-void simplify_reshapes::apply(module& p) const
+void simplify_reshapes::apply(module& m) const
 {
     for(int i = 0; i < 2; i++)
     {
-        match::find_matches(p,
+        match::find_matches(m,
                             find_where_op{},
                             find_resize{},
                             find_reshape_cont{},
@@ -594,7 +594,7 @@ void simplify_reshapes::apply(module& p) const
                             find_nested_slice{},
                             find_nested_concat{},
                             find_transpose_contiguous_reshaper_unary{});
-        dead_code_elimination{}.apply(p);
+        dead_code_elimination{}.apply(m);
     }
 }
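Among the matchers renamed here, find_transpose folds a chain of transposes into a single one by composing their permutations. The index algebra, as a standalone sketch (not the pass's code): applying permutation p1 and then p2 is the single permutation whose axis i reads input axis p1[p2[i]].

#include <cstddef>
#include <vector>

std::vector<int> compose(const std::vector<int>& p1, const std::vector<int>& p2)
{
    std::vector<int> out(p2.size());
    for(std::size_t i = 0; i < p2.size(); i++)
        out[i] = p1[p2[i]]; // axis i of the final output reads this input axis
    return out;
}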
src/targets/cpu/lowering.cpp

@@ -352,7 +352,7 @@ struct cpu_apply
             std::transform(bind_inputs.begin(),
                            bind_inputs.end(),
                            std::back_inserter(inputs),
-                           [&](const auto& s) { return r.instructions.at(s); });
+                           [&](const auto& s) { return r.instructions[s]; });
             inputs.push_back(this->insert_allocation(ins, ins->get_shape()));
             modl->replace_instruction(ins, op, inputs);
         });
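The behavior difference behind the .at(s) -> [s] change is ordinary map semantics: at() throws std::out_of_range on a missing key, while operator[] default-constructs and inserts the value, and requires a non-const map. The change therefore assumes every bound name is present in the match result.

#include <cassert>
#include <string>
#include <unordered_map>

int main()
{
    std::unordered_map<std::string, int> insmap{{"x", 1}};
    assert(insmap.at("x") == 1); // would throw if "x" were absent
    int v = insmap["y"];         // inserts {"y", 0} and returns it
    assert(v == 0 and insmap.size() == 2);
}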
src/targets/gpu/CMakeLists.txt

@@ -158,6 +158,7 @@ add_library(migraphx_gpu
     nonzero.cpp
     pack_args.cpp
     pack_int8_args.cpp
+    prefuse_ops.cpp
     pad.cpp
     pooling.cpp
     quant_convolution.cpp
src/targets/gpu/analyze_streams.cpp

@@ -28,30 +28,30 @@ struct hip_stream_model
     bool is_wait(migraphx::instruction_ref ins) const { return ins->name() == "gpu::wait_event"; }
 };

-stream_model make_stream_model(const module& p)
+stream_model make_stream_model(const module& m)
 {
-    hip_stream_model m;
+    hip_stream_model hsm;
     std::size_t stream = 0;
-    for(auto ins : iterator_for(p))
+    for(auto ins : iterator_for(m))
     {
         if(ins->name() == "gpu::set_stream")
         {
             auto v = ins->get_operator().to_value();
             stream = v["stream"].to<std::size_t>();
-            m.max_stream = std::max(stream, m.max_stream);
+            hsm.max_stream = std::max(stream, hsm.max_stream);
         }
         if(ins->get_operator().is_context_free())
             continue;
         if(contains({"hip::hip_allocate_memory", "hip::hip_copy_literal", "@param"}, ins->name()))
             continue;
-        m.ins2stream[ins] = stream;
+        hsm.ins2stream[ins] = stream;
     }
-    return m;
+    return hsm;
 }

-std::vector<stream_race> analyze_streams(const module& p)
+std::vector<stream_race> analyze_streams(const module& m)
 {
-    return migraphx::analyze_streams(p, make_stream_model(p));
+    return migraphx::analyze_streams(m, make_stream_model(m));
 }

 } // namespace gpu
src/targets/gpu/compile_hip.cpp

@@ -22,6 +22,7 @@ namespace gpu {
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_ASM);
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_SRC);

 #if MIGRAPHX_USE_HIPRTC
@@ -247,6 +248,16 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
         MIGRAPHX_THROW("Missing hsaco");
     };

+    if(enabled(MIGRAPHX_GPU_DUMP_SRC{}))
+    {
+        for(const auto& src : srcs)
+        {
+            if(src.path.extension() != ".cpp")
+                continue;
+            std::cout << std::string(src.content.first, src.len()) << std::endl;
+        }
+    }
+
     if(enabled(MIGRAPHX_GPU_DUMP_ASM{}))
     {
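The new MIGRAPHX_GPU_DUMP_SRC variable gates printing of each .cpp source handed to the HIP compiler. A sketch of the kind of check enabled(MIGRAPHX_GPU_DUMP_SRC{}) performs, assuming the common convention that a set, non-"0" value means on (the macro-generated helper in MIGraphX may differ):

#include <cstdlib>
#include <string>

bool env_flag_enabled(const char* name)
{
    const char* v = std::getenv(name);
    return v != nullptr and *v != '\0' and std::string{v} != "0";
}

// Usage sketch: set MIGRAPHX_GPU_DUMP_SRC=1 in the environment before
// compiling to print the generated kernel sources.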
src/targets/gpu/eliminate_workspace.cpp

@@ -11,11 +11,11 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

-void eliminate_workspace::apply(module& p) const
+void eliminate_workspace::apply(module& m) const
 {
     std::size_t n = 0;
     std::vector<instruction_ref> allocs;
-    for(auto ins : iterator_for(p))
+    for(auto ins : iterator_for(m))
     {
         if(ins->outputs().size() != 1)
             continue;
@@ -30,11 +30,11 @@ void eliminate_workspace::apply(module& p) const
     }
     if(n > 0)
     {
-        auto ws = p.add_parameter("workspace", shape{shape::int8_type, {n}});
+        auto ws = m.add_parameter("workspace", shape{shape::int8_type, {n}});
         for(auto&& a : allocs)
         {
-            p.replace_instruction(a, ws);
-            p.remove_instruction(a);
+            m.replace_instruction(a, ws);
+            m.remove_instruction(a);
         }
     }
 }
src/targets/gpu/fuse_ops.cpp

@@ -316,7 +316,7 @@ struct find_layernorm
 {
     auto matcher() const { return match::layernorm(&gpu_name); }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto ins   = r.result;
         auto x_ins = r.instructions["x"];
@@ -331,7 +331,7 @@ struct find_layernorm
         if(relements > 1024 or (relements % 4 != 0 and relements > 256))
             return;

-        p.replace_instruction(ins, hip_layernorm{}, x_ins, args.back());
+        m.replace_instruction(ins, hip_layernorm{}, x_ins, args.back());
     }
 };
@@ -343,11 +343,11 @@ struct find_triadd_layernorm
             match::used_once(), match::all_of[match::inputs()](match::standard_shape()))));
     }

-    void apply(module& p, const match::matcher_result& r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto ins    = r.result;
         auto triadd = ins->inputs().front();
-        p.replace_instruction(ins, hip_triadd_layernorm{}, triadd->inputs());
+        m.replace_instruction(ins, hip_triadd_layernorm{}, triadd->inputs());
     }
 };
@@ -355,13 +355,13 @@ struct find_gelu
 {
     auto matcher() const { return match::gelu_erf(&gpu_name); }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto ins   = r.result;
         auto x_ins = r.instructions["x"];
         auto args  = ins->inputs();

-        p.replace_instruction(ins, hip_gelu{}, x_ins, args.back());
+        m.replace_instruction(ins, hip_gelu{}, x_ins, args.back());
     }
 };
@@ -372,7 +372,7 @@ struct find_add_gelu
         return match::name("gpu::gelu")(match::arg(0)(match::name("gpu::add").bind("add")));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto add_ins = r.instructions["add"];
         auto ins     = r.result;
@@ -381,7 +381,7 @@ struct find_add_gelu
         move_broadcasted_back(args);
         args.back() = ins->inputs().back();

-        p.replace_instruction(ins, hip_add_gelu{}, args);
+        m.replace_instruction(ins, hip_add_gelu{}, args);
     }
 };
@@ -391,16 +391,16 @@ struct find_gelu_new

     auto matcher() const { return match::gelu_tanh(&gpu_name); }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto ins   = r.result;
         auto x_ins = r.instructions["x"];
         auto args  = ins->inputs();

         if(fast_math)
-            p.replace_instruction(ins, hip_gelu{}, x_ins, args.back());
+            m.replace_instruction(ins, hip_gelu{}, x_ins, args.back());
         else
-            p.replace_instruction(ins, hip_gelu_new{}, x_ins, args.back());
+            m.replace_instruction(ins, hip_gelu_new{}, x_ins, args.back());
     }
 };
@@ -411,7 +411,7 @@ struct find_add_gelu_new
         return match::name("gpu::gelu_new")(match::arg(0)(match::name("gpu::add").bind("add")));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto add_ins = r.instructions["add"];
         auto ins     = r.result;
@@ -420,7 +420,7 @@ struct find_add_gelu_new
         move_broadcasted_back(args);
         args.back() = ins->inputs().back();

-        p.replace_instruction(ins, hip_add_gelu_new{}, args);
+        m.replace_instruction(ins, hip_add_gelu_new{}, args);
     }
 };
@@ -435,7 +435,7 @@ struct find_add_clip
                 .bind("add")));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto add_ins = r.instructions["add"];
         auto ins     = r.result;
@@ -448,9 +448,9 @@ struct find_add_clip
         add_args.pop_back();
         add_args.insert(add_args.end(), std::next(ins_args.begin()), ins_args.end());
         if(add_ins->name() == "gpu::add")
-            p.replace_instruction(ins, hip_add_clip{}, add_args);
+            m.replace_instruction(ins, hip_add_clip{}, add_args);
         else if(add_ins->name() == "gpu::triadd")
-            p.replace_instruction(ins, hip_triadd_clip{}, add_args);
+            m.replace_instruction(ins, hip_triadd_clip{}, add_args);
     }
 };
@@ -470,7 +470,7 @@ struct find_add_unary
                 .bind("add")));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto add_ins = r.instructions["add"];
         auto ins     = r.result;
@@ -481,9 +481,9 @@ struct find_add_unary
         // Use the allocation from the relu operator
         args.back() = ins->inputs().back();
         if(add_ins->name() == "gpu::add")
-            p.replace_instruction(ins, binary_add_op, args);
+            m.replace_instruction(ins, binary_add_op, args);
         else if(add_ins->name() == "gpu::triadd")
-            p.replace_instruction(ins, ternary_add_op, args);
+            m.replace_instruction(ins, ternary_add_op, args);
     }
 };
@@ -498,7 +498,7 @@ struct find_triadd
                 .bind("input")));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto add_ins   = r.instructions["add"];
         auto input_ins = r.instructions["input"];
@@ -513,7 +513,7 @@ struct find_triadd
         move_broadcasted_back(args);
         args.back() = ins->inputs().back();
-        p.replace_instruction(ins, hip_triadd{}, args);
+        m.replace_instruction(ins, hip_triadd{}, args);
     }
 };
@@ -525,7 +525,7 @@ struct find_mul_add
             match::name("gpu::mul")(match::used_once()).bind("mul"), match::any().bind("b")));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto mul_ins = r.instructions["mul"];
         auto b_ins   = r.instructions["b"];
@@ -538,7 +538,7 @@ struct find_mul_add
         args.insert(std::prev(args.end()), b_ins);
         args.back() = ins->inputs().back();

-        p.replace_instruction(ins, hip_mul_add{}, args);
+        m.replace_instruction(ins, hip_mul_add{}, args);
     }
 };
@@ -550,7 +550,7 @@ struct find_mul_add_relu
             match::arg(0)(match::name("gpu::mul_add")(match::used_once()).bind("mul_add")));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto mul_add_ins = r.instructions["mul_add"];
         auto ins         = r.result;
@@ -558,7 +558,7 @@ struct find_mul_add_relu
         // Use the allocation from the relu operator
         args.back() = ins->inputs().back();

-        p.replace_instruction(ins, hip_mul_add_relu{}, args);
+        m.replace_instruction(ins, hip_mul_add_relu{}, args);
     }
 };
@@ -783,7 +783,7 @@ auto conv_bias(Ms... ms)
 }

 template <class Op>
-void apply_conv_bias(context& ctx, module& p, match::matcher_result r)
+void apply_conv_bias(context& ctx, module& m, const match::matcher_result& r)
 {
     auto conv_ins = r.instructions["conv"];
     auto bias_ins = r.instructions["bias"];
@@ -798,7 +798,7 @@ void apply_conv_bias(context& ctx, module& p, match::matcher_result r)
     // TODO: Insert ws allocation
     auto ws = cb.get_workspace(ctx);
     (void)ws;
-    p.replace_instruction(ins, cb, input_ins, weights_ins, old_ws_ins, bias_ins, alloc_ins);
+    m.replace_instruction(ins, cb, input_ins, weights_ins, old_ws_ins, bias_ins, alloc_ins);
 }

 inline auto precompile_name(std::string s) // NOLINT
@@ -829,9 +829,9 @@ struct find_conv_bias
             match::output(match::name(std::unordered_set<std::string>{"gpu::relu"}))));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
-        apply_conv_bias<miopen_conv_bias>(*ctx, p, std::move(r));
+        apply_conv_bias<miopen_conv_bias>(*ctx, m, r);
     }
 };
@@ -840,9 +840,9 @@ struct find_conv_bias_relu
     context* ctx = nullptr;
     auto matcher() const { return match::name("gpu::relu")(match::arg(0)(conv_bias())); }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
-        apply_conv_bias<miopen_conv_bias_relu>(*ctx, p, std::move(r));
+        apply_conv_bias<miopen_conv_bias_relu>(*ctx, m, r);
     }
 };
@@ -857,7 +857,7 @@ struct find_conv_pointwise
             fusable_conv(match::used_once()).bind("conv")));
     }

-    void apply(module& m, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto conv_ins = r.instructions["conv"];
         auto bias_ins = r.instructions["bias"];
@@ -896,7 +896,7 @@ struct find_gemm_add
             match::name("gpu::gemm")(match::nargs(3)).bind("gemm")));
     }

-    void apply(module& p, match::matcher_result r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto ins      = r.result;
         auto gemm_ins = r.instructions["gemm"];
@@ -908,26 +908,68 @@ struct find_gemm_add
         if(not float_equal(gemm.beta, 0))
             return;

+        if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto i) {
+               return not i->get_shape().standard();
+           }))
+            return;
+
         auto inputs = gemm_ins->inputs();
         inputs.pop_back();

         auto copy_ins = c_ins;

         // Insert copy
-        if(ins == p.end() or c_ins->outputs().size() > 1 or c_ins->inputs().empty())
+        if(ins == m.end() or c_ins->outputs().size() > 1 or c_ins->inputs().empty())
         {
-            copy_ins = p.insert_instruction(ins, hip_copy{}, c_ins, ins->inputs().back());
+            copy_ins = m.insert_instruction(ins, hip_copy{}, c_ins, ins->inputs().back());
         }

         inputs.push_back(copy_ins);
         inputs.push_back(copy_ins);

         gemm.beta = 1;
-        p.replace_instruction(ins, gemm, inputs);
+        m.replace_instruction(ins, gemm, inputs);
     }
 };

+auto pointwise_name(const std::string& s)
+{
+    return precompile_name("pointwise")(match::make_basic_pred_matcher([=](auto ins) {
+        module_ref pm = ins->module_inputs().front();
+        auto n = std::count_if(pm->begin(), pm->end(), [&](auto& i) { return i.name() == s; });
+        if(n != 1)
+            return false;
+        return std::all_of(pm->begin(), pm->end(), [&](auto& i) {
+            return starts_with(i.name(), "@") or i.name() == s;
+        });
+    }));
+}
+
+struct find_gemm_pointwise
+{
+    auto matcher() const
+    {
+        return pointwise_name("add")(
+            match::nargs(3),
+            match::all_of[match::inputs()](match::standard_shape()),
+            match::either_arg(0, 1)(match::used_once().bind("c"),
+                                    match::name("gpu::gemm")(match::nargs(3)).bind("gemm")));
+    }
+
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins      = r.result;
+        auto gemm_ins = r.instructions["gemm"];
+        auto c_ins    = r.instructions["c"];
+        auto gemm     = any_cast<rocblas_gemm<op::dot>>(gemm_ins->get_operator());
+        // Already fused gemm
+        if(not float_equal(gemm.beta, 0))
+            return;
+        auto inputs = gemm_ins->inputs();
+        inputs.pop_back();
+        inputs.push_back(c_ins);
+        inputs.push_back(gemm_ins->inputs().back());
+        gemm.beta = 1;
+        m.replace_instruction(ins, gemm, inputs);
+    }
+};
@@ -938,22 +980,22 @@ struct find_commutative_broadcast
         return match::name("gpu::add", "gpu::mul")(match::arg(1)(match::broadcast_shape()));
     }

-    void apply(module& p, const match::matcher_result& r) const
+    void apply(module& m, const match::matcher_result& r) const
     {
         auto ins  = r.result;
         auto args = ins->inputs();
         move_broadcasted_back(args);

-        p.replace_instruction(ins, ins->get_operator(), args);
+        m.replace_instruction(ins, ins->get_operator(), args);
     }
 };

-void fuse_ops::apply(module& p) const
+void fuse_ops::apply(module& m) const
 {
-    match::find_matches(p, find_gelu{}, find_gelu_new{fast_math});
-    run_passes(p, {dead_code_elimination{}});
-    match::find_matches(p, find_triadd{});
-    match::find_matches(p,
+    match::find_matches(m, find_gelu{}, find_gelu_new{fast_math});
+    run_passes(m, {dead_code_elimination{}});
+    match::find_matches(m, find_triadd{});
+    match::find_matches(m,
                         find_layernorm{},
                         find_conv_pointwise{ctx},
                         find_conv_bias_relu{ctx},
@@ -966,8 +1008,12 @@ void fuse_ops::apply(module& p) const
                         find_add_unary{"gpu::sigmoid", hip_add_sigmoid{}, hip_triadd_sigmoid{}},
                         find_add_unary{"gpu::tanh", hip_add_tanh{}, hip_triadd_tanh{}},
                         find_add_clip{});
-    run_passes(p, {dead_code_elimination{}});
-    match::find_matches(p, find_triadd_layernorm{}, find_gemm_add{}, find_commutative_broadcast{});
+    run_passes(m, {dead_code_elimination{}});
+    match::find_matches(
+        m, find_triadd_layernorm{}, find_gemm_add{}, find_gemm_pointwise{}, find_commutative_broadcast{});
 }

 } // namespace gpu
src/targets/gpu/gemm_impl.cpp
View file @ faefeef9
#include <rocblas.h>
#include <migraphx/gpu/gemm_impl.hpp>
+#include <migraphx/reduce_dims.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
...
@@ -27,6 +28,22 @@ rocblas_datatype get_type(shape::type_t type)
    MIGRAPHX_THROW("ROCBLAS_GEMM: data type not supported!");
}

+void blas_shape(const shape& s)
+{
+    if(s.lens().size() < 2)
+        return;
+    if(std::none_of(s.strides().end() - 2, s.strides().end(), [&](auto i) { return i == 1; }))
+        MIGRAPHX_THROW("GPU_GEMM: needs to have one matrix stride as 1");
+    if(s.lens().size() < 3)
+        return;
+    shape batch_shape{s.type(),
+                      {s.lens().begin(), s.lens().end() - 2},
+                      {s.strides().begin(), s.strides().end() - 2}};
+    auto batch_shapes = reduce_dims({batch_shape});
+    if(batch_shapes.front().lens().size() != 1)
+        MIGRAPHX_THROW("GPU_GEMM: Batch dimension is not collapsible");
+}
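The batch check leans on reduce_dims (from the include added above) to merge adjacent batch dimensions whose strides are contiguous. A worked example with hypothetical shapes:

    // lens {2, 3, 4, 5}, packed strides {60, 20, 5, 1}: the batch part is
    // lens {2, 3} / strides {60, 20}. Since 60 == 20 * 3, reduce_dims merges
    // it into lens {6} / stride {20}, a single dimension, so blas_shape
    // accepts it as one strided-batched GEMM.
    // With batch strides {120, 20} instead, 120 != 20 * 3, the dimensions
    // cannot merge and blas_shape throws
    // "GPU_GEMM: Batch dimension is not collapsible".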
template <class R, class... Ts, class... Us>
R rocblas_invoke(R (*f)(Ts...), Us... xs)
{
...
@@ -36,6 +53,18 @@ R rocblas_invoke(R (*f)(Ts...), Us... xs)
    return f(xs..., nullptr, nullptr);
}

+static bool is_transposed(const shape& s)
+{
+    if(not s.transposed())
+        return false;
+    return s.strides().back() != 1;
+}
+
+static rocblas_int get_batch_stride(const argument& a)
+{
+    return a.get_shape().strides()[a.get_shape().strides().size() - 3];
+}
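Two small helpers: is_transposed treats a shape as transposed for rocBLAS purposes only when its innermost stride is not 1 (a shape flagged as transposed whose last stride is still 1 can be fed to rocBLAS as a normal matrix), and get_batch_stride reads the third-from-last stride, i.e. the element distance between consecutive matrices in the batch.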
template <class T>
void gemm_impl(context& ctx,
               const shape& output_shape,
...
@@ -45,8 +74,8 @@ void gemm_impl(context& ctx,
               bool int8_x4_format,
               bool compute_fp32)
{
-    bool transa = args[0].get_shape().transposed();
+    bool transa = is_transposed(args[0].get_shape());
-    bool transb = args[1].get_shape().transposed();
+    bool transb = is_transposed(args[1].get_shape());
    auto n_dim  = output_shape.lens().size();
    auto dim_1  = n_dim - 1;
    auto dim_0  = n_dim - 2;
...
@@ -142,6 +171,9 @@ void gemm_impl(context& ctx,
    }
    else
    {
+        auto a_stride = get_batch_stride(args[0]);
+        auto b_stride = get_batch_stride(args[1]);
+        auto c_stride = get_batch_stride(args[2]);
        rocblas_invoke(&rocblas_gemm_strided_batched_ex,
                       ctx.get_stream().get_rocblas(),
                       transb ? rocblas_operation_transpose : rocblas_operation_none,
...
@@ -153,20 +185,20 @@ void gemm_impl(context& ctx,
                       to_pointer(args.at(1)),
                       arg_type,
                       ldb,
-                      k * n,
+                      b_stride,
                       to_pointer(args.at(0)),
                       arg_type,
                       lda,
-                      m * k,
+                      a_stride,
                       beta_v,
                       to_pointer(args[2]),
                       output_type,
                       ldc,
-                      m * n,
+                      c_stride,
                       is_3inputs ? to_pointer(args[3]) : to_pointer(args[2]),
                       output_type,
                       ldc,
-                      m * n,
+                      c_stride,
                       num_matrices,
                       compute_type,
                       rocblas_gemm_algo_standard,
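Passing the actual batch strides (a_stride, b_stride, c_stride) instead of the packed products k * n, m * k, and m * n lets the strided-batched call work on inputs whose matrices are not densely packed in memory. A hypothetical example:

    // A with lens {4, 8, 16} and strides {256, 16, 1} (each 8x16 matrix
    // padded to 256 elements): the packed guess m * k = 8 * 16 = 128 would
    // step to the wrong matrices, while get_batch_stride(args[0]) correctly
    // returns 256.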
...
src/targets/gpu/include/migraphx/gpu/analyze_streams.hpp
View file @ faefeef9
...
@@ -11,7 +11,7 @@ struct module;
namespace gpu {

-std::vector<stream_race> analyze_streams(const module& p);
+std::vector<stream_race> analyze_streams(const module& m);

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
...
src/targets/gpu/include/migraphx/gpu/eliminate_workspace.hpp
View file @ faefeef9
...
@@ -14,7 +14,7 @@ namespace gpu {
struct eliminate_workspace
{
    std::string name() const { return "eliminate_workspace"; }
-    void apply(module& p) const;
+    void apply(module& m) const;
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
...
src/targets/gpu/include/migraphx/gpu/fuse_ops.hpp
View file @ faefeef9
...
@@ -16,7 +16,7 @@ struct fuse_ops
    context* ctx   = nullptr;
    bool fast_math = true;
    std::string name() const { return "gpu::fuse_ops"; }
-    void apply(module& p) const;
+    void apply(module& m) const;
};

} // namespace gpu
...
src/targets/gpu/include/migraphx/gpu/gemm.hpp
View file @ faefeef9
...
@@ -18,6 +18,8 @@ namespace gpu {
struct context;

+void blas_shape(const shape& s);
+
template <class Op>
struct rocblas_gemm
{
...
@@ -50,13 +52,14 @@ struct rocblas_gemm
        std::vector<shape> in_shapes(inputs);
        in_shapes.pop_back();
        check_shapes{in_shapes, *this}.not_broadcasted();
-        batch_not_transposed(inputs[0].strides());
+        blas_shape(inputs[0]);
-        batch_not_transposed(inputs[1].strides());
+        blas_shape(inputs[1]);

        // if gemm and add are fused
-        if(not float_equal(beta, 0))
+        if(in_shapes.size() > 2)
        {
            auto cmat_shape = in_shapes.back();
            in_shapes.pop_back();
+            blas_shape(cmat_shape);
            auto op_out_shape = op.compute_shape(in_shapes);
            if(cmat_shape.lens() != op_out_shape.lens())
            {
...
@@ -71,6 +74,7 @@ struct rocblas_gemm
                               to_string(cmat_shape.type()) +
                               ", it must be: " + to_string(op_out_shape.type()));
            }
            return op_out_shape;
        }

        return op.compute_shape(in_shapes);
...
@@ -96,28 +100,6 @@ struct rocblas_gemm
        return args.back();
    }
-    void batch_not_transposed(const std::vector<std::size_t>& strides) const
-    {
-        if(strides.size() <= 2)
-            return;
-        auto dim_0       = strides.size() - 2;
-        auto matrix_size = std::max(strides[dim_0], strides[dim_0 + 1]);
-        std::vector<std::size_t> batch(strides.begin(), strides.begin() + dim_0);
-        if(std::all_of(batch.begin(), batch.end(), [&](auto i) { return (i < matrix_size); }))
-        {
-            MIGRAPHX_THROW("GPU_GEMM: matrix size and batch size {" + to_string_range(strides) +
-                           "} are transposed!");
-        }
-
-        if(std::adjacent_find(batch.begin(), batch.end(), [&](auto i, auto j) {
-               return (i < j or i < matrix_size or j < matrix_size);
-           }) != batch.end())
-        {
-            MIGRAPHX_THROW("GPU_GEMM: batch size {" + to_string_range(strides) + "} is transposed!");
-        }
-    }
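The removed batch_not_transposed heuristic compared raw stride values to guess whether the batch dimensions were transposed. It is superseded by blas_shape (declared above and defined in gemm_impl.cpp), which instead verifies that one matrix stride is 1 and that the batch dimensions collapse to a single strided-batched axis via reduce_dims, a check that is both stricter and easier to reason about.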
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
...
src/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp
0 → 100644
View file @ faefeef9
#ifndef MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP
#define MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP

#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

struct module;

namespace gpu {

struct prefuse_ops
{
    std::string name() const { return "gpu::prefuse_ops"; }
    void apply(module& m) const;
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif // MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP
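This new header declares only the pass interface: gpu::prefuse_ops exposes name() and apply(module&), matching the other passes in this directory, and is presumably scheduled by the gpu target ahead of gpu::fuse_ops, as the name suggests. A hypothetical invocation, mirroring how passes are run elsewhere in this commit:

    // run_passes(m, {gpu::prefuse_ops{}, dead_code_elimination{}});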