Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
d9a5acbd
Unverified
Commit
d9a5acbd
authored
May 17, 2022
by
Paul Fultz II
Committed by
GitHub
May 17, 2022
Browse files
Merge branch 'develop' into jit-vector-reduce
parents
d0b7fc9a
a27dd28c
Changes
60
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
248 additions
and
102 deletions
+248
-102
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+1
-1
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+1
-0
src/targets/gpu/analyze_streams.cpp
src/targets/gpu/analyze_streams.cpp
+10
-10
src/targets/gpu/eliminate_workspace.cpp
src/targets/gpu/eliminate_workspace.cpp
+5
-5
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+45
-45
src/targets/gpu/include/migraphx/gpu/analyze_streams.hpp
src/targets/gpu/include/migraphx/gpu/analyze_streams.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/eliminate_workspace.hpp
src/targets/gpu/include/migraphx/gpu/eliminate_workspace.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/fuse_ops.hpp
src/targets/gpu/include/migraphx/gpu/fuse_ops.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp
src/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp
+24
-0
src/targets/gpu/include/migraphx/gpu/schedule_model.hpp
src/targets/gpu/include/migraphx/gpu/schedule_model.hpp
+3
-3
src/targets/gpu/include/migraphx/gpu/sync_device.hpp
src/targets/gpu/include/migraphx/gpu/sync_device.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/write_literals.hpp
src/targets/gpu/include/migraphx/gpu/write_literals.hpp
+1
-1
src/targets/gpu/prefuse_ops.cpp
src/targets/gpu/prefuse_ops.cpp
+76
-0
src/targets/gpu/schedule_model.cpp
src/targets/gpu/schedule_model.cpp
+8
-8
src/targets/gpu/sync_device.cpp
src/targets/gpu/sync_device.cpp
+4
-4
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+3
-0
src/targets/gpu/write_literals.cpp
src/targets/gpu/write_literals.cpp
+7
-7
test/dead_code_elimination_test.cpp
test/dead_code_elimination_test.cpp
+34
-0
test/matcher.cpp
test/matcher.cpp
+12
-12
tools/install_prereqs.sh
tools/install_prereqs.sh
+10
-2
No files found.
src/targets/cpu/lowering.cpp
View file @
d9a5acbd
...
...
@@ -352,7 +352,7 @@ struct cpu_apply
std
::
transform
(
bind_inputs
.
begin
(),
bind_inputs
.
end
(),
std
::
back_inserter
(
inputs
),
[
&
](
const
auto
&
s
)
{
return
r
.
instructions
.
at
(
s
)
;
});
[
&
](
const
auto
&
s
)
{
return
r
.
instructions
[
s
]
;
});
inputs
.
push_back
(
this
->
insert_allocation
(
ins
,
ins
->
get_shape
()));
modl
->
replace_instruction
(
ins
,
op
,
inputs
);
});
...
...
src/targets/gpu/CMakeLists.txt
View file @
d9a5acbd
...
...
@@ -159,6 +159,7 @@ add_library(migraphx_gpu
nonzero.cpp
pack_args.cpp
pack_int8_args.cpp
prefuse_ops.cpp
pad.cpp
pooling.cpp
quant_convolution.cpp
...
...
src/targets/gpu/analyze_streams.cpp
View file @
d9a5acbd
...
...
@@ -28,30 +28,30 @@ struct hip_stream_model
bool
is_wait
(
migraphx
::
instruction_ref
ins
)
const
{
return
ins
->
name
()
==
"gpu::wait_event"
;
}
};
stream_model
make_stream_model
(
const
module
&
p
)
stream_model
make_stream_model
(
const
module
&
m
)
{
hip_stream_model
m
;
hip_stream_model
hs
m
;
std
::
size_t
stream
=
0
;
for
(
auto
ins
:
iterator_for
(
p
))
for
(
auto
ins
:
iterator_for
(
m
))
{
if
(
ins
->
name
()
==
"gpu::set_stream"
)
{
auto
v
=
ins
->
get_operator
().
to_value
();
stream
=
v
[
"stream"
].
to
<
std
::
size_t
>
();
m
.
max_stream
=
std
::
max
(
stream
,
m
.
max_stream
);
auto
v
=
ins
->
get_operator
().
to_value
();
stream
=
v
[
"stream"
].
to
<
std
::
size_t
>
();
hs
m
.
max_stream
=
std
::
max
(
stream
,
hs
m
.
max_stream
);
}
if
(
ins
->
get_operator
().
is_context_free
())
continue
;
if
(
contains
({
"hip::hip_allocate_memory"
,
"hip::hip_copy_literal"
,
"@param"
},
ins
->
name
()))
continue
;
m
.
ins2stream
[
ins
]
=
stream
;
hs
m
.
ins2stream
[
ins
]
=
stream
;
}
return
m
;
return
hs
m
;
}
std
::
vector
<
stream_race
>
analyze_streams
(
const
module
&
p
)
std
::
vector
<
stream_race
>
analyze_streams
(
const
module
&
m
)
{
return
migraphx
::
analyze_streams
(
p
,
make_stream_model
(
p
));
return
migraphx
::
analyze_streams
(
m
,
make_stream_model
(
m
));
}
}
// namespace gpu
...
...
src/targets/gpu/eliminate_workspace.cpp
View file @
d9a5acbd
...
...
@@ -11,11 +11,11 @@ namespace migraphx {
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
void
eliminate_workspace
::
apply
(
module
&
p
)
const
void
eliminate_workspace
::
apply
(
module
&
m
)
const
{
std
::
size_t
n
=
0
;
std
::
vector
<
instruction_ref
>
allocs
;
for
(
auto
ins
:
iterator_for
(
p
))
for
(
auto
ins
:
iterator_for
(
m
))
{
if
(
ins
->
outputs
().
size
()
!=
1
)
continue
;
...
...
@@ -30,11 +30,11 @@ void eliminate_workspace::apply(module& p) const
}
if
(
n
>
0
)
{
auto
ws
=
p
.
add_parameter
(
"workspace"
,
shape
{
shape
::
int8_type
,
{
n
}});
auto
ws
=
m
.
add_parameter
(
"workspace"
,
shape
{
shape
::
int8_type
,
{
n
}});
for
(
auto
&&
a
:
allocs
)
{
p
.
replace_instruction
(
a
,
ws
);
p
.
remove_instruction
(
a
);
m
.
replace_instruction
(
a
,
ws
);
m
.
remove_instruction
(
a
);
}
}
}
...
...
src/targets/gpu/fuse_ops.cpp
View file @
d9a5acbd
...
...
@@ -316,7 +316,7 @@ struct find_layernorm
{
auto
matcher
()
const
{
return
match
::
layernorm
(
&
gpu_name
);
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
...
...
@@ -331,7 +331,7 @@ struct find_layernorm
if
(
relements
>
1024
or
(
relements
%
4
!=
0
and
relements
>
256
))
return
;
p
.
replace_instruction
(
ins
,
hip_layernorm
{},
x_ins
,
args
.
back
());
m
.
replace_instruction
(
ins
,
hip_layernorm
{},
x_ins
,
args
.
back
());
}
};
...
...
@@ -343,11 +343,11 @@ struct find_triadd_layernorm
match
::
used_once
(),
match
::
all_of
[
match
::
inputs
()](
match
::
standard_shape
()))));
}
void
apply
(
module
&
p
,
const
match
::
matcher_result
&
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
triadd
=
ins
->
inputs
().
front
();
p
.
replace_instruction
(
ins
,
hip_triadd_layernorm
{},
triadd
->
inputs
());
m
.
replace_instruction
(
ins
,
hip_triadd_layernorm
{},
triadd
->
inputs
());
}
};
...
...
@@ -355,13 +355,13 @@ struct find_gelu
{
auto
matcher
()
const
{
return
match
::
gelu_erf
(
&
gpu_name
);
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
args
=
ins
->
inputs
();
p
.
replace_instruction
(
ins
,
hip_gelu
{},
x_ins
,
args
.
back
());
m
.
replace_instruction
(
ins
,
hip_gelu
{},
x_ins
,
args
.
back
());
}
};
...
...
@@ -372,7 +372,7 @@ struct find_add_gelu
return
match
::
name
(
"gpu::gelu"
)(
match
::
arg
(
0
)(
match
::
name
(
"gpu::add"
).
bind
(
"add"
)));
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
ins
=
r
.
result
;
...
...
@@ -381,7 +381,7 @@ struct find_add_gelu
move_broadcasted_back
(
args
);
args
.
back
()
=
ins
->
inputs
().
back
();
p
.
replace_instruction
(
ins
,
hip_add_gelu
{},
args
);
m
.
replace_instruction
(
ins
,
hip_add_gelu
{},
args
);
}
};
...
...
@@ -391,16 +391,16 @@ struct find_gelu_new
auto
matcher
()
const
{
return
match
::
gelu_tanh
(
&
gpu_name
);
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
args
=
ins
->
inputs
();
if
(
fast_math
)
p
.
replace_instruction
(
ins
,
hip_gelu
{},
x_ins
,
args
.
back
());
m
.
replace_instruction
(
ins
,
hip_gelu
{},
x_ins
,
args
.
back
());
else
p
.
replace_instruction
(
ins
,
hip_gelu_new
{},
x_ins
,
args
.
back
());
m
.
replace_instruction
(
ins
,
hip_gelu_new
{},
x_ins
,
args
.
back
());
}
};
...
...
@@ -411,7 +411,7 @@ struct find_add_gelu_new
return
match
::
name
(
"gpu::gelu_new"
)(
match
::
arg
(
0
)(
match
::
name
(
"gpu::add"
).
bind
(
"add"
)));
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
ins
=
r
.
result
;
...
...
@@ -420,7 +420,7 @@ struct find_add_gelu_new
move_broadcasted_back
(
args
);
args
.
back
()
=
ins
->
inputs
().
back
();
p
.
replace_instruction
(
ins
,
hip_add_gelu_new
{},
args
);
m
.
replace_instruction
(
ins
,
hip_add_gelu_new
{},
args
);
}
};
...
...
@@ -435,7 +435,7 @@ struct find_add_clip
.
bind
(
"add"
)));
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
ins
=
r
.
result
;
...
...
@@ -448,9 +448,9 @@ struct find_add_clip
add_args
.
pop_back
();
add_args
.
insert
(
add_args
.
end
(),
std
::
next
(
ins_args
.
begin
()),
ins_args
.
end
());
if
(
add_ins
->
name
()
==
"gpu::add"
)
p
.
replace_instruction
(
ins
,
hip_add_clip
{},
add_args
);
m
.
replace_instruction
(
ins
,
hip_add_clip
{},
add_args
);
else
if
(
add_ins
->
name
()
==
"gpu::triadd"
)
p
.
replace_instruction
(
ins
,
hip_triadd_clip
{},
add_args
);
m
.
replace_instruction
(
ins
,
hip_triadd_clip
{},
add_args
);
}
};
...
...
@@ -470,7 +470,7 @@ struct find_add_unary
.
bind
(
"add"
)));
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
ins
=
r
.
result
;
...
...
@@ -481,9 +481,9 @@ struct find_add_unary
// Use the allocation from the relu operator
args
.
back
()
=
ins
->
inputs
().
back
();
if
(
add_ins
->
name
()
==
"gpu::add"
)
p
.
replace_instruction
(
ins
,
binary_add_op
,
args
);
m
.
replace_instruction
(
ins
,
binary_add_op
,
args
);
else
if
(
add_ins
->
name
()
==
"gpu::triadd"
)
p
.
replace_instruction
(
ins
,
ternary_add_op
,
args
);
m
.
replace_instruction
(
ins
,
ternary_add_op
,
args
);
}
};
...
...
@@ -498,7 +498,7 @@ struct find_triadd
.
bind
(
"input"
)));
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
input_ins
=
r
.
instructions
[
"input"
];
...
...
@@ -513,7 +513,7 @@ struct find_triadd
move_broadcasted_back
(
args
);
args
.
back
()
=
ins
->
inputs
().
back
();
p
.
replace_instruction
(
ins
,
hip_triadd
{},
args
);
m
.
replace_instruction
(
ins
,
hip_triadd
{},
args
);
}
};
...
...
@@ -525,7 +525,7 @@ struct find_mul_add
match
::
name
(
"gpu::mul"
)(
match
::
used_once
()).
bind
(
"mul"
),
match
::
any
().
bind
(
"b"
)));
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
mul_ins
=
r
.
instructions
[
"mul"
];
auto
b_ins
=
r
.
instructions
[
"b"
];
...
...
@@ -538,7 +538,7 @@ struct find_mul_add
args
.
insert
(
std
::
prev
(
args
.
end
()),
b_ins
);
args
.
back
()
=
ins
->
inputs
().
back
();
p
.
replace_instruction
(
ins
,
hip_mul_add
{},
args
);
m
.
replace_instruction
(
ins
,
hip_mul_add
{},
args
);
}
};
...
...
@@ -550,7 +550,7 @@ struct find_mul_add_relu
match
::
arg
(
0
)(
match
::
name
(
"gpu::mul_add"
)(
match
::
used_once
()).
bind
(
"mul_add"
)));
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
mul_add_ins
=
r
.
instructions
[
"mul_add"
];
auto
ins
=
r
.
result
;
...
...
@@ -558,7 +558,7 @@ struct find_mul_add_relu
// Use the allocation from the relu operator
args
.
back
()
=
ins
->
inputs
().
back
();
p
.
replace_instruction
(
ins
,
hip_mul_add_relu
{},
args
);
m
.
replace_instruction
(
ins
,
hip_mul_add_relu
{},
args
);
}
};
...
...
@@ -783,7 +783,7 @@ auto conv_bias(Ms... ms)
}
template
<
class
Op
>
void
apply_conv_bias
(
context
&
ctx
,
module
&
p
,
match
::
matcher_result
r
)
void
apply_conv_bias
(
context
&
ctx
,
module
&
m
,
const
match
::
matcher_result
&
r
)
{
auto
conv_ins
=
r
.
instructions
[
"conv"
];
auto
bias_ins
=
r
.
instructions
[
"bias"
];
...
...
@@ -798,7 +798,7 @@ void apply_conv_bias(context& ctx, module& p, match::matcher_result r)
// TODO: Insert ws allocation
auto
ws
=
cb
.
get_workspace
(
ctx
);
(
void
)
ws
;
p
.
replace_instruction
(
ins
,
cb
,
input_ins
,
weights_ins
,
old_ws_ins
,
bias_ins
,
alloc_ins
);
m
.
replace_instruction
(
ins
,
cb
,
input_ins
,
weights_ins
,
old_ws_ins
,
bias_ins
,
alloc_ins
);
}
inline
auto
precompile_name
(
std
::
string
s
)
// NOLINT
...
...
@@ -829,9 +829,9 @@ struct find_conv_bias
match
::
output
(
match
::
name
(
std
::
unordered_set
<
std
::
string
>
{
"gpu::relu"
}))));
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
apply_conv_bias
<
miopen_conv_bias
>
(
*
ctx
,
p
,
std
::
move
(
r
)
);
apply_conv_bias
<
miopen_conv_bias
>
(
*
ctx
,
m
,
r
);
}
};
...
...
@@ -840,9 +840,9 @@ struct find_conv_bias_relu
context
*
ctx
=
nullptr
;
auto
matcher
()
const
{
return
match
::
name
(
"gpu::relu"
)(
match
::
arg
(
0
)(
conv_bias
()));
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
apply_conv_bias
<
miopen_conv_bias_relu
>
(
*
ctx
,
p
,
std
::
move
(
r
)
);
apply_conv_bias
<
miopen_conv_bias_relu
>
(
*
ctx
,
m
,
r
);
}
};
...
...
@@ -857,7 +857,7 @@ struct find_conv_pointwise
fusable_conv
(
match
::
used_once
()).
bind
(
"conv"
)));
}
void
apply
(
module
&
m
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
conv_ins
=
r
.
instructions
[
"conv"
];
auto
bias_ins
=
r
.
instructions
[
"bias"
];
...
...
@@ -896,7 +896,7 @@ struct find_gemm_add
match
::
name
(
"gpu::gemm"
)(
match
::
nargs
(
3
)).
bind
(
"gemm"
)));
}
void
apply
(
module
&
p
,
match
::
matcher_result
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
gemm_ins
=
r
.
instructions
[
"gemm"
];
...
...
@@ -919,15 +919,15 @@ struct find_gemm_add
auto
copy_ins
=
c_ins
;
// Insert copy
if
(
ins
==
p
.
end
()
or
c_ins
->
outputs
().
size
()
>
1
or
c_ins
->
inputs
().
empty
())
if
(
ins
==
m
.
end
()
or
c_ins
->
outputs
().
size
()
>
1
or
c_ins
->
inputs
().
empty
())
{
copy_ins
=
p
.
insert_instruction
(
ins
,
hip_copy
{},
c_ins
,
ins
->
inputs
().
back
());
copy_ins
=
m
.
insert_instruction
(
ins
,
hip_copy
{},
c_ins
,
ins
->
inputs
().
back
());
}
inputs
.
push_back
(
copy_ins
);
inputs
.
push_back
(
copy_ins
);
gemm
.
beta
=
1
;
p
.
replace_instruction
(
ins
,
gemm
,
inputs
);
m
.
replace_instruction
(
ins
,
gemm
,
inputs
);
}
};
...
...
@@ -938,22 +938,22 @@ struct find_commutative_broadcast
return
match
::
name
(
"gpu::add"
,
"gpu::mul"
)(
match
::
arg
(
1
)(
match
::
broadcast_shape
()));
}
void
apply
(
module
&
p
,
const
match
::
matcher_result
&
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
args
=
ins
->
inputs
();
move_broadcasted_back
(
args
);
p
.
replace_instruction
(
ins
,
ins
->
get_operator
(),
args
);
m
.
replace_instruction
(
ins
,
ins
->
get_operator
(),
args
);
}
};
void
fuse_ops
::
apply
(
module
&
p
)
const
void
fuse_ops
::
apply
(
module
&
m
)
const
{
match
::
find_matches
(
p
,
find_gelu
{},
find_gelu_new
{
fast_math
});
run_passes
(
p
,
{
dead_code_elimination
{}});
match
::
find_matches
(
p
,
find_triadd
{});
match
::
find_matches
(
p
,
match
::
find_matches
(
m
,
find_gelu
{},
find_gelu_new
{
fast_math
});
run_passes
(
m
,
{
dead_code_elimination
{}});
match
::
find_matches
(
m
,
find_triadd
{});
match
::
find_matches
(
m
,
find_layernorm
{},
find_conv_pointwise
{
ctx
},
find_conv_bias_relu
{
ctx
},
...
...
@@ -966,8 +966,8 @@ void fuse_ops::apply(module& p) const
find_add_unary
{
"gpu::sigmoid"
,
hip_add_sigmoid
{},
hip_triadd_sigmoid
{}},
find_add_unary
{
"gpu::tanh"
,
hip_add_tanh
{},
hip_triadd_tanh
{}},
find_add_clip
{});
run_passes
(
p
,
{
dead_code_elimination
{}});
match
::
find_matches
(
p
,
find_triadd_layernorm
{},
find_gemm_add
{},
find_commutative_broadcast
{});
run_passes
(
m
,
{
dead_code_elimination
{}});
match
::
find_matches
(
m
,
find_triadd_layernorm
{},
find_gemm_add
{},
find_commutative_broadcast
{});
}
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/analyze_streams.hpp
View file @
d9a5acbd
...
...
@@ -11,7 +11,7 @@ struct module;
namespace
gpu
{
std
::
vector
<
stream_race
>
analyze_streams
(
const
module
&
p
);
std
::
vector
<
stream_race
>
analyze_streams
(
const
module
&
m
);
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/eliminate_workspace.hpp
View file @
d9a5acbd
...
...
@@ -14,7 +14,7 @@ namespace gpu {
struct
eliminate_workspace
{
std
::
string
name
()
const
{
return
"eliminate_workspace"
;
}
void
apply
(
module
&
p
)
const
;
void
apply
(
module
&
m
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/fuse_ops.hpp
View file @
d9a5acbd
...
...
@@ -16,7 +16,7 @@ struct fuse_ops
context
*
ctx
=
nullptr
;
bool
fast_math
=
true
;
std
::
string
name
()
const
{
return
"gpu::fuse_ops"
;
}
void
apply
(
module
&
p
)
const
;
void
apply
(
module
&
m
)
const
;
};
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp
0 → 100644
View file @
d9a5acbd
#ifndef MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP
#define MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
struct
module
;
namespace
gpu
{
struct
prefuse_ops
{
std
::
string
name
()
const
{
return
"gpu::prefuse_ops"
;
}
void
apply
(
module
&
m
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP
src/targets/gpu/include/migraphx/gpu/schedule_model.hpp
View file @
d9a5acbd
...
...
@@ -17,9 +17,9 @@ struct schedule_model
{
std
::
size_t
streams
=
0
;
std
::
size_t
concurrency
()
const
;
void
sched
(
module
&
p
,
instruction_ref
ins
,
std
::
size_t
n
)
const
;
void
wait
(
module
&
p
,
instruction_ref
ins
,
std
::
size_t
wait_id
)
const
;
void
record
(
module
&
p
,
instruction_ref
ins
,
std
::
size_t
wait_id
)
const
;
void
sched
(
module
&
m
,
instruction_ref
ins
,
std
::
size_t
n
)
const
;
void
wait
(
module
&
m
,
instruction_ref
ins
,
std
::
size_t
wait_id
)
const
;
void
record
(
module
&
m
,
instruction_ref
ins
,
std
::
size_t
wait_id
)
const
;
std
::
size_t
weight
(
const
operation
&
op
)
const
;
};
...
...
src/targets/gpu/include/migraphx/gpu/sync_device.hpp
View file @
d9a5acbd
...
...
@@ -15,7 +15,7 @@ namespace gpu {
struct
sync_device
{
std
::
string
name
()
const
{
return
"sync_device"
;
}
void
apply
(
module
&
p
)
const
;
void
apply
(
module
&
m
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/write_literals.hpp
View file @
d9a5acbd
...
...
@@ -14,7 +14,7 @@ struct write_literals
context
*
ctx
=
nullptr
;
std
::
string
name
()
const
{
return
"gpu::write_literals"
;
}
void
apply
(
module
&
p
)
const
;
void
apply
(
module
&
m
)
const
;
};
}
// namespace gpu
...
...
src/targets/gpu/prefuse_ops.cpp
0 → 100644
View file @
d9a5acbd
#include <migraphx/gpu/prefuse_ops.hpp>
#include <migraphx/match/layernorm.hpp>
#include <migraphx/make_op.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
{
struct
find_layernorm
{
auto
matcher
()
const
{
return
match
::
layernorm
();
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
if
(
not
x_ins
->
get_shape
().
standard
())
x_ins
=
m
.
insert_instruction
(
ins
,
make_op
(
"contiguous"
),
x_ins
);
auto
relements
=
x_ins
->
get_shape
().
lens
().
back
();
if
(
relements
>
1024
or
(
relements
%
4
!=
0
and
relements
>
256
))
return
;
auto
a
=
m
.
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
x_ins
->
get_shape
())}}));
m
.
replace_instruction
(
ins
,
make_op
(
"gpu::layernorm"
),
x_ins
,
a
);
}
};
struct
find_triaddlayernorm
{
auto
matcher
()
const
{
auto
add1
=
match
::
name
(
"add"
)(
match
::
none_of
(
match
::
is_constant
()),
match
::
args
(
match
::
any
().
bind
(
"z1"
),
match
::
any
().
bind
(
"z2"
)));
auto
add2
=
match
::
name
(
"add"
)(
match
::
either_arg
(
0
,
1
)(
add1
,
match
::
any
().
bind
(
"z3"
)));
return
match
::
layernorm
()(
match
::
var
(
"x"
)(
add2
));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"z1"
];
auto
y_ins
=
r
.
instructions
[
"z2"
];
auto
z_ins
=
r
.
instructions
[
"z3"
];
for
(
auto
*
pins
:
{
&
x_ins
,
&
y_ins
,
&
z_ins
})
{
if
(
not
(
*
pins
)
->
get_shape
().
standard
())
*
pins
=
m
.
insert_instruction
(
ins
,
make_op
(
"contiguous"
),
*
pins
);
}
auto
relements
=
x_ins
->
get_shape
().
lens
().
back
();
if
(
relements
>
1024
or
(
relements
%
4
!=
0
and
relements
>
256
))
return
;
auto
a
=
m
.
insert_instruction
(
ins
,
make_op
(
"hip::allocate"
,
{{
"shape"
,
to_value
(
x_ins
->
get_shape
())}}));
m
.
replace_instruction
(
ins
,
make_op
(
"gpu::triadd_layernorm"
),
x_ins
,
y_ins
,
z_ins
,
a
);
}
};
}
// namespace
void
prefuse_ops
::
apply
(
module
&
m
)
const
{
match
::
find_matches
(
m
,
find_triaddlayernorm
{},
find_layernorm
{});
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/schedule_model.cpp
View file @
d9a5acbd
...
...
@@ -77,28 +77,28 @@ MIGRAPHX_REGISTER_OP(wait_event)
MIGRAPHX_REGISTER_OP
(
set_stream
)
std
::
size_t
schedule_model
::
concurrency
()
const
{
return
streams
;
}
void
schedule_model
::
sched
(
module
&
p
,
instruction_ref
ins
,
std
::
size_t
n
)
const
void
schedule_model
::
sched
(
module
&
m
,
instruction_ref
ins
,
std
::
size_t
n
)
const
{
auto
last_stream
=
std
::
find_if
(
std
::
make_reverse_iterator
(
ins
),
std
::
make_reverse_iterator
(
p
.
begin
()),
std
::
make_reverse_iterator
(
m
.
begin
()),
[
&
](
auto
&&
i
)
{
return
i
.
name
()
==
"gpu::set_stream"
;
});
if
(
last_stream
!=
std
::
make_reverse_iterator
(
p
.
begin
()))
if
(
last_stream
!=
std
::
make_reverse_iterator
(
m
.
begin
()))
{
auto
&&
op
=
any_cast
<
set_stream
>
(
last_stream
->
get_operator
());
// If the same stream was set earlier then skip
if
(
op
.
stream
==
n
)
return
;
}
p
.
insert_instruction
(
ins
,
set_stream
{
n
});
m
.
insert_instruction
(
ins
,
set_stream
{
n
});
}
void
schedule_model
::
wait
(
module
&
p
,
instruction_ref
ins
,
std
::
size_t
wait_id
)
const
void
schedule_model
::
wait
(
module
&
m
,
instruction_ref
ins
,
std
::
size_t
wait_id
)
const
{
p
.
insert_instruction
(
ins
,
wait_event
{
wait_id
});
m
.
insert_instruction
(
ins
,
wait_event
{
wait_id
});
}
void
schedule_model
::
record
(
module
&
p
,
instruction_ref
ins
,
std
::
size_t
wait_id
)
const
void
schedule_model
::
record
(
module
&
m
,
instruction_ref
ins
,
std
::
size_t
wait_id
)
const
{
p
.
insert_instruction
(
std
::
next
(
ins
),
record_event
{
wait_id
});
m
.
insert_instruction
(
std
::
next
(
ins
),
record_event
{
wait_id
});
}
static
std
::
unordered_map
<
std
::
string
,
std
::
size_t
>
create_weight_map
()
...
...
src/targets/gpu/sync_device.cpp
View file @
d9a5acbd
...
...
@@ -8,9 +8,9 @@ namespace migraphx {
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
void
sync_device
::
apply
(
module
&
p
)
const
void
sync_device
::
apply
(
module
&
m
)
const
{
auto
last
=
std
::
prev
(
p
.
end
());
auto
last
=
std
::
prev
(
m
.
end
());
if
(
last
->
name
()
==
"@return"
)
{
auto
inputs
=
last
->
inputs
();
...
...
@@ -18,10 +18,10 @@ void sync_device::apply(module& p) const
return
(
i
->
name
()
==
"hip::copy_from_gpu"
);
}))
{
auto
sync_in
=
p
.
insert_instruction
(
last
,
make_op
(
"hip::sync_stream"
),
inputs
);
auto
sync_in
=
m
.
insert_instruction
(
last
,
make_op
(
"hip::sync_stream"
),
inputs
);
if
(
not
inputs
.
empty
())
{
p
.
replace_instruction
(
inputs
.
front
(),
sync_in
);
m
.
replace_instruction
(
inputs
.
front
(),
sync_in
);
}
}
}
...
...
src/targets/gpu/target.cpp
View file @
d9a5acbd
...
...
@@ -31,6 +31,7 @@
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/eliminate_workspace.hpp>
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/gpu/prefuse_ops.hpp>
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/mlir_conv.hpp>
#include <migraphx/gpu/pack_int8_args.hpp>
...
...
@@ -96,6 +97,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
simplify_algebra
{},
simplify_reshapes
{},
simplify_algebra
{},
prefuse_ops
{},
dead_code_elimination
{},
auto_contiguous
{},
simplify_reshapes
{},
propagate_constant
{},
...
...
src/targets/gpu/write_literals.cpp
View file @
d9a5acbd
...
...
@@ -11,25 +11,25 @@ namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_COPY_LITERALS
)
void
write_literals
::
apply
(
module
&
p
)
const
void
write_literals
::
apply
(
module
&
m
)
const
{
assert
(
ctx
!=
nullptr
);
std
::
size_t
n
=
0
;
for
(
auto
ins
:
iterator_for
(
p
))
for
(
auto
ins
:
iterator_for
(
m
))
{
if
(
ins
->
name
()
==
"@literal"
)
{
if
(
enabled
(
MIGRAPHX_COPY_LITERALS
{}))
{
literal
l
=
ins
->
get_literal
();
auto
pre
=
p
.
add_literal
(
l
);
auto
alloc
=
p
.
insert_instruction
(
std
::
next
(
pre
),
hip_allocate
{
l
.
get_shape
()});
p
.
replace_instruction
(
ins
,
hip_copy_to_gpu
{},
pre
,
alloc
);
auto
pre
=
m
.
add_literal
(
l
);
auto
alloc
=
m
.
insert_instruction
(
std
::
next
(
pre
),
hip_allocate
{
l
.
get_shape
()});
m
.
replace_instruction
(
ins
,
hip_copy_to_gpu
{},
pre
,
alloc
);
}
else
{
std
::
string
id
=
p
.
name
()
+
":@literal:"
+
std
::
to_string
(
n
);
p
.
replace_instruction
(
ins
,
hip_copy_literal
{
ins
->
get_literal
(),
id
});
std
::
string
id
=
m
.
name
()
+
":@literal:"
+
std
::
to_string
(
n
);
m
.
replace_instruction
(
ins
,
hip_copy_literal
{
ins
->
get_literal
(),
id
});
n
++
;
}
}
...
...
test/dead_code_elimination_test.cpp
View file @
d9a5acbd
...
...
@@ -180,6 +180,40 @@ TEST_CASE(duplicate_args3)
EXPECT
(
result
==
migraphx
::
literal
{
0
});
}
TEST_CASE
(
reused_twice
)
{
migraphx
::
program
p
;
auto
*
mm
=
p
.
get_main_module
();
std
::
vector
<
size_t
>
dims
=
{
1
,
2
,
2
};
auto
x
=
mm
->
add_parameter
(
"x"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
dims
});
auto
y
=
mm
->
add_parameter
(
"y"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
dims
});
auto
z
=
mm
->
add_parameter
(
"z"
,
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
dims
});
auto
add1
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
x
,
y
);
auto
add2
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
add1
,
z
);
auto
epsilon
=
mm
->
add_literal
(
1e-12
f
);
auto
exponent
=
mm
->
add_literal
(
2.0
f
);
auto
mean
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_mean"
,
{{
"axes"
,
{
2
}}}),
add2
);
auto
mean_mbcast
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
dims
}}),
mean
);
auto
sub
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"sub"
),
add2
,
mean_mbcast
);
auto
exponent_mbcast
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
dims
}}),
exponent
);
auto
pow
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"pow"
),
sub
,
exponent_mbcast
);
auto
var
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_mean"
,
{{
"axes"
,
{
2
}}}),
pow
);
auto
epsilon_mbcast
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
{
1
,
dims
.
at
(
1
),
1
}}}),
epsilon
);
auto
add_epsilon
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
var
,
epsilon_mbcast
);
mm
->
add_instruction
(
migraphx
::
make_op
(
"sqrt"
),
add_epsilon
);
mm
->
add_instruction
(
migraphx
::
make_op
(
"add"
),
x
,
y
);
auto
count
=
std
::
distance
(
mm
->
begin
(),
mm
->
end
());
run_pass
(
p
);
p
.
debug_print
();
EXPECT
(
std
::
distance
(
mm
->
begin
(),
mm
->
end
())
!=
count
);
EXPECT
(
std
::
distance
(
mm
->
begin
(),
mm
->
end
())
==
4
);
}
TEST_CASE
(
unused_module
)
{
migraphx
::
program
p
;
...
...
test/matcher.cpp
View file @
d9a5acbd
...
...
@@ -332,7 +332,7 @@ TEST_CASE(match_either_args_any1)
match
::
name
(
"sum"
)(
match
::
either_arg
(
0
,
1
)(
match
::
any
().
bind
(
"x"
),
match
::
any
().
bind
(
"y"
)));
auto
r
=
find_match
(
mm
,
m
);
EXPECT
(
bool
{
r
.
result
==
sum1
});
EXPECT
(
bool
{
r
.
instructions
.
at
(
"x"
)
!=
r
.
instructions
.
at
(
"y"
)
});
EXPECT
(
bool
{
r
.
instructions
[
"x"
]
!=
r
.
instructions
[
"y"
]
});
}
TEST_CASE
(
match_either_args_any2
)
...
...
@@ -347,7 +347,7 @@ TEST_CASE(match_either_args_any2)
match
::
either_arg
(
0
,
1
)(
match
::
any
().
bind
(
"x"
),
match
::
name
(
"@literal"
).
bind
(
"y"
)));
auto
r
=
find_match
(
mm
,
m
);
EXPECT
(
bool
{
r
.
result
==
sum1
});
EXPECT
(
bool
{
r
.
instructions
.
at
(
"x"
)
!=
r
.
instructions
.
at
(
"y"
)
});
EXPECT
(
bool
{
r
.
instructions
[
"x"
]
!=
r
.
instructions
[
"y"
]
});
}
TEST_CASE
(
match_either_args_any3
)
...
...
@@ -362,7 +362,7 @@ TEST_CASE(match_either_args_any3)
match
::
either_arg
(
0
,
1
)(
match
::
name
(
"@literal"
).
bind
(
"x"
),
match
::
any
().
bind
(
"y"
)));
auto
r
=
find_match
(
mm
,
m
);
EXPECT
(
bool
{
r
.
result
==
sum1
});
EXPECT
(
bool
{
r
.
instructions
.
at
(
"x"
)
!=
r
.
instructions
.
at
(
"y"
)
});
EXPECT
(
bool
{
r
.
instructions
[
"x"
]
!=
r
.
instructions
[
"y"
]
});
}
TEST_CASE
(
match_either_args_any4
)
...
...
@@ -377,7 +377,7 @@ TEST_CASE(match_either_args_any4)
match
::
either_arg
(
0
,
1
)(
match
::
name
(
"sum"
).
bind
(
"x"
),
match
::
any
().
bind
(
"y"
)));
auto
r
=
find_match
(
mm
,
m
);
EXPECT
(
bool
{
r
.
result
==
sum2
});
EXPECT
(
bool
{
r
.
instructions
.
at
(
"x"
)
!=
r
.
instructions
.
at
(
"y"
)
});
EXPECT
(
bool
{
r
.
instructions
[
"x"
]
!=
r
.
instructions
[
"y"
]
});
}
TEST_CASE
(
match_either_args_any5
)
...
...
@@ -392,7 +392,7 @@ TEST_CASE(match_either_args_any5)
match
::
either_arg
(
0
,
1
)(
match
::
any
().
bind
(
"x"
),
match
::
name
(
"sum"
).
bind
(
"y"
)));
auto
r
=
find_match
(
mm
,
m
);
EXPECT
(
bool
{
r
.
result
==
sum2
});
EXPECT
(
bool
{
r
.
instructions
.
at
(
"x"
)
!=
r
.
instructions
.
at
(
"y"
)
});
EXPECT
(
bool
{
r
.
instructions
[
"x"
]
!=
r
.
instructions
[
"y"
]
});
}
TEST_CASE
(
match_all_of1
)
...
...
@@ -747,10 +747,10 @@ TEST_CASE(match_bind1)
match
::
standard_shape
())
.
bind
(
"pass"
);
auto
r
=
find_match
(
mm
,
m
);
EXPECT
(
bool
{
r
.
instructions
.
at
(
"one"
)
==
one
});
EXPECT
(
bool
{
r
.
instructions
.
at
(
"two"
)
==
two
});
EXPECT
(
bool
{
r
.
instructions
.
at
(
"sum"
)
==
sum
});
EXPECT
(
bool
{
r
.
instructions
.
at
(
"pass"
)
==
pass
});
EXPECT
(
bool
{
r
.
instructions
[
"one"
]
==
one
});
EXPECT
(
bool
{
r
.
instructions
[
"two"
]
==
two
});
EXPECT
(
bool
{
r
.
instructions
[
"sum"
]
==
sum
});
EXPECT
(
bool
{
r
.
instructions
[
"pass"
]
==
pass
});
EXPECT
(
bool
{
r
.
result
==
pass
});
}
...
...
@@ -795,9 +795,9 @@ TEST_CASE(match_bind_modules2)
match
::
standard_shape
())
.
bind
(
"pass"
);
auto
r
=
find_match
(
*
child
,
m
);
EXPECT
(
bool
{
r
.
instructions
.
at
(
"two"
)
==
two
});
EXPECT
(
bool
{
r
.
instructions
.
at
(
"sum"
)
==
sum
});
EXPECT
(
bool
{
r
.
instructions
.
at
(
"pass"
)
==
pass
});
EXPECT
(
bool
{
r
.
instructions
[
"two"
]
==
two
});
EXPECT
(
bool
{
r
.
instructions
[
"sum"
]
==
sum
});
EXPECT
(
bool
{
r
.
instructions
[
"pass"
]
==
pass
});
EXPECT
(
bool
{
r
.
result
==
pass
});
}
...
...
tools/install_prereqs.sh
View file @
d9a5acbd
...
...
@@ -4,12 +4,20 @@
set
-e
#install pip3, rocm-cmake, rocblas and miopen
apt update
&&
apt
install
-y
python3-pip rocm-cmake rocblas miopen-hip openmp-extras
export
LC_ALL
=
C.UTF-8
export
LANG
=
C.UTF-8
# Need pip3 and Python headers to build dependencies
apt update
&&
apt
install
-y
python3-pip python3-dev cmake rocm-cmake rocblas miopen-hip openmp-extras
# Needed for cmake to build various pip packages
pip3
install
setuptools wheel
# install rbuild to build dependencies
pip3
install
https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
PREFIX
=
/usr/local
REQ_FILE_DIR
=
""
if
[
"$#"
-ge
2
]
;
then
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment