Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
f3aa2c67
Commit
f3aa2c67
authored
Jun 25, 2022
by
Paul
Browse files
Add layernorm post fusion
parent
b973563b
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
54 additions
and
9 deletions
+54
-9
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+21
-0
src/targets/gpu/jit/layernorm.cpp
src/targets/gpu/jit/layernorm.cpp
+25
-6
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp
...rgets/gpu/kernels/include/migraphx/kernels/functional.hpp
+1
-1
src/targets/gpu/prefuse_ops.cpp
src/targets/gpu/prefuse_ops.cpp
+7
-2
No files found.
src/targets/gpu/fuse_ops.cpp
View file @
f3aa2c67
...
...
@@ -1047,6 +1047,26 @@ struct find_contiguous_pointwise
}
};
struct
find_layernorm_pointwise
{
auto
matcher
()
const
{
return
precompile_name
(
"pointwise"
)(
match
::
arg
(
0
)(
precompile_name
(
"gpu::prelayernorm"
).
bind
(
"layernorm"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
layernorm
=
r
.
instructions
[
"layernorm"
];
auto
*
pm
=
ins
->
module_inputs
().
front
();
auto
inputs
=
ins
->
inputs
();
inputs
.
front
()
=
layernorm
->
inputs
().
front
();
m
.
replace_instruction
(
ins
,
layernorm
->
get_operator
(),
inputs
,
{
pm
});
}
};
void
fuse_ops
::
apply
(
module
&
m
)
const
{
match
::
find_matches
(
m
,
find_contiguous_pointwise
{},
find_gelu
{},
find_gelu_new
{
fast_math
});
...
...
@@ -1069,6 +1089,7 @@ void fuse_ops::apply(module& m) const
match
::
find_matches
(
m
,
find_triadd_layernorm
{},
find_gemm_add
{},
find_layernorm_pointwise
{},
find_gemm_pointwise
{},
find_commutative_broadcast
{});
match
::
find_matches
(
m
,
find_contiguous
{});
...
...
src/targets/gpu/jit/layernorm.cpp
View file @
f3aa2c67
...
...
@@ -19,15 +19,19 @@ static const char* const layernorm_kernel = R"__migraphx__(
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/layernorm.hpp>
#include <migraphx/kernels/vectorize.hpp>
#include <migraphx/kernels/preload.hpp>
#include <args.hpp>
namespace migraphx {
${preamble}
extern "C" {
__global__ void
layernorm_kernel(void* input_p, void* output_p
)
__global__ void
${kernel}(${params}
)
{
transform_args(make_tensors(), rotate_last(), ${transformers})(input_p, output_p)([](auto... xs) {
layernorm<${axis}>(op::id{}, xs...);
auto idx = make_index();
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto... xs) {
layernorm<${axis}>(${post}, xs...);
});
}
...
...
@@ -52,6 +56,7 @@ struct layernorm_compiler : compiler<layernorm_compiler>
{
vec
=
vectorize
::
elements
(
faxis
,
inputs
);
}
auto
preloads
=
preload
::
broadcasts
(
axis
,
inputs
);
auto
relements
=
inputs
[
0
].
lens
()[
axis
]
/
vec
.
size
;
auto
nelements
=
inputs
.
back
().
elements
()
/
relements
;
auto
block_size
=
compute_block_size
(
relements
,
256
);
...
...
@@ -60,18 +65,32 @@ struct layernorm_compiler : compiler<layernorm_compiler>
v
,
compute_global_for
(
ctx
,
nelements
*
block_size
,
256
),
block_size
);
options
.
output
=
inputs
.
back
();
options
.
inputs
=
inputs
;
options
.
kernel_name
=
"layernorm_kernel"
;
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"layernorm_kernel"
)
;
auto
src
=
interpolate_string
(
layernorm_kernel
,
{{
"transformers"
,
make_transformer_args
(
vec
)},
{
"axis"
,
to_string
(
axis
)}});
{{
"kernel"
,
options
.
kernel_name
},
{
"params"
,
enum_params
(
inputs
.
size
(),
"void * private_p"
)},
{
"args"
,
enum_params
(
inputs
.
size
(),
"private_p"
)},
{
"transformers"
,
make_transformer_args
(
preloads
,
vec
)},
{
"post"
,
v
.
get
(
"post"
,
std
::
string
{
"op::id{}"
})},
{
"preamble"
,
v
.
get
(
"preamble"
,
std
::
string
{})},
{
"axis"
,
to_string
(
axis
)}});
return
compile_hip_code_object
(
src
,
options
);
}
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
{
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
op
.
to_value
()));
auto
v
=
op
.
to_value
();
if
(
not
ins
->
module_inputs
().
empty
())
{
auto
*
pm
=
ins
->
module_inputs
().
front
();
v
[
"preamble"
]
=
generate_pointwise
(
*
pm
,
"post_layernorm"
);
v
[
"post"
]
=
"MIGRAPHX_LIFT(post_layernorm)"
;
v
[
"kernel"
]
=
"layernorm_"
+
generate_name_from_ops
(
*
pm
)
+
"_kernel"
;
}
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
v
));
}
};
...
...
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp
View file @
f3aa2c67
...
...
@@ -32,7 +32,7 @@
// NOLINTNEXTLINE
#define MIGRAPHX_LIFT(...) \
[](auto&&... xs) MIGRAPHX_RETURNS((__VA_ARGS__)(static_cast<decltype(
xs)>(
xs)...))
[](auto&&...
private_lisft_
xs) MIGRAPHX_RETURNS((__VA_ARGS__)(static_cast<decltype(
private_lisft_xs)>(private_lisft_
xs)...))
namespace
migraphx
{
...
...
src/targets/gpu/prefuse_ops.cpp
View file @
f3aa2c67
...
...
@@ -34,9 +34,14 @@ struct layernorm
{
std
::
string
name
()
const
{
return
"gpu::prelayernorm"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
,
std
::
vector
<
module_ref
>
mods
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
1
);
std
::
size_t
nargs
=
1
;
if
(
not
mods
.
empty
())
{
auto
*
pm
=
mods
.
front
();
nargs
=
pm
->
get_parameter_names
().
size
();
}
check_shapes
{
inputs
,
*
this
}.
has
(
nargs
);
auto
s
=
inputs
.
at
(
0
);
if
(
s
.
scalar
())
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment