Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
acc58cfe
Commit
acc58cfe
authored
Sep 20, 2022
by
Paul
Browse files
Merge branch 'jit-concat-pointwise' into dense-opt
parents
c822c48d
e60a7d5e
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
116 additions
and
30 deletions
+116
-30
src/module.cpp
src/module.cpp
+6
-2
src/simplify_algebra.cpp
src/simplify_algebra.cpp
+20
-4
src/targets/gpu/compile_ops.cpp
src/targets/gpu/compile_ops.cpp
+10
-3
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+29
-0
src/targets/gpu/jit/concat.cpp
src/targets/gpu/jit/concat.cpp
+28
-11
src/targets/gpu/kernels/include/migraphx/kernels/concat.hpp
src/targets/gpu/kernels/include/migraphx/kernels/concat.hpp
+18
-8
src/targets/gpu/mlir.cpp
src/targets/gpu/mlir.cpp
+5
-2
No files found.
src/module.cpp
View file @
acc58cfe
...
...
@@ -385,9 +385,13 @@ instruction_ref module::move_instruction(instruction_ref src, instruction_ref ds
instruction_ref
module
::
move_instructions
(
instruction_ref
src
,
instruction_ref
dst
)
{
this
->
move_instruction
(
src
,
dst
);
for
(
auto
ins
:
src
->
inputs
())
this
->
move_instruction
(
ins
,
src
);
{
if
(
not
contains
(
this
->
impl
->
instructions
,
ins
))
continue
;
this
->
move_instructions
(
ins
,
dst
);
}
this
->
move_instruction
(
src
,
dst
);
return
src
;
}
...
...
src/simplify_algebra.cpp
View file @
acc58cfe
...
...
@@ -412,6 +412,24 @@ struct find_concat_op
}
};
void
move_instructions_back
(
module
&
m
,
instruction_ref
pos
,
std
::
vector
<
instruction_ref
>
inss
)
{
auto
start
=
range
(
m
.
begin
(),
pos
);
for
(
auto
ins
:
iterator_for
(
start
))
{
auto
it
=
std
::
find
(
inss
.
begin
(),
inss
.
end
(),
ins
);
if
(
it
!=
inss
.
end
())
inss
.
erase
(
it
);
}
for
(
auto
ins
:
inss
)
{
if
(
not
m
.
has_instruction
(
ins
))
continue
;
move_instructions_back
(
m
,
pos
,
ins
->
inputs
());
m
.
move_instruction
(
ins
,
pos
);
}
}
std
::
vector
<
instruction_ref
>
get_splits
(
instruction_ref
ins
)
{
std
::
vector
<
instruction_ref
>
result
;
...
...
@@ -587,8 +605,7 @@ struct find_splits
}))
return
;
for
(
auto
data
:
data_args
)
m
.
move_instructions
(
data
,
ins
);
move_instructions_back
(
m
,
ins
,
data_args
);
auto
slice_op
=
any_cast
<
op
::
slice
>
(
splits
.
front
()
->
get_operator
());
assert
(
not
slice_op
.
axes
.
empty
());
...
...
@@ -841,8 +858,7 @@ struct find_conv_dot_horiz_fusion
concat_axis
=
axis
;
}
for
(
auto
arg
:
args
)
m
.
move_instructions
(
arg
,
input
);
move_instructions_back
(
m
,
input
,
args
);
// TODO: Check if axes match
auto
concat
=
m
.
insert_instruction
(
input
,
make_op
(
"concat"
,
{{
"axis"
,
concat_axis
}}),
args
);
...
...
src/targets/gpu/compile_ops.cpp
View file @
acc58cfe
...
...
@@ -39,19 +39,26 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_COMPILE_PARALLEL);
struct
precompile_op
{
operation
op
=
op
::
identity
{};
operation
op
=
op
::
identity
{};
std
::
size_t
additional_args
=
1
;
bool
ignore_modules
=
false
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
pack
(
f
(
self
.
op
,
"op"
));
return
pack
(
f
(
self
.
op
,
"op"
),
f
(
self
.
additional_args
,
"additional_args"
),
f
(
self
.
ignore_modules
,
"ignore_modules"
));
}
std
::
string
name
()
const
{
return
"gpu::precompile_op"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
,
const
std
::
vector
<
module_ref
>&
mods
)
const
{
inputs
.
pop_back
();
// Pop off additional args
inputs
.
resize
(
inputs
.
size
()
-
additional_args
);
if
(
ignore_modules
)
return
op
.
compute_shape
(
inputs
);
return
op
.
compute_shape
(
inputs
,
mods
);
}
...
...
src/targets/gpu/fuse_ops.cpp
View file @
acc58cfe
...
...
@@ -1183,6 +1183,34 @@ struct find_layernorm_pointwise
}
};
struct
find_concat_pointwise
{
auto
matcher
()
const
{
return
precompile_name
(
"pointwise"
)(
match
::
arg
(
0
)(
precompile_name
(
"concat"
).
bind
(
"concat"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
concat
=
r
.
instructions
[
"concat"
];
auto
*
pm
=
ins
->
module_inputs
().
front
();
if
(
not
concat
->
module_inputs
().
empty
())
return
;
auto
inputs
=
concat
->
inputs
();
inputs
.
pop_back
();
inputs
.
insert
(
inputs
.
end
(),
ins
->
inputs
().
begin
()
+
1
,
ins
->
inputs
().
end
());
auto
op
=
concat
->
get_operator
();
op
.
from_value
({{
"additional_args"
,
ins
->
inputs
().
size
()
-
1
},
{
"ignore_modules"
,
true
}});
m
.
replace_instruction
(
ins
,
op
,
inputs
,
{
pm
});
}
};
void
fuse_ops
::
apply
(
module
&
m
)
const
{
match
::
find_matches
(
m
,
find_contiguous_pointwise
{},
find_gelu
{},
find_gelu_new
{
fast_math
});
...
...
@@ -1206,6 +1234,7 @@ void fuse_ops::apply(module& m) const
find_triadd_layernorm
{},
find_gemm_add
{},
find_layernorm_pointwise
{},
find_concat_pointwise
{},
find_gemm_pointwise
{},
find_contiguous_tranpose_gemm
{},
find_commutative_broadcast
{});
...
...
src/targets/gpu/jit/concat.cpp
View file @
acc58cfe
...
...
@@ -38,16 +38,19 @@ using namespace migraphx::gpu::gen; // NOLINT
static
const
char
*
const
concat_kernel
=
R"__migraphx__(
#include <migraphx/kernels/concat.hpp>
#include <migraphx/kernels/vectorize.hpp>
#include <migraphx/kernels/ops.hpp>
#include <args.hpp>
namespace migraphx {
${preamble}
extern "C" {
__global__ void ${kernel}(${params})
{
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, auto... xs) {
concat<${axis}>(y, xs...);
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y,
${concat_params},
auto... xs) {
concat<${axis}>(
${concat_args})(${post},
y, xs...);
});
}
...
...
@@ -68,28 +71,42 @@ struct concat_compiler : compiler<concat_compiler>
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
{
// TODO: Use reduce_dims
auto
num_of_concat_inputs
=
v
.
get
(
"concat_inputs"
,
inputs
.
size
()
-
1
);
hip_compile_options
options
;
options
.
inputs
=
inputs
;
options
.
output
=
inputs
.
back
();
options
.
params
=
"-Wno-float-equal"
;
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"concat_kernel"
);
auto
axis
=
find_fast_axis
(
options
.
inputs
);
auto
vec
=
vectorize
::
elements
(
axis
,
options
.
inputs
);
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"concat_kernel"
);
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
get_concat_elements
(
options
.
inputs
)
/
vec
.
size
,
256
));
auto
src
=
interpolate_string
(
concat_kernel
,
{{
"kernel"
,
options
.
kernel_name
},
{
"params"
,
enum_params
(
inputs
.
size
(),
"void * private_p"
)},
{
"args"
,
enum_params
(
inputs
.
size
(),
"private_p"
)},
{
"transformers"
,
make_transformer_args
(
vec
)},
{
"axis"
,
v
.
at
(
"axis"
).
to
<
std
::
string
>
()}});
auto
src
=
interpolate_string
(
concat_kernel
,
{{
"kernel"
,
options
.
kernel_name
},
{
"params"
,
enum_params
(
inputs
.
size
(),
"void * private_p"
)},
{
"args"
,
enum_params
(
inputs
.
size
(),
"private_p"
)},
{
"concat_params"
,
enum_params
(
num_of_concat_inputs
,
"auto concat_x"
)},
{
"concat_args"
,
enum_params
(
num_of_concat_inputs
,
"concat_x"
)},
{
"post"
,
v
.
get
(
"post"
,
std
::
string
{
"op::id{}"
})},
{
"transformers"
,
make_transformer_args
(
vec
)},
{
"preamble"
,
v
.
get
(
"preamble"
,
std
::
string
{})},
{
"axis"
,
v
.
at
(
"axis"
).
to
<
std
::
string
>
()}});
return
compile_hip_code_object
(
src
,
options
);
}
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
{
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
op
.
to_value
()));
auto
v
=
op
.
to_value
();
if
(
not
ins
->
module_inputs
().
empty
())
{
auto
*
pm
=
ins
->
module_inputs
().
front
();
v
[
"concat_inputs"
]
=
ins
->
inputs
().
size
()
-
pm
->
get_parameter_names
().
size
();
v
[
"preamble"
]
=
generate_pointwise
(
*
pm
,
"post_concat"
);
v
[
"post"
]
=
"MIGRAPHX_LIFT(post_concat)"
;
v
[
"kernel"
]
=
"concat_"
+
generate_name_from_ops
(
*
pm
)
+
"_kernel"
;
}
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
v
));
}
};
...
...
src/targets/gpu/kernels/include/migraphx/kernels/concat.hpp
View file @
acc58cfe
...
...
@@ -44,6 +44,12 @@ constexpr auto concat_slice(Output out, Input, Start)
return
make_tensor_view
(
&
out
[
offset
],
s
);
}
template
<
index_int
Axis
,
class
Input
,
class
Start
,
class
...
Ts
>
constexpr
auto
concat_slices
(
Input
input
,
Start
start
,
Ts
...
xs
)
{
return
[
=
](
auto
f
)
{
f
(
concat_slice
<
Axis
>
(
xs
,
input
,
start
)...);
};
}
template
<
index_int
Axis
,
class
Input
>
constexpr
auto
concat_ends
(
Input
)
{
...
...
@@ -51,15 +57,19 @@ constexpr auto concat_ends(Input)
return
_c
<
lens
[
Axis
]
>
;
}
template
<
index_int
Axis
,
class
Output
,
class
...
Inputs
>
__device__
void
concat
(
Output
output
,
Inputs
...
inputs
)
template
<
index_int
Axis
,
class
...
Inputs
>
__device__
auto
concat
(
Inputs
...
inputs
)
{
auto
idx
=
make_index
();
fold
([
&
](
auto
start
,
auto
input
)
{
auto
y
=
concat_slice
<
Axis
>
(
output
,
input
,
start
);
idx
.
global_stride
(
input
.
get_shape
().
elements
(),
[
&
](
auto
i
)
{
y
[
i
]
=
input
[
i
];
});
return
start
+
concat_ends
<
Axis
>
(
input
);
})(
_c
<
0
>
,
inputs
...);
return
[
=
](
auto
f
,
auto
...
ts
)
{
auto
idx
=
make_index
();
fold
([
&
](
auto
start
,
auto
input
)
{
concat_slices
<
Axis
>
(
input
,
start
,
ts
...)([
&
](
auto
y
,
auto
...
xs
)
{
idx
.
global_stride
(
input
.
get_shape
().
elements
(),
[
&
](
auto
i
)
{
y
[
i
]
=
f
(
input
[
i
],
xs
[
i
]...);
});
});
return
start
+
concat_ends
<
Axis
>
(
input
);
})(
_c
<
0
>
,
inputs
...);
};
}
}
// namespace migraphx
...
...
src/targets/gpu/mlir.cpp
View file @
acc58cfe
...
...
@@ -99,7 +99,10 @@ struct mlir_handle
mlir_handle
(
T
p
)
:
handle
(
ptr
{
p
})
{}
T
get
()
const
{
return
handle
.
get
().
get
();
}
T
get
()
const
{
return
handle
.
get
().
get
();
// NOLINT(readability-redundant-smartptr-get)
}
T
release
()
{
return
handle
.
release
().
get
();
}
...
...
@@ -571,7 +574,7 @@ struct mlir_program
MIGRAPHX_THROW
(
"Failed to compile mlir program"
);
}
std
::
string
get_tune_params
()
{
return
get_mlir_perf_for_conv
(
pp
);
}
std
::
string
get_tune_params
()
const
{
return
get_mlir_perf_for_conv
(
pp
);
}
mlir_context
ctx
;
MlirLocation
location
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment