Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
96ff131b
Commit
96ff131b
authored
Sep 15, 2022
by
Paul
Browse files
Use improved vectorizer
parent
b23063c4
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
5 additions
and
7 deletions
+5
-7
src/targets/gpu/jit/concat.cpp
src/targets/gpu/jit/concat.cpp
+1
-1
src/targets/gpu/jit/layernorm.cpp
src/targets/gpu/jit/layernorm.cpp
+2
-4
src/targets/gpu/jit/reduce.cpp
src/targets/gpu/jit/reduce.cpp
+1
-1
src/targets/gpu/jit/softmax.cpp
src/targets/gpu/jit/softmax.cpp
+1
-1
No files found.
src/targets/gpu/jit/concat.cpp
View file @
96ff131b
...
@@ -74,7 +74,7 @@ struct concat_compiler : compiler<concat_compiler>
...
@@ -74,7 +74,7 @@ struct concat_compiler : compiler<concat_compiler>
options
.
output
=
inputs
.
back
();
options
.
output
=
inputs
.
back
();
options
.
params
=
"-Wno-float-equal"
;
options
.
params
=
"-Wno-float-equal"
;
auto
axis
=
find_fast_axis
(
options
.
inputs
);
auto
axis
=
find_fast_axis
(
options
.
inputs
);
auto
vec
=
vectorize
::
elements
(
axis
,
options
.
inputs
);
auto
vec
=
vectorize
::
elements
(
ctx
,
axis
,
options
.
inputs
);
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"concat_kernel"
);
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"concat_kernel"
);
options
.
set_launch_params
(
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
get_concat_elements
(
options
.
inputs
)
/
vec
.
size
,
256
));
v
,
compute_global_for
(
ctx
,
get_concat_elements
(
options
.
inputs
)
/
vec
.
size
,
256
));
...
...
src/targets/gpu/jit/layernorm.cpp
View file @
96ff131b
...
@@ -50,7 +50,6 @@ ${preamble}
...
@@ -50,7 +50,6 @@ ${preamble}
extern "C" {
extern "C" {
__global__ void ${kernel}(${params})
__global__ void ${kernel}(${params})
{
{
auto idx = make_index();
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto... xs) {
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto... xs) {
${layernorm}<${axis}>(${post}, xs...);
${layernorm}<${axis}>(${post}, xs...);
});
});
...
@@ -78,9 +77,8 @@ struct layernorm_compiler : compiler<layernorm_compiler>
...
@@ -78,9 +77,8 @@ struct layernorm_compiler : compiler<layernorm_compiler>
// Vectorize if the axis is a reduction axis
// Vectorize if the axis is a reduction axis
if
(
axis
==
faxis
)
if
(
axis
==
faxis
)
{
{
vec
=
vectorize
::
elements
(
faxis
,
inputs
);
vec
=
vectorize
::
elements
(
ctx
,
faxis
,
inputs
);
}
}
auto
preloads
=
preload
::
broadcasts
(
axis
,
inputs
);
auto
relements
=
inputs
[
0
].
lens
()[
axis
]
/
vec
.
size
;
auto
relements
=
inputs
[
0
].
lens
()[
axis
]
/
vec
.
size
;
auto
nelements
=
(
inputs
.
back
().
elements
()
/
inputs
[
0
].
lens
()[
axis
]);
auto
nelements
=
(
inputs
.
back
().
elements
()
/
inputs
[
0
].
lens
()[
axis
]);
auto
block_size
=
compute_block_size
(
relements
,
256
);
auto
block_size
=
compute_block_size
(
relements
,
256
);
...
@@ -95,7 +93,7 @@ struct layernorm_compiler : compiler<layernorm_compiler>
...
@@ -95,7 +93,7 @@ struct layernorm_compiler : compiler<layernorm_compiler>
{{
"kernel"
,
options
.
kernel_name
},
{{
"kernel"
,
options
.
kernel_name
},
{
"params"
,
enum_params
(
inputs
.
size
(),
"void * private_p"
)},
{
"params"
,
enum_params
(
inputs
.
size
(),
"void * private_p"
)},
{
"args"
,
enum_params
(
inputs
.
size
(),
"private_p"
)},
{
"args"
,
enum_params
(
inputs
.
size
(),
"private_p"
)},
{
"transformers"
,
make_transformer_args
(
preloads
,
vec
)},
{
"transformers"
,
make_transformer_args
(
vec
)},
{
"post"
,
v
.
get
(
"post"
,
std
::
string
{
"op::id{}"
})},
{
"post"
,
v
.
get
(
"post"
,
std
::
string
{
"op::id{}"
})},
{
"preamble"
,
v
.
get
(
"preamble"
,
std
::
string
{})},
{
"preamble"
,
v
.
get
(
"preamble"
,
std
::
string
{})},
{
"layernorm"
,
v
.
get
(
"layernorm"
,
std
::
string
{
"layernorm"
})},
{
"layernorm"
,
v
.
get
(
"layernorm"
,
std
::
string
{
"layernorm"
})},
...
...
src/targets/gpu/jit/reduce.cpp
View file @
96ff131b
...
@@ -121,7 +121,7 @@ struct reduce_compiler : compiler<reduce_compiler>
...
@@ -121,7 +121,7 @@ struct reduce_compiler : compiler<reduce_compiler>
// Vectorize if the axis is a reduction axis
// Vectorize if the axis is a reduction axis
if
(
options
.
virtual_inputs
.
back
().
lens
()[
faxis
]
==
1
)
if
(
options
.
virtual_inputs
.
back
().
lens
()[
faxis
]
==
1
)
{
{
vec
=
vectorize
::
elements
(
faxis
,
options
.
virtual_inputs
);
vec
=
vectorize
::
elements
(
ctx
,
faxis
,
options
.
virtual_inputs
);
}
}
auto
relements
=
get_reduce_elements
(
options
.
virtual_inputs
)
/
vec
.
size
;
auto
relements
=
get_reduce_elements
(
options
.
virtual_inputs
)
/
vec
.
size
;
auto
nelements
=
options
.
virtual_inputs
.
back
().
elements
();
auto
nelements
=
options
.
virtual_inputs
.
back
().
elements
();
...
...
src/targets/gpu/jit/softmax.cpp
View file @
96ff131b
...
@@ -69,7 +69,7 @@ struct softmax_compiler : compiler<softmax_compiler>
...
@@ -69,7 +69,7 @@ struct softmax_compiler : compiler<softmax_compiler>
// Vectorize if the axis is a reduction axis
// Vectorize if the axis is a reduction axis
if
(
faxis
==
axis
)
if
(
faxis
==
axis
)
{
{
vec
=
vectorize
::
elements
(
faxis
,
inputs
);
vec
=
vectorize
::
elements
(
ctx
,
faxis
,
inputs
);
}
}
auto
relements
=
inputs
[
0
].
lens
()[
axis
]
/
vec
.
size
;
auto
relements
=
inputs
[
0
].
lens
()[
axis
]
/
vec
.
size
;
auto
nelements
=
(
inputs
.
back
().
elements
()
/
inputs
[
0
].
lens
()[
axis
]);
auto
nelements
=
(
inputs
.
back
().
elements
()
/
inputs
[
0
].
lens
()[
axis
]);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment