gaoqiong / MIGraphX · Commits · 913ae362

Commit 913ae362 (Unverified)
Merge branch 'develop' into optimize

Authored Dec 13, 2022 by Chris Austen; committed by GitHub on Dec 13, 2022.
Parents: f1e16656, b8c8d09b

Showing 20 changed files (of 66 in this commit) with 247 additions and 181 deletions (+247 −181).
src/include/migraphx/shape_for_each.hpp                         +3  −1
src/insert_pad.cpp                                              +2  −2
src/instruction.cpp                                             +18 −0
src/onnx/onnx_parser.cpp                                        +18 −5
src/onnx/parse_pooling.cpp                                      +82 −38
src/onnx/parse_transpose.cpp                                    +1  −1
src/rewrite_rnn.cpp                                             +19 −19
src/shape.cpp                                                   +8  −0
src/targets/gpu/CMakeLists.txt                                  +10 −7
src/targets/gpu/compile_hip.cpp                                 +2  −2
src/targets/gpu/hip.cpp                                         +15 −6
src/targets/gpu/include/migraphx/gpu/hip.hpp                    +3  −3
src/targets/gpu/jit/mlir.cpp                                    +0  −1
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp  +1  −0
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp  +0  −32
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp        +32 −0
src/targets/gpu/mlir.cpp                                        +24 −60
src/targets/gpu/perfdb.cpp                                      +4  −0
src/targets/gpu/prefuse_ops.cpp                                 +5  −2
src/targets/gpu/target.cpp                                      +0  −2
src/include/migraphx/shape_for_each.hpp — view file @ 913ae362

@@ -31,6 +31,9 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 
+/**
+ * Iterates the given function over the indices from the shape in order.
+ */
 template <class F>
 void shape_for_each(const migraphx::shape& s, F f)
 {
@@ -51,7 +54,6 @@ void shape_for_each(const migraphx::shape& s, F f)
         call(indices);
     }
 }
-
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/insert_pad.cpp — view file @ 913ae362

@@ -77,14 +77,14 @@ static void update_pooling(const instruction_ref& input, const instruction_ref&
     {
         return;
     }
-    auto kdims = input->get_shape().lens().size() - 2;
+    auto kdims = input->get_shape().ndim() - 2;
     if(std::equal(op.padding.begin(),
                   op.padding.begin() + kdims,
                   op.padding.begin() + kdims,
                   op.padding.end()))
         return;
-    std::vector<int64_t> padding(input->get_shape().lens().size() * 2, 0);
+    std::vector<int64_t> padding(input->get_shape().ndim() * 2, 0);
     std::vector<size_t> pads_l(op.padding.begin(), op.padding.begin() + kdims);
     std::vector<size_t> pads_r(op.padding.begin() + kdims, op.padding.end());
     op.padding = std::vector<size_t>(kdims * 2, 0);
src/instruction.cpp — view file @ 913ae362

@@ -302,6 +302,24 @@ void instruction::replace_mod_argument(module_ref old, module_ref new_mod)
     std::replace(module_args.begin(), module_args.end(), old, new_mod);
 }
 
+bool instruction::is_undefined() const
+{
+    if(op.name() == "undefined")
+    {
+        return true;
+    }
+    else if(this->inputs().empty())
+    {
+        return false;
+    }
+    else
+    {
+        return std::all_of(this->inputs().begin(), this->inputs().end(), [](auto arg) {
+            return arg->is_undefined();
+        });
+    }
+}
+
 bool instruction::can_eval() const
 {
     if(op.name() == "@literal")
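Reviewer note: the new instruction::is_undefined treats an instruction as undefined not only when its operator is literally "undefined", but also when every one of its inputs is recursively undefined; an input-less instruction with any other operator is defined. A minimal sketch of the same recursion over a hypothetical node type (illustrative stand-ins, not the MIGraphX classes):

#include <algorithm>
#include <string>
#include <vector>

// Hypothetical stand-in for migraphx::instruction, for illustration only.
struct node
{
    std::string op_name;
    std::vector<const node*> inputs;

    bool is_undefined() const
    {
        if(op_name == "undefined")
            return true;
        if(inputs.empty())
            return false;
        // A node fed exclusively by undefined nodes carries no defined value either.
        return std::all_of(
            inputs.begin(), inputs.end(), [](const node* n) { return n->is_undefined(); });
    }
};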
src/onnx/onnx_parser.cpp — view file @ 913ae362

@@ -393,18 +393,31 @@ literal onnx_parser::parse_value(const onnx::AttributeProto& attr) const
 literal onnx_parser::parse_tensor(const onnx::TensorProto& t) const
 {
     std::vector<std::size_t> dims(t.dims().begin(), t.dims().end());
-    if(not t.external_data().empty())
+    auto type = get_type(t.data_type());
+    shape tensor_shape(type, dims);
+    auto external_data = t.external_data();
+    if(not external_data.empty())
     {
-        const std::string& data_file = t.external_data().at(0).value();
-        auto raw_buffer              = read_buffer(path + "/" + data_file);
+        const std::string& data_file = external_data.at(0).value();
+        size_t num_data_fields       = external_data.size();
+        size_t offset                = 0;
+        size_t nbytes                = tensor_shape.bytes();
+        if(num_data_fields > 1) // if offset field is present
+        {
+            offset = std::stoul(t.external_data().at(1).value());
+        }
+        if(num_data_fields > 2) // if nbytes field is present
+        {
+            nbytes = std::stoul(t.external_data().at(2).value());
+        }
+        auto raw_buffer = read_buffer(path + "/" + data_file, offset, nbytes);
         std::string s(raw_buffer.begin(), raw_buffer.end());
-        auto type = get_type(t.data_type());
         return create_literal(type, dims, s.data());
     }
     if(t.has_raw_data())
     {
         const std::string& s = t.raw_data();
-        auto type            = get_type(t.data_type());
         return create_literal(type, dims, s.data());
     }
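Reviewer note: the parser now honors the optional offset and length entries that ONNX external data can carry alongside the file name, reading only the requested byte range of a shared weight file; when absent it falls back to offset 0 and the tensor's own byte size. A sketch of a range-limited read under those assumptions (a hypothetical helper, not the MIGraphX read_buffer):

#include <cstddef>
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical sketch: read nbytes starting at offset from a weight file.
std::vector<char> read_range(const std::string& filename, std::size_t offset, std::size_t nbytes)
{
    std::ifstream in(filename, std::ios::binary);
    if(not in)
        throw std::runtime_error("cannot open " + filename);
    in.seekg(static_cast<std::streamoff>(offset));
    std::vector<char> buffer(nbytes);
    in.read(buffer.data(), static_cast<std::streamsize>(nbytes));
    if(in.gcount() != static_cast<std::streamsize>(nbytes))
        throw std::runtime_error("short read from " + filename);
    return buffer;
}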
src/onnx/parse_pooling.cpp — view file @ 913ae362

@@ -47,52 +47,42 @@ struct parse_pooling : op_parser<parse_pooling>
         {"GlobalLpPool", "lpnorm"}};
     }
 
-    instruction_ref parse(const op_desc& opd,
-                          const onnx_parser& /*parser*/,
-                          onnx_parser::node_info info,
-                          std::vector<instruction_ref> args) const
+    value handle_values(const op_desc& opd,
+                        onnx_parser::node_info info,
+                        const shape& in_shape,
+                        value values) const
     {
-        const std::unordered_map<std::string, op::pooling_mode> mode_map = {
-            {"max", op::pooling_mode::max},
-            {"average", op::pooling_mode::average},
-            {"lpnorm", op::pooling_mode::lpnorm}};
-        std::string mode = opd.op_name;
-        if(not contains(mode_map, mode))
-        {
-            MIGRAPHX_THROW("onnx pooling mode must be [\"max\", \"average\", \"lpnorm\"]");
-        }
-        operation op = make_op("pooling", {{"mode", mode_map.at(mode)}});
-        value values = op.to_value();
-        auto l0      = args[0];
-        auto in_lens = l0->get_shape().lens();
-        assert(in_lens.size() > 2);
-        auto kdims = in_lens.size() - 2;
+        auto kdims = in_shape.ndim() - 2;
         if(starts_with(opd.onnx_name, "Global"))
         {
-            values["lengths"] = std::vector<size_t>(in_lens.begin() + 2, in_lens.end());
+            // if spatial dimensions are dynamic use dyn_global flag
+            if(in_shape.dynamic() and std::any_of(in_shape.dyn_dims().cbegin() + 2,
+                                                  in_shape.dyn_dims().cend(),
+                                                  [](auto dd) { return not dd.is_fixed(); }))
+            {
+                values["dyn_global"] = true;
+                values["lengths"]    = std::vector<size_t>();
+            }
+            else
+            {
+                // works with static and fixed dynamic shape
+                auto m_lens       = in_shape.max_lens();
+                values["lengths"] = std::vector<size_t>(m_lens.begin() + 2, m_lens.end());
+            }
         }
 
         // does not support ceil_mode
         if(contains(info.attributes, "ceil_mode"))
         {
             values["ceil_mode"] = static_cast<bool>(info.attributes.at("ceil_mode").i());
         }
 
-        // count include padding, if count include pad is 1, we always use
-        // explicit pad
-        int count_include_pad = 0;
-        if(contains(info.attributes, "count_include_pad"))
-        {
-            count_include_pad = info.attributes.at("count_include_pad").i();
-        }
-
         if(contains(info.attributes, "strides"))
         {
             values["stride"].clear();
             copy(info.attributes["strides"].ints(), std::back_inserter(values["stride"]));
             check_attr_sizes(kdims, values["stride"].size(), "PARSE_POOLING: inconsistent strides");
         }
 
         if(contains(info.attributes, "kernel_shape"))
         {
             values["lengths"].clear();
@@ -110,6 +100,46 @@ struct parse_pooling : op_parser<parse_pooling>
         // ensure pads availabe only when auto_pad is "NOT_SET"
         check_padding_mode(info, "POOLING");
 
+        return values;
+    }
+
+    instruction_ref parse(const op_desc& opd,
+                          const onnx_parser& /*parser*/,
+                          onnx_parser::node_info info,
+                          std::vector<instruction_ref> args) const
+    {
+        std::string mode = opd.op_name;
+        const std::unordered_map<std::string, op::pooling_mode> mode_map = {
+            {"max", op::pooling_mode::max},
+            {"average", op::pooling_mode::average},
+            {"lpnorm", op::pooling_mode::lpnorm}};
+        if(not contains(mode_map, mode))
+        {
+            MIGRAPHX_THROW(
+                "PARSE_POOLING: onnx pooling mode must be [\"max\", \"average\", \"lpnorm\"]");
+        }
+        operation op  = make_op("pooling", {{"mode", mode_map.at(mode)}});
+        value values  = op.to_value();
+        auto l0       = args[0];
+        auto in_shape = l0->get_shape();
+        assert(in_shape.ndim() > 2);
+        auto kdims = in_shape.ndim() - 2;
+
+        values = handle_values(opd, info, in_shape, values);
+
+        // count include padding, if count include pad is 1, we always use
+        // explicit pad
+        int count_include_pad = 0;
+        if(contains(info.attributes, "count_include_pad"))
+        {
+            if(in_shape.dynamic())
+            {
+                MIGRAPHX_THROW("PARSE_POOLING: count_include_pad attribute is not supported for "
+                               "dynamic input shape");
+            }
+            count_include_pad = info.attributes.at("count_include_pad").i();
+        }
+
         std::vector<int64_t> paddings;
         float pad_val = ((mode == "max") ? std::numeric_limits<float>::lowest() : 0.0f);
@@ -123,14 +153,22 @@ struct parse_pooling : op_parser<parse_pooling>
         if(contains(info.attributes, "auto_pad"))
         {
-            values["padding"].clear();
-            // return paddings could be empty, then setting to 0 for no padding
-            cal_auto_padding_size(info,
-                                  values,
-                                  values["lengths"].to_vector<std::size_t>(),
-                                  {1, 1},
-                                  in_lens,
-                                  paddings);
+            if(in_shape.dynamic())
+            {
+                MIGRAPHX_THROW(
+                    "PARSE_POOLING: Auto padding pooling with dynamic input shape not supported");
+            }
+            else
+            {
+                values["padding"].clear();
+                // return paddings could be empty, then setting to 0 for no padding
+                cal_auto_padding_size(info,
+                                      values,
+                                      values["lengths"].to_vector<std::size_t>(),
+                                      {1, 1},
+                                      in_shape.lens(),
+                                      paddings);
+            }
         }
 
         if(paddings.size() != 2 * kdims)
@@ -150,6 +188,7 @@ struct parse_pooling : op_parser<parse_pooling>
             values["stride"].resize(kdims);
             std::fill_n(values["stride"].begin(), kdims, 1);
         }
+
         // used to calculate the supposed output shape
         std::vector<int64_t> orig_padding = paddings;
@@ -159,6 +198,11 @@ struct parse_pooling : op_parser<parse_pooling>
         if(not slice_start.empty())
         {
+            if(in_shape.dynamic())
+            {
+                MIGRAPHX_THROW(
+                    "PARSE_POOLING: asymmetric padding not supported for dynamic input shape");
+            }
             // calculate expected output shape
             orig_padding.insert(orig_padding.begin() + kdims, 2, 0);
             orig_padding.insert(orig_padding.begin(), 2, 0);
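Reviewer note: for global pooling, dyn_global is only set when some spatial dimension is dynamic and not fixed; a dynamic dimension whose bounds coincide still takes the static path through max_lens(). A sketch of that predicate over a simplified dimension type (illustrative, not the MIGraphX shape class):

#include <algorithm>
#include <cstddef>
#include <vector>

// Simplified stand-in for shape::dynamic_dimension.
struct dyn_dim
{
    std::size_t min;
    std::size_t max;
    bool is_fixed() const { return min == max; }
};

// True when any spatial dimension (everything past batch and channel)
// can still vary at evaluation time.
bool needs_dyn_global(const std::vector<dyn_dim>& dims)
{
    return std::any_of(
        dims.cbegin() + 2, dims.cend(), [](const dyn_dim& dd) { return not dd.is_fixed(); });
}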
src/onnx/parse_transpose.cpp — view file @ 913ae362

@@ -47,7 +47,7 @@ struct parse_transpose : op_parser<parse_transpose>
     }
 
     // if perm is empty, use the default value
-    auto n_dim = args.front()->get_shape().lens().size();
+    auto n_dim = args.front()->get_shape().ndim();
     if(perm.empty())
     {
         perm.resize(n_dim);
src/rewrite_rnn.cpp — view file @ 913ae362

@@ -92,7 +92,7 @@ void rewrite_rnn::apply_vanilla_rnn(module& m, instruction_ref ins) const
     // process sequence length
     instruction_ref seq_lens = m.end();
-    if((args.size() >= 5) && args[4]->name() != "undefined")
+    if((args.size() >= 5) and not args[4]->is_undefined())
     {
         seq_lens = args[4];
     }
@@ -117,7 +117,7 @@ void rewrite_rnn::apply_vanilla_rnn(module& m, instruction_ref ins) const
     // process bias
     instruction_ref bias_forward = m.end();
     instruction_ref bias_reverse = m.end();
-    if(args.size() >= 4 && args[3]->name() != "undefined")
+    if(args.size() >= 4 and not args[3]->is_undefined())
     {
         bias_forward = m.insert_instruction(
             ins,
             make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}),
             args[3]);
@@ -129,7 +129,7 @@ void rewrite_rnn::apply_vanilla_rnn(module& m, instruction_ref ins) const
     // or the 5th one (if the sequence len argument is ignored)
     instruction_ref ih_forward{};
     instruction_ref ih_reverse{};
-    if(args.size() == 6 && args[5]->name() != "undefined")
+    if(args.size() == 6 and not args[5]->is_undefined())
     {
         ih_forward = m.insert_instruction(
             ins,
             make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}),
             args[5]);
@@ -195,14 +195,14 @@ void rewrite_rnn::apply_vanilla_rnn(module& m, instruction_ref ins) const
     // process bias and initial hidden state
     instruction_ref bias = m.end();
-    if(args.size() >= 4 && args[3]->name() != "undefined")
+    if(args.size() >= 4 and not args[3]->is_undefined())
     {
         bias = args[3];
     }
 
     // process intial hidden state
     instruction_ref ih;
-    if(args.size() == 6 && args[5]->name() != "undefined")
+    if(args.size() == 6 and not args[5]->is_undefined())
     {
         ih = args[5];
     }
@@ -398,7 +398,7 @@ void rewrite_rnn::apply_gru(module& m, instruction_ref ins) const
     // process sequence length
     instruction_ref seq_lens = m.end();
-    if((args.size() >= 5) && args[4]->name() != "undefined")
+    if((args.size() >= 5) and not args[4]->is_undefined())
     {
         seq_lens = args[4];
     }
@@ -423,7 +423,7 @@ void rewrite_rnn::apply_gru(module& m, instruction_ref ins) const
     // bias
     instruction_ref bias_forward = m.end();
     instruction_ref bias_reverse = m.end();
-    if(args.size() >= 4 && args[3]->name() != "undefined")
+    if(args.size() >= 4 and not args[3]->is_undefined())
     {
         bias_forward = m.insert_instruction(
             ins,
             make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}),
             args[3]);
@@ -434,7 +434,7 @@ void rewrite_rnn::apply_gru(module& m, instruction_ref ins) const
     // intial hidden state
     instruction_ref ih_forward{};
     instruction_ref ih_reverse{};
-    if(args.size() == 6 && args[5]->name() != "undefined")
+    if(args.size() == 6 and not args[5]->is_undefined())
     {
         ih_forward = m.insert_instruction(
             ins,
             make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}),
             args[5]);
@@ -501,14 +501,14 @@ void rewrite_rnn::apply_gru(module& m, instruction_ref ins) const
     // bias
     instruction_ref bias = m.end();
-    if(args.size() >= 4 && args[3]->name() != "undefined")
+    if(args.size() >= 4 and not args[3]->is_undefined())
     {
         bias = args[3];
     }
 
     // intial hidden state
     instruction_ref ih{};
-    if(args.size() == 6 && args[5]->name() != "undefined")
+    if(args.size() == 6 and not args[5]->is_undefined())
     {
         ih = args[5];
     }
@@ -784,7 +784,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // process sequence length
     instruction_ref seq_lens = m.end();
-    if((args.size() >= 5) && args[4]->name() != "undefined")
+    if((args.size() >= 5) and not args[4]->is_undefined())
     {
         seq_lens = args[4];
     }
@@ -813,7 +813,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // process bias
     instruction_ref bias_forward = m.end();
     instruction_ref bias_reverse = m.end();
-    if(args.size() >= 4 && args[3]->name() != "undefined")
+    if(args.size() >= 4 and not args[3]->is_undefined())
     {
         bias_forward = m.insert_instruction(
             ins,
             make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}),
             args[3]);
@@ -824,7 +824,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // process intial hidden state, it is the 6th argument
     instruction_ref ih_forward{};
     instruction_ref ih_reverse{};
-    if(args.size() >= 6 && args[5]->name() != "undefined")
+    if(args.size() >= 6 and not args[5]->is_undefined())
    {
         ih_forward = m.insert_instruction(
             ins,
             make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}),
             args[5]);
@@ -840,7 +840,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // process initial cell value
     instruction_ref ic_forward{};
     instruction_ref ic_reverse{};
-    if(args.size() >= 7 && args[6]->name() != "undefined")
+    if(args.size() >= 7 and not args[6]->is_undefined())
     {
         ic_forward = m.insert_instruction(
             ins,
             make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}),
             args[6]);
@@ -856,7 +856,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // process weight of the peephole
     instruction_ref pph_forward = m.end();
     instruction_ref pph_reverse = m.end();
-    if(args.size() == 8 && args[7]->name() != "undefined")
+    if(args.size() == 8 and not args[7]->is_undefined())
     {
         pph_forward = m.insert_instruction(
             ins,
             make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}),
             args[7]);
@@ -940,14 +940,14 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // bias
     instruction_ref bias = m.end();
-    if(args.size() >= 4 && args[3]->name() != "undefined")
+    if(args.size() >= 4 and not args[3]->is_undefined())
     {
         bias = args[3];
     }
 
     // initial hidden state
     instruction_ref ih{};
-    if(args.size() >= 6 && args[5]->name() != "undefined")
+    if(args.size() >= 6 and not args[5]->is_undefined())
     {
         ih = args[5];
     }
@@ -958,7 +958,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // initial cell value
     instruction_ref ic{};
-    if(args.size() >= 7 && args[6]->name() != "undefined")
+    if(args.size() >= 7 and not args[6]->is_undefined())
     {
         ic = args[6];
     }
@@ -969,7 +969,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // process weight of the peephole
     instruction_ref pph = m.end();
-    if(args.size() == 8 && args[7]->name() != "undefined")
+    if(args.size() == 8 and not args[7]->is_undefined())
     {
         pph = args[7];
     }
src/shape.cpp — view file @ 913ae362

@@ -521,6 +521,14 @@ std::ostream& operator<<(std::ostream& os, const shape::dynamic_dimension& x)
     return os;
 }
 
+bool operator==(const shape::dynamic_dimension& x, const std::size_t& y)
+{
+    return x.min == y and x.max == y;
+}
+bool operator==(const std::size_t& x, const shape::dynamic_dimension& y) { return y == x; }
+bool operator!=(const shape::dynamic_dimension& x, const std::size_t& y) { return not(x == y); }
+bool operator!=(const std::size_t& x, const shape::dynamic_dimension& y) { return not(x == y); }
+
 bool operator==(const shape& x, const shape& y)
 {
     if(x.dynamic() and y.dynamic())
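Reviewer note: these overloads let a dynamic_dimension compare directly against a plain extent; equality holds only when both bounds collapse to that value, and the mirrored and negated forms are defined in terms of it. The intended semantics, shown on a simplified type (illustrative):

#include <cassert>
#include <cstddef>

struct dyn_dim
{
    std::size_t min;
    std::size_t max;
};

bool operator==(const dyn_dim& x, std::size_t y) { return x.min == y and x.max == y; }
bool operator!=(const dyn_dim& x, std::size_t y) { return not(x == y); }

int main()
{
    assert((dyn_dim{4, 4} == 4)); // a fixed dimension equals its single extent
    assert((dyn_dim{1, 4} != 4)); // a genuine range equals no single extent
    return 0;
}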
src/targets/gpu/CMakeLists.txt — view file @ 913ae362

@@ -231,14 +231,17 @@ endif()
 include(CheckLibraryExists)
 get_target_property(MIOPEN_LOCATION MIOpen LOCATION)
 check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API)
-# check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
-# if(HAS_FIND_2_API)
-#     target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
-#     message(STATUS "MIGraphx is using Find-2.0 API of MIOpen")
-# else()
-#     message(STATUS "MIOpen does not have Find-2.0 API")
-# endif()
+check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
+
+# TODO: Set default to HAS_FIND_2_API
+set(MIGRAPHX_USE_FIND_2_API OFF CACHE BOOL "")
+if(MIGRAPHX_USE_FIND_2_API)
+    target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
+    message(STATUS "MIGraphx is using Find-2.0 API of MIOpen")
+else()
+    message(STATUS "MIGraphx is using legacy Find API in MIOpen")
+endif()
 
 if(HAS_FIND_MODE_API)
     target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API)
src/targets/gpu/compile_hip.cpp — view file @ 913ae362

@@ -185,7 +185,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
     options.push_back("-fno-gpu-rdc");
     options.push_back(" -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3"));
     options.push_back("-Wno-cuda-compat");
-    options.push_back("--cuda-gpu-arch=" + arch);
+    options.push_back("--offload-arch=" + arch);
     prog.compile(options);
     return {prog.get_code_obj()};
 }
@@ -237,7 +237,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
     }
     else if(is_hip_clang_compiler())
     {
-        params += " --cuda-gpu-arch=" + arch;
+        params += " --offload-arch=" + arch;
         params += " --cuda-device-only";
         params += " -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3") + " ";
     }
src/targets/gpu/hip.cpp — view file @ 913ae362

@@ -196,12 +196,21 @@ argument to_gpu(const argument& arg, bool host)
 argument from_gpu(const argument& arg)
 {
     argument result;
-    arg.visit([&](auto x) {
-        using type = typename decltype(x)::value_type;
-        auto v     = read_from_gpu<type>(arg.data(), x.get_shape().bytes() / sizeof(type));
-        // cppcheck-suppress returnDanglingLifetime
-        result = {x.get_shape(), [v]() mutable { return v.data(); }};
-    });
+    arg.visit(
+        [&](auto x) {
+            using type = typename decltype(x)::value_type;
+            auto v = read_from_gpu<type>(arg.data(), x.get_shape().bytes() / sizeof(type));
+            // cppcheck-suppress returnDanglingLifetime
+            result = {x.get_shape(), [v]() mutable { return v.data(); }};
+        },
+        [&](const auto& xs) {
+            std::vector<argument> args;
+            std::transform(xs.begin(), xs.end(), std::back_inserter(args), [&](auto x) {
+                return from_gpu(x);
+            });
+            result = argument{args};
+        });
     return result;
 }
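Reviewer note: from_gpu gains a second visitor so tuple-typed arguments round-trip too; each sub-argument is copied back recursively and the results repacked. The shape of that recursion, sketched with standard types (hypothetical stand-ins, not the MIGraphX argument class):

#include <algorithm>
#include <iterator>
#include <variant>
#include <vector>

// Hypothetical model: a value is either a flat byte buffer or a tuple of values.
struct value;
using tuple = std::vector<value>;
struct value
{
    std::variant<std::vector<char>, tuple> data;
};

// Copy-back recurses into tuples, mirroring the new from_gpu overload.
value copy_back(const value& v)
{
    if(const auto* t = std::get_if<tuple>(&v.data))
    {
        tuple out;
        std::transform(t->begin(), t->end(), std::back_inserter(out), copy_back);
        return value{out};
    }
    return v; // leaf buffer: the device-to-host copy would happen here
}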
src/targets/gpu/include/migraphx/gpu/hip.hpp — view file @ 913ae362

@@ -105,7 +105,7 @@ struct hip_copy_to_gpu
     std::string name() const { return "hip::copy_to_gpu"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1, 2);
+        check_shapes{inputs, *this}.has(1, 2).same_type();
         return inputs.at(0);
     }
     argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
@@ -131,7 +131,7 @@ struct hip_copy_from_gpu
     std::string name() const { return "hip::copy_from_gpu"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1, 2);
+        check_shapes{inputs, *this}.has(1, 2).same_type();
         return inputs.at(0);
     }
     argument
@@ -159,7 +159,7 @@ struct hip_copy
     std::string name() const { return "hip::copy"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(2);
+        check_shapes{inputs, *this}.has(2).same_type();
         return inputs.at(1);
     }
     argument compute(context& ctx, const shape&, std::vector<argument> args) const
src/targets/gpu/jit/mlir.cpp — view file @ 913ae362

@@ -24,7 +24,6 @@
 #include <migraphx/gpu/compiler.hpp>
-#include <migraphx/make_op.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/mlir.hpp>
 
 namespace migraphx {
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp — view file @ 913ae362

@@ -25,6 +25,7 @@
 #define MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP
 #include <migraphx/kernels/reduce.hpp>
 #include <migraphx/kernels/ops.hpp>
+#include <migraphx/kernels/vec.hpp>
 #include <migraphx/kernels/print.hpp>
 
 namespace migraphx {
...
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp
View file @
913ae362
...
@@ -33,38 +33,6 @@
...
@@ -33,38 +33,6 @@
namespace
migraphx
{
namespace
migraphx
{
template
<
class
T
>
struct
implicit_conversion_op
{
T
x
;
template
<
index_int
N
,
class
U
>
constexpr
operator
vec
<
U
,
N
>
()
const
{
if
constexpr
(
vec_size
<
T
>
()
==
0
)
{
return
x
;
}
else
{
static_assert
(
vec_size
<
T
>
()
==
N
,
"Vector mismatch size"
);
return
__builtin_convertvector
(
x
,
vec
<
U
,
N
>
);
}
}
template
<
class
U
>
constexpr
operator
U
()
const
{
return
x
;
}
};
template
<
class
T
>
constexpr
implicit_conversion_op
<
T
>
implicit_conversion
(
T
x
)
{
return
{
x
};
}
template
<
class
F
,
class
T
,
class
...
Ts
>
template
<
class
F
,
class
T
,
class
...
Ts
>
__device__
void
pointwise_tensor
(
index
idx
,
F
f
,
T
out
,
Ts
...
xs
)
__device__
void
pointwise_tensor
(
index
idx
,
F
f
,
T
out
,
Ts
...
xs
)
{
{
...
...
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp — view file @ 913ae362

@@ -185,5 +185,37 @@ constexpr auto vec_reduce(T x, Op op)
     }
 }
 
+template <class T>
+struct implicit_conversion_op
+{
+    T x;
+
+    template <index_int N, class U>
+    constexpr operator vec<U, N>() const
+    {
+        if constexpr(vec_size<T>() == 0)
+        {
+            return x;
+        }
+        else
+        {
+            static_assert(vec_size<T>() == N, "Vector mismatch size");
+            return __builtin_convertvector(x, vec<U, N>);
+        }
+    }
+
+    template <class U>
+    constexpr operator U() const
+    {
+        return x;
+    }
+};
+
+template <class T>
+constexpr implicit_conversion_op<T> implicit_conversion(T x)
+{
+    return {x};
+}
+
 } // namespace migraphx
 #endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP
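Reviewer note: moving implicit_conversion_op from pointwise.hpp into vec.hpp (unchanged) puts the width-checked conversion next to the vec machinery it uses; element type may change across the conversion, but the lane count must already match. A reduced, Clang-specific sketch of the underlying conversion (illustrative; assumes ext_vector_type, as the kernel headers use):

// Minimal sketch using Clang vector extensions.
template <class T, int N>
using vec_t = T __attribute__((ext_vector_type(N)));

int main()
{
    vec_t<int, 4> iv = {1, 2, 3, 4};
    // Same lane count, different element type: lanes convert elementwise.
    vec_t<float, 4> fv = __builtin_convertvector(iv, vec_t<float, 4>);
    return static_cast<int>(fv[0]) - 1; // 0
}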
src/targets/gpu/mlir.cpp — view file @ 913ae362

@@ -32,7 +32,13 @@
 #include <mlir-c/Dialect/MIGraphX.h>
 #include <mlir-c/IntegerSet.h>
 #include <mlir-c/Pass.h>
-#include <mlir-c/Registration.h>
+#include <mutex>
+#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3
+#warning "Incompatible version of rocMLIR library used, disabling"
+#undef MIGRAPHX_MLIR
+#else
+#include <mlir-c/RegisterRocMLIR.h>
+#endif
 #endif
 #include <migraphx/env.hpp>
@@ -50,10 +56,6 @@
 #include <deque>
 #include <variant>
 
-#if defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) && MLIR_MIGRAPHX_DIALECT_API_VERSION >= 2
-#define MIGRAPHX_MLIR_BARE_POINTER
-#endif
-
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
@@ -168,9 +170,11 @@ struct mlir_program
       location(mlirLocationUnknownGet(ctx.get())),
       mmodule(mlirModuleCreateEmpty(location))
 {
-    MlirDialectHandle mixr_handle = mlirGetDialectHandle__migraphx__();
-    mlirDialectHandleRegisterDialect(mixr_handle, ctx.get());
-    mlirRegisterAllDialects(ctx.get());
+    MlirDialectRegistry registry = mlirDialectRegistryCreate();
+    mlirRegisterRocMLIRDialects(registry);
+    mlirContextAppendDialectRegistry(ctx.get(), registry);
+    mlirContextLoadAllAvailableDialects(ctx.get());
+    mlirDialectRegistryDestroy(registry);
     mlirContextSetAllowUnregisteredDialects(ctx.get(), true /*allow*/);
 }
@@ -452,7 +456,8 @@ struct mlir_program
     auto ops = create_operation_state("func.func");
     ops.add_attributes({{"function_type", make_function_type(inputs, outputs)},
                         {"sym_name", std::string("main")},
-                        {"kernel", std::string("mixr")}});
+                        {"kernel", std::string("mixr")},
+                        {"arch", target_arch}});
     ops.add_region(std::move(region));
     insert(body, std::move(ops));
@@ -512,7 +517,8 @@ struct mlir_program
         pp = problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()};
         // check if HW supports xdlops
-        bool xdlops       = contains(get_xdlops_archs(), target_name);
+        auto target_chip  = trim(split_string(target_arch, ':').front());
+        bool xdlops       = contains(get_xdlops_archs(), target_chip);
         std::string tuned = get_tune_params(xdlops);
         if(not tuned.empty())
             ops.add_attributes({{"perf_config", tuned}});
@@ -540,7 +546,7 @@ struct mlir_program
         // 1st pipeline to call
         mlirMIGraphXAddHighLevelPipeline(pm.get());
         // 2nd pipeline to call
-        mlirMIGraphXAddBackendPipeline(pm.get(), target_name.c_str(), "amdgcn-amd-amdhsa", "");
+        mlirMIGraphXAddBackendPipeline(pm.get(), target_arch.c_str());
         mlirPassManagerRun(pm.get(), mmodule.get());
 
         code_object_op op{};
@@ -550,16 +556,7 @@ struct mlir_program
         return op;
     }
 
-    void find_target()
-    {
-        std::string tname = get_device_name();
-        // HACK: Since MLIR can't handle the full target name
-        target_name = trim(split_string(tname, ':').front());
-        if(tname.size() != target_name.size())
-            std::cout << "*************** WARNING: MLIR may not compile the correct target features for: "
-                      << tname << std::endl;
-    }
+    void find_target() { target_arch = get_device_name(); }
 
     std::pair<std::size_t, std::size_t> get_launch_params() const
     {
@@ -588,7 +585,7 @@ struct mlir_program
     mlir_module mmodule;
     problem_params pp;
     std::deque<std::string> strings{};
-    std::string target_name;
+    std::string target_arch;
 };
 
 std::string dump_mlir(const module& m)
@@ -650,6 +647,10 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
     const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
     if(trace)
         std::cout << m << std::endl;
+
+    // set mutex while llvm thread support is disabled.
+    static std::mutex g_mlirc_mutex; // NOLINT
+    const std::lock_guard<std::mutex> lock(g_mlirc_mutex);
     mlir_program mp;
     mp.find_target();
     mp.parse(m);
@@ -669,46 +670,9 @@ instruction_ref insert_mlir(module& m,
     std::vector<instruction_ref> refs;
     std::size_t last = 0;
-#ifdef MIGRAPHX_MLIR_BARE_POINTER
     refs.reserve(inputs.size());
     std::copy(inputs.begin(), inputs.end(), std::back_inserter(refs));
     last = refs.size() - 1;
-#else
-    refs.reserve(inputs.size() * 15);
-    std::unordered_map<uint64_t, instruction_ref> literal_map{};
-    auto get_literal = [&](uint64_t value) {
-        auto fi = literal_map.find(value);
-        if(fi != literal_map.end())
-            return fi->second;
-        auto lit = m.add_literal(value);
-        literal_map.emplace(value, lit);
-        return lit;
-    };
-
-    for(auto input : inputs)
-    {
-        const size_t offset = 0;
-        auto s              = input->get_shape();
-        last                = refs.size();
-        refs.push_back(input);
-        refs.push_back(input);
-        refs.push_back(get_literal(offset)); // offset
-        // dim sizes
-        std::transform(s.lens().begin(),
-                       s.lens().end(),
-                       std::back_inserter(refs),
-                       [&](const auto& lval) { return get_literal(lval); });
-        // refs.push_back(get_literal(1)); // G
-        // dim strides
-        std::transform(s.strides().begin(),
-                       s.strides().end(),
-                       std::back_inserter(refs),
-                       [&](const auto& lval) { return get_literal(lval); });
-        // refs.push_back(get_literal(1)); // G
-    }
-#endif
     co.expected_inputs = to_shapes(refs);
     co.output_arg      = last;
     return m.insert_instruction(ins, co, refs);
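Reviewer note: two related changes here — compilation through rocMLIR is serialized behind a static mutex while LLVM thread support is off, and the program now keeps the full device string (for example "gfx90a:sramecc+:xnack-") in target_arch, deriving the bare chip name only where the xdlops lookup needs it. A sketch of that derivation (a hypothetical helper standing in for trim(split_string(arch, ':').front())):

#include <iostream>
#include <string>

// Hypothetical equivalent of trim(split_string(arch, ':').front()).
std::string chip_of(const std::string& arch)
{
    // find() returning npos means no ':' and the whole string is the chip name.
    return arch.substr(0, arch.find(':'));
}

int main()
{
    std::cout << chip_of("gfx90a:sramecc+:xnack-") << '\n'; // prints "gfx90a" (example string)
}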
...
src/targets/gpu/perfdb.cpp
View file @
913ae362
...
@@ -27,6 +27,7 @@
...
@@ -27,6 +27,7 @@
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/permutation.hpp>
#include <fstream>
#include <fstream>
#include <mutex>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -88,6 +89,9 @@ std::string generate_miopen_config(const problem_params& pp)
...
@@ -88,6 +89,9 @@ std::string generate_miopen_config(const problem_params& pp)
auto
query_miopen_db
(
const
std
::
string
&
query
)
auto
query_miopen_db
(
const
std
::
string
&
query
)
{
{
static
std
::
mutex
g_db_mutex
;
// NOLINT
const
std
::
lock_guard
<
std
::
mutex
>
lock
(
g_db_mutex
);
// TODO: Store db as a static variable
// TODO: Store db as a static variable
const
auto
dbpath
=
fs
::
path
{
"/opt"
}
/
"rocm"
/
"share"
/
"miopen"
/
"db"
/
"miopen.db"
;
const
auto
dbpath
=
fs
::
path
{
"/opt"
}
/
"rocm"
/
"share"
/
"miopen"
/
"db"
/
"miopen.db"
;
// Check if db file exists.
// Check if db file exists.
...
...
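Reviewer note: the function-local static mutex is the standard C++11 pattern for serializing access to a shared resource — magic statics make the first initialization thread-safe, and every caller holds the guard for the whole query. The pattern in isolation:

#include <mutex>

int shared_counter = 0; // stands in for the shared database handle

int serialized_query()
{
    static std::mutex g_mutex; // initialized once, thread-safely (magic statics)
    const std::lock_guard<std::mutex> lock(g_mutex);
    return ++shared_counter; // only one thread at a time executes past the guard
}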
src/targets/gpu/prefuse_ops.cpp — view file @ 913ae362

@@ -51,17 +51,20 @@ struct layernorm_base
         }
         check_shapes{inputs, static_cast<const Derived&>(*this)}.has(nargs + N);
         auto s = inputs.at(0);
+        auto t = s.type();
+        if(not mods.empty())
+            t = mods.front()->get_output_shapes().front().type();
         if(s.scalar())
         {
             return s;
         }
         else if(s.broadcasted())
         {
-            return {s.type(), s.lens()};
+            return {t, s.lens()};
         }
         else
         {
-            return s.with_lens(s.lens());
+            return s.with_lens(t, s.lens());
         }
     }
 };
src/targets/gpu/target.cpp — view file @ 913ae362

@@ -139,8 +139,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         dead_code_elimination{},
         pack_int8_args{},
         dead_code_elimination{},
-        adjust_allocation{gpu_allocation_model{}},
-        dead_code_elimination{},
         fuse_ops{&ctx, options.fast_math},
         dead_code_elimination{},
         replace_allocate{gpu_allocation_model{}, options.offload_copy},