gaoqiong / MIGraphX — Commit b98308b8 (unverified)

Merge branch 'develop' into dyn_onnx_matmul

Authored Dec 27, 2022 by Charlie Lin; committed by GitHub, Dec 27, 2022.
Parents: b48c4cf6, 56c43445
Showing 20 changed files with 329 additions and 193 deletions (+329 −193):
src/onnx/onnx_parser.cpp                                         +18  −5
src/onnx/parse_pooling.cpp                                       +82  −38
src/program.cpp                                                  +19  −0
src/rewrite_rnn.cpp                                              +19  −19
src/shape.cpp                                                    +8   −0
src/targets/gpu/compile_hip.cpp                                  +2   −2
src/targets/gpu/hip.cpp                                          +15  −6
src/targets/gpu/include/migraphx/gpu/hip.hpp                     +3   −3
src/targets/gpu/jit/mlir.cpp                                     +0   −1
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp   +1   −0
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp   +0   −32
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp         +32  −0
src/targets/gpu/mlir.cpp                                         +24  −60
src/targets/gpu/perfdb.cpp                                       +4   −0
src/targets/gpu/prefuse_ops.cpp                                  +5   −2
src/targets/ref/lowering.cpp                                     +4   −4
test/gpu/hip.cpp                                                 +50  −0
test/gpu/mlir.cpp                                                +2   −2
test/instruction.cpp                                             +22  −19
test/literal_test.cpp                                            +19  −0
src/onnx/onnx_parser.cpp

```diff
@@ -393,18 +393,31 @@ literal onnx_parser::parse_value(const onnx::AttributeProto& attr) const
 literal onnx_parser::parse_tensor(const onnx::TensorProto& t) const
 {
     std::vector<std::size_t> dims(t.dims().begin(), t.dims().end());
-    if(not t.external_data().empty())
+    auto type = get_type(t.data_type());
+    shape tensor_shape(type, dims);
+    auto external_data = t.external_data();
+    if(not external_data.empty())
     {
-        const std::string& data_file = t.external_data().at(0).value();
-        auto raw_buffer              = read_buffer(path + "/" + data_file);
+        const std::string& data_file = external_data.at(0).value();
+        size_t num_data_fields       = external_data.size();
+        size_t offset                = 0;
+        size_t nbytes                = tensor_shape.bytes();
+        if(num_data_fields > 1) // if offset field is present
+        {
+            offset = std::stoul(t.external_data().at(1).value());
+        }
+        if(num_data_fields > 2) // if nbytes field is present
+        {
+            nbytes = std::stoul(t.external_data().at(2).value());
+        }
+        auto raw_buffer = read_buffer(path + "/" + data_file, offset, nbytes);
         std::string s(raw_buffer.begin(), raw_buffer.end());
-        auto type = get_type(t.data_type());
         return create_literal(type, dims, s.data());
     }
     if(t.has_raw_data())
     {
         const std::string& s = t.raw_data();
-        auto type            = get_type(t.data_type());
         return create_literal(type, dims, s.data());
     }
```
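The three-argument `read_buffer` overload called above is not shown in this diff. For readers following along, here is a minimal sketch of what an offset/length file read looks like, assuming standard iostreams (the real helper lives elsewhere in the MIGraphX sources and may differ):

```cpp
#include <fstream>
#include <string>
#include <vector>

// Hypothetical sketch: read nbytes of a file starting at a byte offset.
std::vector<char> read_buffer(const std::string& filename, std::size_t offset, std::size_t nbytes)
{
    std::ifstream is(filename, std::ios::binary);
    is.seekg(offset, std::ios::beg); // external_data offset field (0 when absent)
    std::vector<char> buffer(nbytes);
    is.read(buffer.data(), nbytes);  // length field, defaulting to tensor_shape.bytes()
    return buffer;
}
```

This mirrors the ONNX external-data convention the parser assumes: entry 0 is the file location, entry 1 an optional byte offset, entry 2 an optional byte length, with the length defaulting to the tensor's own `bytes()` when absent.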
src/onnx/parse_pooling.cpp

```diff
@@ -47,52 +47,42 @@ struct parse_pooling : op_parser<parse_pooling>
                 {"GlobalLpPool", "lpnorm"}};
     }
 
-    instruction_ref parse(const op_desc& opd,
-                          const onnx_parser& /*parser*/,
-                          onnx_parser::node_info info,
-                          std::vector<instruction_ref> args) const
+    value handle_values(const op_desc& opd,
+                        onnx_parser::node_info info,
+                        const shape& in_shape,
+                        value values) const
     {
-        const std::unordered_map<std::string, op::pooling_mode> mode_map = {
-            {"max", op::pooling_mode::max},
-            {"average", op::pooling_mode::average},
-            {"lpnorm", op::pooling_mode::lpnorm}};
-        std::string mode = opd.op_name;
-        if(not contains(mode_map, mode))
-        {
-            MIGRAPHX_THROW("onnx pooling mode must be [\"max\", \"average\", \"lpnorm\"]");
-        }
-        operation op = make_op("pooling", {{"mode", mode_map.at(mode)}});
-        value values = op.to_value();
-        auto l0      = args[0];
-        auto in_lens = l0->get_shape().lens();
-        assert(in_lens.size() > 2);
-        auto kdims = in_lens.size() - 2;
+        auto kdims = in_shape.ndim() - 2;
         if(starts_with(opd.onnx_name, "Global"))
         {
-            values["lengths"] = std::vector<size_t>(in_lens.begin() + 2, in_lens.end());
+            // if spatial dimensions are dynamic use dyn_global flag
+            if(in_shape.dynamic() and
+               std::any_of(in_shape.dyn_dims().cbegin() + 2,
+                           in_shape.dyn_dims().cend(),
+                           [](auto dd) { return not dd.is_fixed(); }))
+            {
+                values["dyn_global"] = true;
+                values["lengths"]    = std::vector<size_t>();
+            }
+            else
+            {
+                // works with static and fixed dynamic shape
+                auto m_lens       = in_shape.max_lens();
+                values["lengths"] = std::vector<size_t>(m_lens.begin() + 2, m_lens.end());
+            }
         }
 
-        // does not support ceil_mode
         if(contains(info.attributes, "ceil_mode"))
         {
             values["ceil_mode"] = static_cast<bool>(info.attributes.at("ceil_mode").i());
         }
 
-        // count include padding, if count include pad is 1, we always use
-        // explicit pad
-        int count_include_pad = 0;
-        if(contains(info.attributes, "count_include_pad"))
-        {
-            count_include_pad = info.attributes.at("count_include_pad").i();
-        }
-
         if(contains(info.attributes, "strides"))
         {
             values["stride"].clear();
             copy(info.attributes["strides"].ints(), std::back_inserter(values["stride"]));
             check_attr_sizes(kdims, values["stride"].size(), "PARSE_POOLING: inconsistent strides");
         }
         if(contains(info.attributes, "kernel_shape"))
         {
             values["lengths"].clear();
@@ -110,6 +100,46 @@ struct parse_pooling : op_parser<parse_pooling>
         // ensure pads availabe only when auto_pad is "NOT_SET"
         check_padding_mode(info, "POOLING");
+        return values;
+    }
+
+    instruction_ref parse(const op_desc& opd,
+                          const onnx_parser& /*parser*/,
+                          onnx_parser::node_info info,
+                          std::vector<instruction_ref> args) const
+    {
+        std::string mode = opd.op_name;
+        const std::unordered_map<std::string, op::pooling_mode> mode_map = {
+            {"max", op::pooling_mode::max},
+            {"average", op::pooling_mode::average},
+            {"lpnorm", op::pooling_mode::lpnorm}};
+        if(not contains(mode_map, mode))
+        {
+            MIGRAPHX_THROW(
+                "PARSE_POOLING: onnx pooling mode must be [\"max\", \"average\", \"lpnorm\"]");
+        }
+        operation op  = make_op("pooling", {{"mode", mode_map.at(mode)}});
+        value values  = op.to_value();
+        auto l0       = args[0];
+        auto in_shape = l0->get_shape();
+        assert(in_shape.ndim() > 2);
+        auto kdims = in_shape.ndim() - 2;
+        values     = handle_values(opd, info, in_shape, values);
+
+        // count include padding, if count include pad is 1, we always use
+        // explicit pad
+        int count_include_pad = 0;
+        if(contains(info.attributes, "count_include_pad"))
+        {
+            if(in_shape.dynamic())
+            {
+                MIGRAPHX_THROW("PARSE_POOLING: count_include_pad attribute is not supported for "
+                               "dynamic input shape");
+            }
+            count_include_pad = info.attributes.at("count_include_pad").i();
+        }
+
         std::vector<int64_t> paddings;
         float pad_val = ((mode == "max") ? std::numeric_limits<float>::lowest() : 0.0f);
@@ -122,6 +152,13 @@ struct parse_pooling : op_parser<parse_pooling>
         }
         if(contains(info.attributes, "auto_pad"))
+        {
+            if(in_shape.dynamic())
+            {
+                MIGRAPHX_THROW(
+                    "PARSE_POOLING: Auto padding pooling with dynamic input shape not supported");
+            }
+            else
         {
             values["padding"].clear();
             // return paddings could be empty, then setting to 0 for no padding
@@ -129,9 +166,10 @@ struct parse_pooling : op_parser<parse_pooling>
                          values,
                          values["lengths"].to_vector<std::size_t>(),
                          {1, 1},
-                         in_lens,
+                         in_shape.lens(),
                          paddings);
         }
+        }
 
         if(paddings.size() != 2 * kdims)
         {
@@ -150,6 +188,7 @@ struct parse_pooling : op_parser<parse_pooling>
             values["stride"].resize(kdims);
             std::fill_n(values["stride"].begin(), kdims, 1);
         }
         // used to calculate the supposed output shape
         std::vector<int64_t> orig_padding = paddings;
@@ -159,6 +198,11 @@ struct parse_pooling : op_parser<parse_pooling>
         if(not slice_start.empty())
         {
+            if(in_shape.dynamic())
+            {
+                MIGRAPHX_THROW(
+                    "PARSE_POOLING: asymmetric padding not supported for dynamic input shape");
+            }
             // calculate expected output shape
             orig_padding.insert(orig_padding.begin() + kdims, 2, 0);
             orig_padding.insert(orig_padding.begin(), 2, 0);
```
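For context on the `dyn_dims()`/`is_fixed()` test above: a dynamic MIGraphX shape carries a `{min, max}` range per dimension, and a dimension counts as fixed when min equals max. A small illustration (the range-based `shape` constructor is assumed here from the surrounding dynamic-shape API, not taken from this commit):

```cpp
// Batch and channels fixed, spatial dims given as ranges: GlobalAveragePool
// on such an input takes the dyn_global path and leaves "lengths" empty.
migraphx::shape s{migraphx::shape::float_type,
                  {{1, 1}, {3, 3}, {16, 32}, {16, 32}}}; // {min, max} per dimension
// s.dynamic() == true, s.dyn_dims()[2].is_fixed() == false
```

When every spatial range is fixed (or the shape is static), `max_lens()` collapses to the concrete extents and the kernel lengths are computed exactly as before.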
src/program.cpp

```diff
@@ -854,6 +854,25 @@ void program::print_graph(std::ostream& os, bool brief) const
     mm->print_graph(os, brief);
 }
 
+void program::print_py(std::ostream& os) const
+{
+    auto vec_modules = this->get_modules();
+    std::unordered_map<instruction_ref, std::string> names;
+    os << "p = migraphx.program()\n";
+    for(auto& mod : vec_modules)
+    {
+        std::string var_name = "m" + mod->name();
+        os << var_name << " = ";
+        if(mod->name() == "main")
+            os << "p.get_main_module()";
+        else
+            os << "p.create_module(\"" << mod->name() << "\");";
+        os << std::endl;
+        names = mod->print_py(os, var_name, names);
+        os << std::endl;
+    }
+}
+
 void program::print_cpp(std::ostream& os) const
 {
     auto vec_modules = this->get_modules();
```
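Tracing the stream statements above, a program with just a main module would serialize to Python text roughly like:

```
p = migraphx.program()
mmain = p.get_main_module()
<instruction lines emitted by module::print_py>
```

(The per-instruction lines come from the `module::print_py` overload, which this commit does not show.)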
src/rewrite_rnn.cpp

```diff
@@ -92,7 +92,7 @@ void rewrite_rnn::apply_vanilla_rnn(module& m, instruction_ref ins) const
     // process sequence length
     instruction_ref seq_lens = m.end();
-    if((args.size() >= 5) && args[4]->name() != "undefined")
+    if((args.size() >= 5) and not args[4]->is_undefined())
     {
         seq_lens = args[4];
     }
@@ -117,7 +117,7 @@ void rewrite_rnn::apply_vanilla_rnn(module& m, instruction_ref ins) const
     // process bias
     instruction_ref bias_forward = m.end();
     instruction_ref bias_reverse = m.end();
-    if(args.size() >= 4 && args[3]->name() != "undefined")
+    if(args.size() >= 4 and not args[3]->is_undefined())
     {
         bias_forward = m.insert_instruction(
             ins, make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}), args[3]);
@@ -129,7 +129,7 @@ void rewrite_rnn::apply_vanilla_rnn(module& m, instruction_ref ins) const
     // or the 5th one (if the sequence len argument is ignored)
     instruction_ref ih_forward{};
     instruction_ref ih_reverse{};
-    if(args.size() == 6 && args[5]->name() != "undefined")
+    if(args.size() == 6 and not args[5]->is_undefined())
     {
         ih_forward = m.insert_instruction(
             ins, make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}), args[5]);
@@ -195,14 +195,14 @@ void rewrite_rnn::apply_vanilla_rnn(module& m, instruction_ref ins) const
     // process bias and initial hidden state
     instruction_ref bias = m.end();
-    if(args.size() >= 4 && args[3]->name() != "undefined")
+    if(args.size() >= 4 and not args[3]->is_undefined())
     {
         bias = args[3];
     }
 
     // process intial hidden state
     instruction_ref ih;
-    if(args.size() == 6 && args[5]->name() != "undefined")
+    if(args.size() == 6 and not args[5]->is_undefined())
     {
         ih = args[5];
     }
@@ -398,7 +398,7 @@ void rewrite_rnn::apply_gru(module& m, instruction_ref ins) const
     // process sequence length
     instruction_ref seq_lens = m.end();
-    if((args.size() >= 5) && args[4]->name() != "undefined")
+    if((args.size() >= 5) and not args[4]->is_undefined())
     {
         seq_lens = args[4];
     }
@@ -423,7 +423,7 @@ void rewrite_rnn::apply_gru(module& m, instruction_ref ins) const
     // bias
     instruction_ref bias_forward = m.end();
     instruction_ref bias_reverse = m.end();
-    if(args.size() >= 4 && args[3]->name() != "undefined")
+    if(args.size() >= 4 and not args[3]->is_undefined())
     {
         bias_forward = m.insert_instruction(
             ins, make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}), args[3]);
@@ -434,7 +434,7 @@ void rewrite_rnn::apply_gru(module& m, instruction_ref ins) const
     // intial hidden state
     instruction_ref ih_forward{};
     instruction_ref ih_reverse{};
-    if(args.size() == 6 && args[5]->name() != "undefined")
+    if(args.size() == 6 and not args[5]->is_undefined())
     {
         ih_forward = m.insert_instruction(
             ins, make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}), args[5]);
@@ -501,14 +501,14 @@ void rewrite_rnn::apply_gru(module& m, instruction_ref ins) const
     // bias
     instruction_ref bias = m.end();
-    if(args.size() >= 4 && args[3]->name() != "undefined")
+    if(args.size() >= 4 and not args[3]->is_undefined())
     {
         bias = args[3];
     }
 
     // intial hidden state
     instruction_ref ih{};
-    if(args.size() == 6 && args[5]->name() != "undefined")
+    if(args.size() == 6 and not args[5]->is_undefined())
     {
         ih = args[5];
     }
@@ -784,7 +784,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // process sequence length
     instruction_ref seq_lens = m.end();
-    if((args.size() >= 5) && args[4]->name() != "undefined")
+    if((args.size() >= 5) and not args[4]->is_undefined())
     {
         seq_lens = args[4];
     }
@@ -813,7 +813,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // process bias
     instruction_ref bias_forward = m.end();
     instruction_ref bias_reverse = m.end();
-    if(args.size() >= 4 && args[3]->name() != "undefined")
+    if(args.size() >= 4 and not args[3]->is_undefined())
     {
         bias_forward = m.insert_instruction(
             ins, make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}), args[3]);
@@ -824,7 +824,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // process intial hidden state, it is the 6th argument
     instruction_ref ih_forward{};
     instruction_ref ih_reverse{};
-    if(args.size() >= 6 && args[5]->name() != "undefined")
+    if(args.size() >= 6 and not args[5]->is_undefined())
     {
         ih_forward = m.insert_instruction(
             ins, make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}), args[5]);
@@ -840,7 +840,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // process initial cell value
     instruction_ref ic_forward{};
     instruction_ref ic_reverse{};
-    if(args.size() >= 7 && args[6]->name() != "undefined")
+    if(args.size() >= 7 and not args[6]->is_undefined())
     {
         ic_forward = m.insert_instruction(
             ins, make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}), args[6]);
@@ -856,7 +856,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // process weight of the peephole
     instruction_ref pph_forward = m.end();
     instruction_ref pph_reverse = m.end();
-    if(args.size() == 8 && args[7]->name() != "undefined")
+    if(args.size() == 8 and not args[7]->is_undefined())
     {
         pph_forward = m.insert_instruction(
             ins, make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}), args[7]);
@@ -940,14 +940,14 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // bias
     instruction_ref bias = m.end();
-    if(args.size() >= 4 && args[3]->name() != "undefined")
+    if(args.size() >= 4 and not args[3]->is_undefined())
     {
         bias = args[3];
     }
 
     // initial hidden state
     instruction_ref ih{};
-    if(args.size() >= 6 && args[5]->name() != "undefined")
+    if(args.size() >= 6 and not args[5]->is_undefined())
     {
         ih = args[5];
     }
@@ -958,7 +958,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // initial cell value
     instruction_ref ic{};
-    if(args.size() >= 7 && args[6]->name() != "undefined")
+    if(args.size() >= 7 and not args[6]->is_undefined())
     {
         ic = args[6];
     }
@@ -969,7 +969,7 @@ void rewrite_rnn::apply_lstm(module& m, instruction_ref ins) const
     // process weight of the peephole
     instruction_ref pph = m.end();
-    if(args.size() == 8 && args[7]->name() != "undefined")
+    if(args.size() == 8 and not args[7]->is_undefined())
    {
         pph = args[7];
     }
```
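Every hunk in this file is the same two-part substitution: `&&` becomes the keyword `and` (house style elsewhere in MIGraphX), and the string comparison `args[n]->name() != "undefined"` becomes `not args[n]->is_undefined()`. Judging by the new `check_undefined` test added near the end of this commit, `is_undefined()` is stronger than the old name check: it also reports instructions whose values derive only from undefined inputs (e.g. `abs(convert(undefined))`), not just a literal `undefined` op.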
src/shape.cpp

```diff
@@ -521,6 +521,14 @@ std::ostream& operator<<(std::ostream& os, const shape::dynamic_dimension& x)
     return os;
 }
 
+bool operator==(const shape::dynamic_dimension& x, const std::size_t& y)
+{
+    return x.min == y and x.max == y;
+}
+bool operator==(const std::size_t& x, const shape::dynamic_dimension& y) { return y == x; }
+bool operator!=(const shape::dynamic_dimension& x, const std::size_t& y) { return not(x == y); }
+bool operator!=(const std::size_t& x, const shape::dynamic_dimension& y) { return not(x == y); }
+
 bool operator==(const shape& x, const shape& y)
 {
     if(x.dynamic() and y.dynamic())
```
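A usage sketch of the new mixed comparisons (assuming `dynamic_dimension` aggregate-initializes from `{min, max}`, consistent with its use elsewhere in this commit):

```cpp
#include <cassert>
#include <migraphx/shape.hpp>

int main()
{
    migraphx::shape::dynamic_dimension fixed{4, 4};
    migraphx::shape::dynamic_dimension ranged{2, 8};
    assert(fixed == 4);  // true: min == max == 4
    assert(4 == fixed);  // symmetric overload forwards to the one above
    assert(ranged != 4); // a genuine range never equals a single extent
}
```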
src/targets/gpu/compile_hip.cpp

```diff
@@ -185,7 +185,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
     options.push_back("-fno-gpu-rdc");
     options.push_back(" -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3"));
     options.push_back("-Wno-cuda-compat");
-    options.push_back("--cuda-gpu-arch=" + arch);
+    options.push_back("--offload-arch=" + arch);
     prog.compile(options);
     return {prog.get_code_obj()};
 }
@@ -237,7 +237,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
     }
     else if(is_hip_clang_compiler())
     {
-        params += " --cuda-gpu-arch=" + arch;
+        params += " --offload-arch=" + arch;
         params += " --cuda-device-only";
         params += " -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3") + " ";
     }
```
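`--offload-arch=` is the current clang/HIP spelling for selecting the GPU target; `--cuda-gpu-arch=` is, to the best of my reading of the clang docs, its older CUDA-flavored alias. The newer flag is the form documented to accept full ROCm target-ID strings such as `gfx90a:xnack-`, which dovetails with the MLIR changes later in this commit that stop trimming the device name down to the bare chip.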
src/targets/gpu/hip.cpp

```diff
@@ -196,12 +196,21 @@ argument to_gpu(const argument& arg, bool host)
 argument from_gpu(const argument& arg)
 {
     argument result;
-    arg.visit([&](auto x) {
-        using type = typename decltype(x)::value_type;
-        auto v     = read_from_gpu<type>(arg.data(), x.get_shape().bytes() / sizeof(type));
-        // cppcheck-suppress returnDanglingLifetime
-        result = {x.get_shape(), [v]() mutable { return v.data(); }};
-    });
+    arg.visit(
+        [&](auto x) {
+            using type = typename decltype(x)::value_type;
+            auto v     = read_from_gpu<type>(arg.data(), x.get_shape().bytes() / sizeof(type));
+            // cppcheck-suppress returnDanglingLifetime
+            result = {x.get_shape(), [v]() mutable { return v.data(); }};
+        },
+        [&](const auto& xs) {
+            std::vector<argument> args;
+            std::transform(xs.begin(), xs.end(), std::back_inserter(args), [&](auto x) {
+                return from_gpu(x);
+            });
+            result = argument{args};
+        });
     return result;
 }
```
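The added second visitor gives `arg.visit` a branch for tuple-typed arguments: each sub-argument is pulled back from the device recursively and the pieces are repacked into a tuple `argument`. The new `test/gpu/hip.cpp` below round-trips a two-element tuple through `to_gpu`/`from_gpu` to exercise exactly this path.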
src/targets/gpu/include/migraphx/gpu/hip.hpp

```diff
@@ -105,7 +105,7 @@ struct hip_copy_to_gpu
     std::string name() const { return "hip::copy_to_gpu"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1, 2);
+        check_shapes{inputs, *this}.has(1, 2).same_type();
         return inputs.at(0);
     }
     argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
@@ -131,7 +131,7 @@ struct hip_copy_from_gpu
     std::string name() const { return "hip::copy_from_gpu"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1, 2);
+        check_shapes{inputs, *this}.has(1, 2).same_type();
         return inputs.at(0);
     }
     argument
@@ -159,7 +159,7 @@ struct hip_copy
     std::string name() const { return "hip::copy"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(2);
+        check_shapes{inputs, *this}.has(2).same_type();
         return inputs.at(1);
     }
     argument compute(context& ctx, const shape&, std::vector<argument> args) const
```
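`check_shapes` is a fluent checker whose methods throw on violation and otherwise return the checker for further chaining, so appending `.same_type()` turns a copy between differently-typed buffers into a shape-computation error instead of a silent byte-level copy at run time.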
src/targets/gpu/jit/mlir.cpp

```diff
@@ -24,7 +24,6 @@
 #include <migraphx/gpu/compiler.hpp>
-#include <migraphx/make_op.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/mlir.hpp>
 
 namespace migraphx {
```
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp

```diff
@@ -25,6 +25,7 @@
 #define MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP
 #include <migraphx/kernels/reduce.hpp>
 #include <migraphx/kernels/ops.hpp>
+#include <migraphx/kernels/vec.hpp>
 #include <migraphx/kernels/print.hpp>
 
 namespace migraphx {
```
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp

```diff
@@ -33,38 +33,6 @@
 namespace migraphx {
 
-template <class T>
-struct implicit_conversion_op
-{
-    T x;
-
-    template <index_int N, class U>
-    constexpr operator vec<U, N>() const
-    {
-        if constexpr(vec_size<T>() == 0)
-        {
-            return x;
-        }
-        else
-        {
-            static_assert(vec_size<T>() == N, "Vector mismatch size");
-            return __builtin_convertvector(x, vec<U, N>);
-        }
-    }
-
-    template <class U>
-    constexpr operator U() const
-    {
-        return x;
-    }
-};
-
-template <class T>
-constexpr implicit_conversion_op<T> implicit_conversion(T x)
-{
-    return {x};
-}
-
 template <class F, class T, class... Ts>
 __device__ void pointwise_tensor(index idx, F f, T out, Ts... xs)
 {
```
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp

```diff
@@ -185,5 +185,37 @@ constexpr auto vec_reduce(T x, Op op)
     }
 }
 
+template <class T>
+struct implicit_conversion_op
+{
+    T x;
+
+    template <index_int N, class U>
+    constexpr operator vec<U, N>() const
+    {
+        if constexpr(vec_size<T>() == 0)
+        {
+            return x;
+        }
+        else
+        {
+            static_assert(vec_size<T>() == N, "Vector mismatch size");
+            return __builtin_convertvector(x, vec<U, N>);
+        }
+    }
+
+    template <class U>
+    constexpr operator U() const
+    {
+        return x;
+    }
+};
+
+template <class T>
+constexpr implicit_conversion_op<T> implicit_conversion(T x)
+{
+    return {x};
+}
+
 } // namespace migraphx
 #endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP
```
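`implicit_conversion_op` moves here unchanged from `pointwise.hpp` so that non-pointwise kernels (layernorm above) can use it too. For readers unfamiliar with the intrinsic it wraps: `__builtin_convertvector` is a clang extension that performs an element-wise conversion between vector types of equal lane count, which is what the `static_assert` enforces. A standalone sketch using plain clang vector extensions, independent of the MIGraphX `vec` alias:

```cpp
// Compile with clang (ext_vector_type is a clang extension).
typedef float float4 __attribute__((ext_vector_type(4)));
typedef int int4 __attribute__((ext_vector_type(4)));

int4 to_int(float4 f)
{
    // Converts each of the 4 float lanes to int, like a per-lane static_cast.
    return __builtin_convertvector(f, int4);
}
```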
src/targets/gpu/mlir.cpp

```diff
@@ -32,7 +32,13 @@
 #include <mlir-c/Dialect/MIGraphX.h>
 #include <mlir-c/IntegerSet.h>
 #include <mlir-c/Pass.h>
-#include <mlir-c/Registration.h>
+#include <mutex>
+#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3
+#warning "Incompatible version of rocMLIR library used, disabling"
+#undef MIGRAPHX_MLIR
+#else
+#include <mlir-c/RegisterRocMLIR.h>
+#endif
 #endif
 #include <migraphx/env.hpp>
@@ -50,10 +56,6 @@
 #include <deque>
 #include <variant>
 
-#if defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) && MLIR_MIGRAPHX_DIALECT_API_VERSION >= 2
-#define MIGRAPHX_MLIR_BARE_POINTER
-#endif
-
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
@@ -168,9 +170,11 @@ struct mlir_program
           location(mlirLocationUnknownGet(ctx.get())),
           mmodule(mlirModuleCreateEmpty(location))
     {
-        MlirDialectHandle mixr_handle = mlirGetDialectHandle__migraphx__();
-        mlirDialectHandleRegisterDialect(mixr_handle, ctx.get());
-        mlirRegisterAllDialects(ctx.get());
+        MlirDialectRegistry registry = mlirDialectRegistryCreate();
+        mlirRegisterRocMLIRDialects(registry);
+        mlirContextAppendDialectRegistry(ctx.get(), registry);
+        mlirContextLoadAllAvailableDialects(ctx.get());
+        mlirDialectRegistryDestroy(registry);
         mlirContextSetAllowUnregisteredDialects(ctx.get(), true /*allow*/);
     }
@@ -452,7 +456,8 @@ struct mlir_program
         auto ops = create_operation_state("func.func");
         ops.add_attributes({{"function_type", make_function_type(inputs, outputs)},
                             {"sym_name", std::string("main")},
-                            {"kernel", std::string("mixr")}});
+                            {"kernel", std::string("mixr")},
+                            {"arch", target_arch}});
         ops.add_region(std::move(region));
         insert(body, std::move(ops));
@@ -512,7 +517,8 @@ struct mlir_program
         pp =
             problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()};
         // check if HW supports xdlops
-        bool xdlops       = contains(get_xdlops_archs(), target_name);
+        auto target_chip  = trim(split_string(target_arch, ':').front());
+        bool xdlops       = contains(get_xdlops_archs(), target_chip);
         std::string tuned = get_tune_params(xdlops);
         if(not tuned.empty())
             ops.add_attributes({{"perf_config", tuned}});
@@ -540,7 +546,7 @@ struct mlir_program
         // 1st pipeline to call
         mlirMIGraphXAddHighLevelPipeline(pm.get());
         // 2nd pipeline to call
-        mlirMIGraphXAddBackendPipeline(pm.get(), target_name.c_str(), "amdgcn-amd-amdhsa", "");
+        mlirMIGraphXAddBackendPipeline(pm.get(), target_arch.c_str());
         mlirPassManagerRun(pm.get(), mmodule.get());
 
         code_object_op op{};
@@ -550,16 +556,7 @@ struct mlir_program
         return op;
     }
 
-    void find_target()
-    {
-        std::string tname = get_device_name();
-        // HACK: Since MLIR can't handle the full target name
-        target_name = trim(split_string(tname, ':').front());
-        if(tname.size() != target_name.size())
-            std::cout << "*************** WARNING: MLIR may not compile the correct target "
-                         "features for: "
-                      << tname << std::endl;
-    }
+    void find_target() { target_arch = get_device_name(); }
 
     std::pair<std::size_t, std::size_t> get_launch_params() const
     {
@@ -588,7 +585,7 @@ struct mlir_program
     mlir_module mmodule;
     problem_params pp;
     std::deque<std::string> strings{};
-    std::string target_name;
+    std::string target_arch;
 };
 
 std::string dump_mlir(const module& m)
@@ -650,6 +647,10 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruction
     const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
     if(trace)
         std::cout << m << std::endl;
+
+    // set mutex while llvm thread support is disabled.
+    static std::mutex g_mlirc_mutex; // NOLINT
+    const std::lock_guard<std::mutex> lock(g_mlirc_mutex);
     mlir_program mp;
     mp.find_target();
     mp.parse(m);
@@ -669,46 +670,9 @@ instruction_ref insert_mlir(module& m,
     std::vector<instruction_ref> refs;
     std::size_t last = 0;
-#ifdef MIGRAPHX_MLIR_BARE_POINTER
     refs.reserve(inputs.size());
     std::copy(inputs.begin(), inputs.end(), std::back_inserter(refs));
     last = refs.size() - 1;
-#else
-    refs.reserve(inputs.size() * 15);
-    std::unordered_map<uint64_t, instruction_ref> literal_map{};
-    auto get_literal = [&](uint64_t value) {
-        auto fi = literal_map.find(value);
-        if(fi != literal_map.end())
-            return fi->second;
-        auto lit = m.add_literal(value);
-        literal_map.emplace(value, lit);
-        return lit;
-    };
-
-    for(auto input : inputs)
-    {
-        const size_t offset = 0;
-        auto s              = input->get_shape();
-        last                = refs.size();
-        refs.push_back(input);
-        refs.push_back(input);
-        refs.push_back(get_literal(offset)); // offset
-        // dim sizes
-        std::transform(s.lens().begin(),
-                       s.lens().end(),
-                       std::back_inserter(refs),
-                       [&](const auto& lval) { return get_literal(lval); });
-        // refs.push_back(get_literal(1)); // G
-        // dim strides
-        std::transform(s.strides().begin(),
-                       s.strides().end(),
-                       std::back_inserter(refs),
-                       [&](const auto& lval) { return get_literal(lval); });
-        // refs.push_back(get_literal(1)); // G
-    }
-#endif
 
     co.expected_inputs = to_shapes(refs);
     co.output_arg      = last;
     return m.insert_instruction(ins, co, refs);
```
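Several coupled changes land here: dialect registration now goes through an `MlirDialectRegistry` filled by `mlirRegisterRocMLIRDialects` and is gated on `MLIR_MIGRAPHX_DIALECT_API_VERSION == 3`; the full device name (including `:`-separated target features, e.g. `gfx90a:sramecc+:xnack-`) is kept as `target_arch`, attached to the generated `func.func` as an `arch` attribute, and passed intact to `mlirMIGraphXAddBackendPipeline`, retiring the old trim-the-name hack and its warning; compilation is serialized behind a static mutex while LLVM thread support is disabled; and the pre-bare-pointer calling convention (two pointers, offset, sizes, and strides per input) is dropped now that the pinned API version guarantees bare pointers. Only the xdlops capability lookup still trims down to the bare chip name.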
src/targets/gpu/perfdb.cpp

```diff
@@ -27,6 +27,7 @@
 #include <migraphx/stringutils.hpp>
 #include <migraphx/permutation.hpp>
 #include <fstream>
+#include <mutex>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -88,6 +89,9 @@ std::string generate_miopen_config(const problem_params& pp)
 auto query_miopen_db(const std::string& query)
 {
+    static std::mutex g_db_mutex; // NOLINT
+    const std::lock_guard<std::mutex> lock(g_db_mutex);
+
     // TODO: Store db as a static variable
     const auto dbpath = fs::path{"/opt"} / "rocm" / "share" / "miopen" / "db" / "miopen.db";
     // Check if db file exists.
```
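Same locking pattern as in `mlir.cpp`: a function-local `static std::mutex` is initialized thread-safely under C++11 rules, so concurrent compilations no longer race on the MIOpen perf-db query without needing a global.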
src/targets/gpu/prefuse_ops.cpp

```diff
@@ -51,17 +51,20 @@ struct layernorm_base
         }
         check_shapes{inputs, static_cast<const Derived&>(*this)}.has(nargs + N);
         auto s = inputs.at(0);
+        auto t = s.type();
+        if(not mods.empty())
+            t = mods.front()->get_output_shapes().front().type();
         if(s.scalar())
         {
             return s;
         }
         else if(s.broadcasted())
         {
-            return {s.type(), s.lens()};
+            return {t, s.lens()};
         }
         else
         {
-            return s.with_lens(s.lens());
+            return s.with_lens(t, s.lens());
         }
     }
 };
```
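The effect: a fused layernorm's computed output type now follows the fused submodule's output rather than input 0. If the module ends in, say, a `convert` from float to half, the reported shape is now half, where previously the input's type leaked through in the broadcasted and general cases.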
src/targets/ref/lowering.cpp

```diff
@@ -449,10 +449,10 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
     {
         return op.normalize_compute_shape(inputs);
     }
 
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    argument compute(context&, const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{output_shape};
-        auto batch_lens    = output_shape.lens();
+        argument result{dyn_out.computed_shape};
+        auto batch_lens    = dyn_out.computed_shape.lens();
         int64_t tuned_axis = tune_axis(args[0].get_shape().lens().size(), op.axis, op.name());
         std::size_t n_dims = batch_lens[tuned_axis];
         batch_lens[tuned_axis] = 1;
@@ -475,7 +475,7 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
             for(std::size_t j = 0; j < n_dims; ++j)
             {
                 idx[tuned_axis]   = j;
-                std::size_t index = output_shape.index(idx);
+                std::size_t index = dyn_out.computed_shape.index(idx);
                 output[index]     = std::exp(input[index] - batch_max[i]);
             }
```
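`dyn_output` is the dynamic-shape counterpart of the old `const shape& output_shape` parameter: its `computed_shape` member holds the concrete output shape computed from the actual runtime arguments, so the same `compute` body now serves both static and dynamic programs.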
test/gpu/hip.cpp (new file, mode 100644)

```cpp
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <test.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/gpu/target.hpp>

TEST_CASE(tuple_to_from_gpu)
{
    migraphx::shape s1{migraphx::shape::float_type, {2, 3}};
    migraphx::shape s2{migraphx::shape::int32_type, {2, 4}};
    std::vector<float> p1_data = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6};
    std::vector<int> p2_data   = {1, 2, 3, 4, 5, 6, 7, 8};
    auto p1                    = migraphx::argument{s1, p1_data.data()};
    auto p2                    = migraphx::argument{s2, p2_data.data()};

    auto p1_gpu  = migraphx::gpu::to_gpu(p1);
    auto p2_gpu  = migraphx::gpu::to_gpu(p2);
    auto p_tuple = migraphx::gpu::from_gpu(migraphx::argument({p1_gpu, p2_gpu}));

    std::vector<migraphx::argument> results = p_tuple.get_sub_objects();
    std::vector<float> result1;
    results[0].visit([&](auto output) { result1.assign(output.begin(), output.end()); });
    std::vector<int> result2;
    results[1].visit([&](auto output) { result2.assign(output.begin(), output.end()); });

    EXPECT(result1 == p1_data);
    EXPECT(result2 == p2_data);
}

int main(int argc, const char* argv[]) { test::run(argc, argv); }
```
test/gpu/mlir.cpp

```diff
@@ -140,7 +140,7 @@ TEST_CASE(conv)
 {
     const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
+  func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
     %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
     return %0 : tensor<1x2x2x2xf32>
   }
@@ -163,7 +163,7 @@ TEST_CASE(conv_add_relu)
 {
     const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
+  func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
     %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
     %1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
     %2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
```
src/include/migraphx/int_divide.hpp → test/instruction.cpp (rename with rewrite)

```diff
@@ -21,28 +21,31 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_RTGLIB_INT_DIVIDE_HPP
-#define MIGRAPHX_GUARD_RTGLIB_INT_DIVIDE_HPP
-
-#include <migraphx/config.hpp>
-#include <cmath>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-
-template <class R, class T, class U>
-R floor_divide(T x, U y)
-{
-    return R(std::floor(double(x) / double(y)));
-}
-
-template <class R, class T, class U>
-R ceil_divide(T x, U y)
-{
-    return R(std::ceil(double(x) / double(y)));
-}
-
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
+#include <migraphx/instruction.hpp>
+#include <migraphx/program.hpp>
+#include <migraphx/make_op.hpp>
+#include "test.hpp"
+
+TEST_CASE(check_undefined)
+{
+    migraphx::module m;
+    auto und = m.add_instruction(migraphx::make_op("undefined"));
+    auto cov = m.add_instruction(
+        migraphx::make_op("convert", {{"target_type", migraphx::shape::half_type}}), und);
+    auto abs = m.add_instruction(migraphx::make_op("abs"), cov);
+
+    migraphx::shape xs{migraphx::shape::float_type, {2, 3}};
+    std::vector<float> datax = {1, 2, 3, 4, 5, 6};
+    auto lit = m.add_literal(migraphx::literal(xs, datax));
+    auto mul = m.add_instruction(migraphx::make_op("mul"), lit, lit);
+
+    EXPECT(und->is_undefined());
+    EXPECT(cov->is_undefined());
+    EXPECT(abs->is_undefined());
+    EXPECT(not lit->is_undefined());
+    EXPECT(not mul->is_undefined());
+}
+
+int main(int argc, const char* argv[]) { test::run(argc, argv); }
```
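GitLab renders this hunk as a rename because its similarity heuristic paired the deleted file with the new one; in substance the unused `floor_divide`/`ceil_divide` helpers of `int_divide.hpp` are removed and an unrelated `test/instruction.cpp` is added. The test pins down the `is_undefined()` behavior relied on by `rewrite_rnn.cpp` above: an `undefined` op, and any chain computed purely from it (`convert`, then `abs`), reports undefined, while a literal and a `mul` of literals do not.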
test/literal_test.cpp

```diff
@@ -49,6 +49,25 @@ TEST_CASE(literal_test)
     EXPECT(l4.empty());
 }
 
+TEST_CASE(literal_nstd_shape_vector)
+{
+    migraphx::shape nstd_shape{migraphx::shape::float_type, {1, 3, 2, 2}, {12, 1, 6, 3}};
+    std::vector<float> data(12);
+    std::iota(data.begin(), data.end(), 0);
+    auto l0 = migraphx::literal{nstd_shape, data};
+    // check data buffer is read in correctly
+    std::vector<float> expected_buffer = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11};
+    const auto* start                  = reinterpret_cast<const float*>(l0.data());
+    std::vector<float> l0_data{start, start + 12};
+    EXPECT(l0_data == expected_buffer);
+    // check that using visit() (that uses a tensor view) gives data in correct order
+    std::vector<float> results_vector(12);
+    l0.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
+    EXPECT(results_vector == data);
+}
+
 TEST_CASE(literal_os1)
 {
     migraphx::literal l{1};
```
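The expected buffer follows from the strides: with lens `{1, 3, 2, 2}` and strides `{12, 1, 6, 3}`, logical element `(n, c, h, w)` (whose `iota` value is `4c + 2h + w`) lives at element offset `c + 6h + 3w`. Walking memory offsets 0, 1, 2, ... therefore visits channels fastest, yielding values 0, 4, 8, then 1, 5, 9, and so on — exactly `expected_buffer` — while `visit()` walks the logical (strided) order and recovers `data` unchanged.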