Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
a24ed87e
Unverified
Commit
a24ed87e
authored
Dec 05, 2023
by
Chris Austen
Committed by
GitHub
Dec 05, 2023
Browse files
Merge branch 'develop' into optimize_jenkinsfile
parents
6481cd69
a09dc502
Changes
391
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
977 additions
and
199 deletions
+977
-199
src/onnx/parse_resize.cpp
src/onnx/parse_resize.cpp
+90
-63
src/onnx/parse_scatternd.cpp
src/onnx/parse_scatternd.cpp
+7
-5
src/onnx/parse_slice.cpp
src/onnx/parse_slice.cpp
+7
-5
src/onnx/parse_split.cpp
src/onnx/parse_split.cpp
+26
-5
src/onnx/parse_unique.cpp
src/onnx/parse_unique.cpp
+92
-0
src/onnx/pooling.cpp
src/onnx/pooling.cpp
+247
-0
src/program.cpp
src/program.cpp
+1
-1
src/py/migraphx_py.cpp
src/py/migraphx_py.cpp
+18
-3
src/quantization.cpp
src/quantization.cpp
+1
-1
src/register_target.cpp
src/register_target.cpp
+4
-0
src/rewrite_pooling.cpp
src/rewrite_pooling.cpp
+131
-17
src/rewrite_quantization.cpp
src/rewrite_quantization.cpp
+1
-1
src/schedule.cpp
src/schedule.cpp
+2
-2
src/simplify_algebra.cpp
src/simplify_algebra.cpp
+0
-21
src/simplify_dyn_ops.cpp
src/simplify_dyn_ops.cpp
+215
-22
src/simplify_qdq.cpp
src/simplify_qdq.cpp
+103
-35
src/simplify_reshapes.cpp
src/simplify_reshapes.cpp
+4
-8
src/targets/cpu/CMakeLists.txt
src/targets/cpu/CMakeLists.txt
+15
-9
src/targets/cpu/dnnl.cpp
src/targets/cpu/dnnl.cpp
+1
-0
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+12
-1
No files found.
src/onnx/parse_resize.cpp
View file @
a24ed87e
...
...
@@ -181,6 +181,76 @@ static std::string get_nearest_mode(const onnx_parser::attribute_map& attr)
return
nearest_mode
;
}
/// Read the optional `scales` attribute of the node.
///
/// @param attr attribute map of the node being parsed
/// @return the values of the `scales` attribute copied into a vector of
///         double, or an empty vector when the attribute is not present
static std::vector<double> get_scales(const onnx_parser::attribute_map& attr)
{
    // No attribute: the caller is expected to look for the scales elsewhere.
    if(not contains(attr, "scales"))
        return {};

    std::vector<double> result;
    copy(attr.at("scales").floats(), std::back_inserter(result));
    return result;
}
static
void
parse_args
(
const
std
::
vector
<
instruction_ref
>&
args
,
const
std
::
vector
<
size_t
>&
in_lens
,
const
std
::
string
&
op_name
,
std
::
vector
<
double
>&
vec_scale
,
std
::
vector
<
std
::
size_t
>&
out_lens
)
{
for
(
const
auto
&
arg
:
args
)
{
if
(
arg
->
name
()
==
"undefined"
or
arg
==
args
.
front
())
{
continue
;
}
// skipped empty input
auto
lens
=
arg
->
get_shape
().
lens
();
if
(
lens
.
empty
())
{
continue
;
}
auto
type
=
arg
->
get_shape
().
type
();
// output size
if
(
type
==
shape
::
int64_type
)
{
auto
arg_out_s
=
arg
->
eval
();
check_arg_empty
(
arg_out_s
,
"PARSE_"
+
op_name
+
": dynamic output size is not supported!"
);
arg_out_s
.
visit
([
&
](
const
auto
&
ol
)
{
out_lens
.
assign
(
ol
.
begin
(),
ol
.
end
());
});
if
(
out_lens
.
size
()
!=
in_lens
.
size
())
{
MIGRAPHX_THROW
(
"PARSE_"
+
op_name
+
": specified output size does not match input size"
);
}
// compute the scale
vec_scale
.
resize
(
in_lens
.
size
());
std
::
transform
(
in_lens
.
begin
(),
in_lens
.
end
(),
out_lens
.
begin
(),
vec_scale
.
begin
(),
[](
auto
iss
,
auto
oss
)
{
return
1.0
*
oss
/
iss
;
});
}
else
{
// scale input
if
(
lens
[
0
]
==
in_lens
.
size
())
{
auto
arg_scale
=
arg
->
eval
();
check_arg_empty
(
arg_scale
,
"PARSE_"
+
op_name
+
": dynamic input scale is not supported!"
);
arg_scale
.
visit
([
&
](
const
auto
&
v
)
{
vec_scale
.
assign
(
v
.
begin
(),
v
.
end
());
});
}
}
}
}
struct
parse_resize
:
op_parser
<
parse_resize
>
{
std
::
vector
<
op_desc
>
operators
()
const
{
return
{{
"Resize"
},
{
"Upsample"
}};
}
...
...
@@ -214,72 +284,30 @@ struct parse_resize : op_parser<parse_resize>
std
::
vector
<
std
::
size_t
>
out_lens
(
in_lens
.
size
());
// scale
std
::
vector
<
double
>
vec_scale
;
std
::
vector
<
double
>
vec_scale
=
get_scales
(
info
.
attributes
)
;
for
(
const
auto
&
arg
:
args
)
// If `scales` was not an attribute, it must be an input
if
(
vec_scale
.
empty
())
{
if
(
arg
->
name
()
==
"undefined"
or
arg
==
args
.
front
())
{
continue
;
}
// skipped empty input
auto
lens
=
arg
->
get_shape
().
lens
();
if
(
lens
.
empty
())
{
continue
;
}
auto
type
=
arg
->
get_shape
().
type
();
// output size
if
(
type
==
shape
::
int64_type
)
{
auto
arg_out_s
=
arg
->
eval
();
check_arg_empty
(
arg_out_s
,
"PARSE_"
+
opd
.
op_name
+
": dynamic output size is not supported!"
);
arg_out_s
.
visit
([
&
](
const
auto
&
ol
)
{
out_lens
.
assign
(
ol
.
begin
(),
ol
.
end
());
});
if
(
out_lens
.
size
()
!=
in_lens
.
size
())
{
MIGRAPHX_THROW
(
"PARSE_"
+
opd
.
op_name
+
": specified output size does not match input size"
);
}
// Depending on the args, it *must* populate the `vec_scale`, and might populate
// `out_lens`
parse_args
(
args
,
in_lens
,
opd
.
op_name
,
vec_scale
,
out_lens
);
}
// compute the scale
vec_scale
.
resize
(
in_lens
.
size
());
std
::
transform
(
in_lens
.
begin
(),
in_lens
.
end
(),
out_lens
.
begin
(),
vec_scale
.
begin
(),
[](
auto
iss
,
auto
oss
)
{
return
1.0
*
oss
/
iss
;
});
}
else
{
if
(
in_lens
.
size
()
!=
vec_scale
.
size
())
{
MIGRAPHX_THROW
(
"PARSE_"
+
opd
.
op_name
+
": ranks of input and scale are different!"
);
}
// scale input
if
(
lens
[
0
]
==
in_lens
.
size
())
{
auto
arg_scale
=
arg
->
eval
();
check_arg_empty
(
arg_scale
,
"PARSE_"
+
opd
.
op_name
+
": dynamic input scale is not supported!"
);
arg_scale
.
visit
([
&
](
const
auto
&
v
)
{
vec_scale
.
assign
(
v
.
begin
(),
v
.
end
());
});
if
(
in_lens
.
size
()
!=
vec_scale
.
size
())
{
MIGRAPHX_THROW
(
"PARSE_"
+
opd
.
op_name
+
": ranks of input and scale are different!"
);
}
std
::
transform
(
in_lens
.
begin
(),
in_lens
.
end
(),
vec_scale
.
begin
(),
out_lens
.
begin
(),
[
&
](
auto
idx
,
auto
scale
)
{
return
static_cast
<
std
::
size_t
>
(
idx
*
scale
);
});
}
}
// if the output was not calculated yet, we update it based on the scales
if
(
all_of
(
out_lens
.
cbegin
(),
out_lens
.
cend
(),
[](
auto
o
)
{
return
o
==
0
;
}))
{
std
::
transform
(
in_lens
.
begin
(),
in_lens
.
end
(),
vec_scale
.
begin
(),
out_lens
.
begin
(),
[
&
](
auto
idx
,
auto
scale
)
{
return
static_cast
<
std
::
size_t
>
(
idx
*
scale
);
});
}
shape
out_s
{
in_s
.
type
(),
out_lens
};
...
...
@@ -288,7 +316,6 @@ struct parse_resize : op_parser<parse_resize>
// reshape input to one-dimension
std
::
vector
<
int64_t
>
rsp_lens
=
{
static_cast
<
int64_t
>
(
in_s
.
elements
())};
args
[
0
]
=
info
.
make_contiguous
(
args
[
0
]);
auto
rsp
=
info
.
add_instruction
(
make_op
(
"reshape"
,
{{
"dims"
,
rsp_lens
}}),
args
[
0
]);
if
(
mode
==
"nearest"
)
...
...
src/onnx/parse_scatternd.cpp
View file @
a24ed87e
...
...
@@ -39,15 +39,17 @@ struct parse_scatternd : op_parser<parse_scatternd>
const
onnx_parser
::
node_info
&
info
,
std
::
vector
<
instruction_ref
>&
args
)
const
{
std
::
string
reduction
=
"none"
;
if
(
contains
(
info
.
attributes
,
"reduction"
))
{
if
(
info
.
attributes
.
at
(
"reduction"
).
s
()
==
"add"
)
return
info
.
add_instruction
(
migraphx
::
make_op
(
"scatternd_add"
),
args
);
if
(
info
.
attributes
.
at
(
"reduction"
).
s
()
==
"mul"
)
return
info
.
add_instruction
(
migraphx
::
make_op
(
"scatternd_mul"
),
args
);
reduction
=
info
.
attributes
.
at
(
"reduction"
).
s
();
if
(
not
contains
({
"none"
,
"add"
,
"mul"
,
"min"
,
"max"
},
reduction
))
{
MIGRAPHX_THROW
(
"PARSE_SCATTERND: unsupported reduction mode "
+
reduction
);
}
}
return
info
.
add_instruction
(
migraphx
::
make_op
(
"scatternd_
none"
),
args
);
return
info
.
add_instruction
(
migraphx
::
make_op
(
"scatternd_
"
+
reduction
),
args
);
}
};
...
...
src/onnx/parse_slice.cpp
View file @
a24ed87e
...
...
@@ -46,6 +46,9 @@ struct parse_slice : op_parser<parse_slice>
/// Unconditionally place `arg` at the front of `op_args`.
void always_insert(instruction_ref arg)
{
    auto front = op_args.begin();
    op_args.insert(front, arg);
}
/**
* Either insert argument into `this->op_args` or return the constant value of the argument
*/
std
::
vector
<
int64_t
>
insert
(
instruction_ref
arg
)
{
std
::
vector
<
int64_t
>
result
;
...
...
@@ -137,23 +140,22 @@ struct parse_slice : op_parser<parse_slice>
sd
.
always_insert
(
args
.
at
(
0
));
// If axes arg is not given, the default is all of them.
if
(
sd
.
op
.
axes
.
empty
()
and
sd
.
op_args
.
size
()
<
3
)
if
(
sd
.
op
.
axes
.
empty
()
and
sd
.
op_args
.
size
()
<
=
3
)
{
std
::
vector
<
int64_t
>
axes
(
args
[
0
]
->
get_shape
().
ndim
());
std
::
iota
(
axes
.
begin
(),
axes
.
end
(),
int64_t
{
0
});
sd
.
op
.
axes
=
axes
;
}
if
(
not
sd
.
steps
.
empty
(
))
if
(
std
::
any_of
(
sd
.
steps
.
begin
(),
sd
.
steps
.
end
(),
[](
auto
s
)
{
return
s
!=
1
;
}
))
{
if
(
sd
.
op
.
starts
.
empty
()
or
sd
.
op
.
ends
.
empty
())
MIGRAPHX_THROW
(
"PARSE_SLICE: steps and variable starts and ends is not supported"
);
MIGRAPHX_THROW
(
"PARSE_SLICE: steps and variable starts and/or ends is not supported"
);
if
(
sd
.
op
.
axes
.
empty
())
MIGRAPHX_THROW
(
"PARSE_SLICE: steps and variable axes is not supported"
);
}
assert
(
sd
.
steps
.
empty
()
or
sd
.
steps
.
size
()
==
sd
.
op
.
axes
.
size
());
// If any axes have negative step, prepare to add a "reverse" op
for
(
auto
i
:
range
(
sd
.
steps
.
size
()))
{
...
...
src/onnx/parse_split.cpp
View file @
a24ed87e
...
...
@@ -68,13 +68,34 @@ struct parse_split : op_parser<parse_split>
// no split attribute, input is equally divided
else
{
if
((
lens
[
tuned_axis
]
%
info
.
num_outputs
)
!=
0
)
std
::
size_t
num_outputs
=
info
.
num_outputs
;
// the num_outputs attribute seems to be redundant since we already have
// node_info::num_outputs, but we can still perform an error check
if
(
contains
(
info
.
attributes
,
"num_outputs"
))
{
MIGRAPHX_THROW
(
"PARSE_SPLIT: input cannot be equally divided into "
+
std
::
to_string
(
info
.
num_outputs
)
+
" splits!"
);
num_outputs
=
parser
.
parse_value
(
info
.
attributes
.
at
(
"num_outputs"
)).
at
<
std
::
size_t
>
();
if
(
num_outputs
!=
info
.
num_outputs
)
{
MIGRAPHX_THROW
(
"PARSE_SPLIT: num_outputs attribute "
+
std
::
to_string
(
num_outputs
)
+
" doesn't match actual number of outputs "
+
std
::
to_string
(
info
.
num_outputs
)
+
"!"
);
}
}
if
(
lens
[
tuned_axis
]
%
num_outputs
==
0
)
{
std
::
size_t
chunk_size
=
lens
[
tuned_axis
]
/
num_outputs
;
vec_splits
.
resize
(
num_outputs
,
chunk_size
);
}
else
{
std
::
size_t
chunk_size
=
lens
[
tuned_axis
]
/
num_outputs
+
1
;
std
::
size_t
last_chunk_size
=
lens
[
tuned_axis
]
-
chunk_size
*
(
num_outputs
-
1
);
vec_splits
.
resize
(
num_outputs
-
1
,
chunk_size
);
vec_splits
.
push_back
(
last_chunk_size
);
}
auto
dl
=
lens
[
tuned_axis
]
/
info
.
num_outputs
;
vec_splits
.
resize
(
info
.
num_outputs
,
dl
);
}
if
(
std
::
accumulate
(
vec_splits
.
begin
(),
vec_splits
.
end
(),
int64_t
(
0
))
!=
...
...
src/onnx/parse_unique.cpp
0 → 100644
View file @
a24ed87e
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/tune_axis.hpp>
#include <optional>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
onnx
{
// generate unique output stream y, given input stream x;
//
// case unsorted:
// input x: [2, 1, 1, 3, 4, 3], attr_sorted = 0;
// output(s):
// y: [2, 1, 3, 4] --- the unique output
// y_indices: [0, 1, 3, 4] --- first incidence, in terms of indices of x
// x_rev_indices: [0, 1, 1, 2, 3, 2] --- x seen in terms of indices of y
// y_count: [1, 2, 2, 1] -- count at each y_index. sum = len(x)
//
// case sorted:
// input x: [2, 1, 1, 3, 4, 3], attr_sorted = 1;
// output(s):
// y: [1, 2, 3, 4] --- the unique output
// y_indices: [1, 0, 3, 4] --- first incidence, in terms of indices of x
// x_rev_indices: [1, 0, 0, 2, 3, 2] --- x seen in terms of indices of y
// y_count: [2, 1, 2, 1] -- count at each y_index. sum = len(x)
/// Parser for the ONNX `Unique` operator.
struct parse_unique : op_parser<parse_unique>
{
    /// ONNX operator names handled by this parser.
    std::vector<op_desc> operators() const { return {{"Unique"}}; }

    /// Lower a `Unique` node to a `unique` instruction and unpack its tuple
    /// result.
    ///
    /// @param opd    operator description; `op_name` is passed to `tune_axis`
    /// @param parser used to convert attribute protos into values
    /// @param info   node information (attributes, instruction insertion)
    /// @param args   node inputs; only the first one is consumed
    /// @return four instructions, the tuple elements 0..3 of the `unique`
    ///         instruction, in the order {y, y_indices, x_rev_indices, y_count}
    std::vector<instruction_ref> parse(const op_desc& opd,
                                       const onnx_parser& parser,
                                       const onnx_parser::node_info& info,
                                       std::vector<instruction_ref> args) const
    {
        // default = sorted.
        int64_t sorted = 1;
        if(contains(info.attributes, "sorted"))
            sorted = parser.parse_value(info.attributes.at("sorted")).at<int>();

        // `axis` is optional; when absent the operator is created without it.
        std::optional<int64_t> axis;
        if(contains(info.attributes, "axis"))
        {
            auto n_dim = args.at(0)->get_shape().ndim();
            axis       = parser.parse_value(info.attributes.at("axis")).at<int>();
            axis       = tune_axis(n_dim, *axis, opd.op_name);
        }

        // Note: the input is deliberately not evaluated here. The result of
        // such an evaluation was never used by this parser.
        auto opr = axis ? migraphx::make_op("unique", {{"axis", *axis}, {"sorted", sorted}})
                        : migraphx::make_op("unique", {{"sorted", sorted}});

        auto u_opr = info.add_instruction(opr, args.at(0));

        // Split the tuple result into its four components.
        auto i_y     = info.add_instruction(make_op("get_tuple_elem", {{"index", 0}}), u_opr);
        auto i_y_idx = info.add_instruction(make_op("get_tuple_elem", {{"index", 1}}), u_opr);
        auto i_x_idx = info.add_instruction(make_op("get_tuple_elem", {{"index", 2}}), u_opr);
        auto i_count = info.add_instruction(make_op("get_tuple_elem", {{"index", 3}}), u_opr);

        return {i_y, i_y_idx, i_x_idx, i_count};
    }
};
}
// namespace onnx
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/onnx/pooling.cpp
0 → 100644
View file @
a24ed87e
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/pooling.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/onnx/padding.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/ranges.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
onnx
{
/// Fill the pooling operator `values` from the attributes of an ONNX node.
///
/// @param opd      operator description; `onnx_name` selects the global variants
/// @param info     node information holding the ONNX attributes
/// @param in_shape shape of the pooling input
/// @param values   operator values to start from
/// @return the updated copy of `values`
///
/// Handles, in this order: global pooling window ("lengths" / "dyn_global"),
/// `ceil_mode`, `strides`, `kernel_shape`, `dilations` and `p` ("lp_order").
/// The list attributes are validated against the number of spatial dimensions
/// through `check_attr_sizes`.
value handle_pooling_values(const op_desc& opd,
                            onnx_parser::node_info info,
                            const shape& in_shape,
                            value values)
{
    // number of spatial dimensions (everything after the first two axes)
    auto kdims = in_shape.ndim() - 2;

    const bool is_global =
        starts_with(opd.onnx_name, "Global") or starts_with(opd.onnx_name, "QLinearGlobal");
    if(is_global)
    {
        // if spatial dimensions are dynamic use dyn_global flag
        const bool dynamic_spatial =
            in_shape.dynamic() and std::any_of(in_shape.dyn_dims().cbegin() + 2,
                                               in_shape.dyn_dims().cend(),
                                               [](auto dd) { return not dd.is_fixed(); });
        if(dynamic_spatial)
        {
            values["dyn_global"] = true;
            values["lengths"]    = std::vector<size_t>();
        }
        else
        {
            // works with static and fixed dynamic shape
            auto max_dims     = in_shape.max_lens();
            values["lengths"] = std::vector<size_t>(max_dims.begin() + 2, max_dims.end());
        }
    }

    if(contains(info.attributes, "ceil_mode"))
    {
        values["ceil_mode"] = static_cast<bool>(info.attributes.at("ceil_mode").i());
    }

    // Replace values[key] with the integer list stored in attribute `attr_name`
    // and check that it has one entry per spatial dimension.
    auto load_int_list = [&](const std::string& attr_name,
                             const std::string& key,
                             const std::string& error_msg) {
        if(not contains(info.attributes, attr_name))
            return;
        values[key].clear();
        copy(info.attributes[attr_name].ints(), std::back_inserter(values[key]));
        check_attr_sizes(kdims, values[key].size(), error_msg);
    };

    load_int_list("strides", "stride", "PARSE_POOLING: inconsistent strides");
    load_int_list("kernel_shape", "lengths", "PARSE_POOLING: inconsistent lengths");
    load_int_list("dilations", "dilations", "PARSE_POOLING: inconsistent dilations");

    // lp_order attribute
    if(contains(info.attributes, "p"))
    {
        values["lp_order"] = info.attributes.at("p").i();
    }

    // ensure pads available only when auto_pad is "NOT_SET"
    check_padding_mode(info, "POOLING");

    return values;
}
/// Create the pooling instruction for an ONNX pooling node.
///
/// @param opd  operator description; `op_name` must be "max", "average" or
///             "lpnorm"
/// @param info node information (attributes, instruction insertion)
/// @param l0   input instruction
/// @return the pooling instruction, or a slice of it when `tune_padding_size`
///         reported slice starts
///
/// Raises through MIGRAPHX_THROW for an unknown pooling mode, for
/// `count_include_pad` on a dynamic input and for slice starts on a dynamic
/// input.
instruction_ref add_pooling_op(const op_desc& opd, onnx_parser::node_info info, instruction_ref l0)
{
    std::string mode = opd.op_name;
    const std::unordered_map<std::string, op::pooling_mode> mode_map = {
        {"max", op::pooling_mode::max},
        {"average", op::pooling_mode::average},
        {"lpnorm", op::pooling_mode::lpnorm}};
    if(not contains(mode_map, mode))
    {
        MIGRAPHX_THROW(
            "PARSE_POOLING: onnx pooling mode must be [\"max\", \"average\", \"lpnorm\"]");
    }

    operation pool_op = make_op("pooling", {{"mode", mode_map.at(mode)}});
    value values      = pool_op.to_value();
    auto in_shape     = l0->get_shape();
    assert(in_shape.ndim() > 2);
    // number of spatial dimensions
    auto kdims = in_shape.ndim() - 2;

    values = handle_pooling_values(opd, info, in_shape, values);

    // count include padding, if count include pad is 1, we always use
    // explicit pad
    int count_include_pad = 0;
    if(contains(info.attributes, "count_include_pad"))
    {
        if(in_shape.dynamic())
        {
            MIGRAPHX_THROW("PARSE_POOLING: count_include_pad attribute is not supported for "
                           "dynamic input shape");
        }
        count_include_pad = info.attributes.at("count_include_pad").i();
    }

    std::vector<int64_t> paddings;
    float pad_val = ((mode == "max") ? std::numeric_limits<float>::lowest() : 0.0f);

    if(contains(info.attributes, "pads"))
    {
        values["padding"].clear();
        copy(info.attributes["pads"].ints(), std::back_inserter(paddings));
        check_attr_sizes(
            kdims, paddings.size() / 2, "PARSE_POOLING: inconsistent explicit paddings");
    }

    // Without a complete set of explicit pads, start from all zeros.
    if(paddings.size() != 2 * kdims)
    {
        paddings.assign(2 * kdims, 0);
    }

    // Give values[key] exactly one entry per spatial dimension, all set to
    // `fill`, whenever its current size differs.
    auto reset_if_wrong_size = [&](const std::string& key, auto fill) {
        if(values[key].size() != kdims)
        {
            values[key].resize(kdims);
            std::fill_n(values[key].begin(), kdims, fill);
        }
    };
    reset_if_wrong_size("padding", 0);
    reset_if_wrong_size("stride", 1);
    reset_if_wrong_size("dilations", 1);

    // used to calculate the supposed output shape
    std::vector<int64_t> orig_padding = paddings;

    if(contains(info.attributes, "auto_pad"))
    {
        const auto auto_pad = to_upper(info.attributes["auto_pad"].s());
        if(auto_pad != "NOTSET")
        {
            if(in_shape.dynamic())
            {
                // set padding_mode to trigger auto padding at runtime
                bool is_same_upper     = (auto_pad.find("SAME_UPPER") != std::string::npos);
                values["padding_mode"] = is_same_upper
                                             ? to_value(op::padding_mode_t::same_upper)
                                             : to_value(op::padding_mode_t::same_lower);
            }
            else
            {
                // Calculate auto padding from the kernel lengths and dilations
                // currently stored in `values`.
                cal_auto_padding_size(info,
                                      values,
                                      values["lengths"].to_vector<std::size_t>(),
                                      values["dilations"].to_vector<std::size_t>(),
                                      in_shape.lens(),
                                      paddings);
                values["padding"] = paddings;
                // default padding_mode indicates that padding sizes are not calculated
                // dynamically
                values["padding_mode"] = migraphx::op::padding_mode_t::default_;
            }
        }
    }

    std::vector<int64_t> slice_start;
    std::vector<int64_t> slice_end;
    tune_padding_size(values, paddings, count_include_pad, slice_start);

    const bool needs_slice = not slice_start.empty();
    if(needs_slice)
    {
        if(in_shape.dynamic())
        {
            MIGRAPHX_THROW(
                "PARSE_POOLING: asymmetric padding not supported for dynamic input shape");
        }
        // calculate expected output shape: add zero pads for the two leading axes
        // at the front of both halves of the padding list
        orig_padding.insert(orig_padding.begin() + kdims, 2, 0);
        orig_padding.insert(orig_padding.begin(), 2, 0);
        op::pad pad{orig_padding, 0.0f};
        shape padded_shape = pad.compute_shape({l0->get_shape()});

        // make an op just to get its output shape
        auto out_lens = make_op("pooling", values).compute_shape({padded_shape}).lens();
        // compute slice_end information
        slice_end.resize(slice_start.size());
        std::transform(out_lens.begin() + 2,
                       out_lens.end(),
                       slice_start.begin(),
                       slice_end.begin(),
                       [](auto len, auto start) { return len + start; });
    }

    values["padding"] = std::vector<size_t>(paddings.begin(), paddings.end());

    check_asym_padding(info, l0, paddings, values, count_include_pad, pad_val);
    pool_op.from_value(values);

    auto result = info.add_instruction(pool_op, l0);
    if(needs_slice)
    {
        // slice over the spatial axes only (axes 2, 3, ...)
        std::vector<int64_t> axes(kdims);
        std::iota(axes.begin(), axes.end(), 2);
        result = info.add_instruction(
            make_op("slice", {{"axes", axes}, {"starts", slice_start}, {"ends", slice_end}}),
            result);
    }

    return result;
}
}
// namespace onnx
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/program.cpp
View file @
a24ed87e
...
...
@@ -936,7 +936,7 @@ void program::perf_report(std::ostream& os,
os
<<
std
::
endl
;
os
<<
"Batch size: "
<<
batch
<<
std
::
endl
;
os
<<
"Rate: "
<<
rate
*
batch
<<
"/sec"
<<
std
::
endl
;
os
<<
"Rate: "
<<
rate
*
batch
<<
"
inferences
/sec"
<<
std
::
endl
;
os
<<
"Total time: "
<<
total_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Total instructions time: "
<<
total_instruction_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Overhead time: "
<<
overhead_time
<<
"ms"
...
...
src/py/migraphx_py.cpp
View file @
a24ed87e
...
...
@@ -40,7 +40,7 @@
#include <migraphx/json.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/float8.hpp>
#ifdef HAVE_GPU
#include <migraphx/gpu/hip.hpp>
#endif
...
...
@@ -144,6 +144,18 @@ struct npy_format_descriptor<half>
static
constexpr
auto
name
()
{
return
_
(
"half"
);
}
};
/// Format descriptor specialization for `migraphx::fp8::fp8e4m3fnuz`.
template <>
struct npy_format_descriptor<migraphx::fp8::fp8e4m3fnuz>
{
    /// Format string reported for this type.
    static std::string format()
    {
        // following: https://docs.python.org/3/library/struct.html#format-characters
        // TODO: need to figure out correct encoding
        return "z";
    }

    /// Name reported for this type.
    static constexpr auto name() { return _("fp8e4m3fnuz"); }
};
}
// namespace detail
}
// namespace pybind11
...
...
@@ -472,7 +484,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
map_dyn_input_dims
,
bool
skip_unknown_operators
,
bool
print_program_on_error
,
int64_t
max_loop_iterations
)
{
int64_t
max_loop_iterations
,
int64_t
limit_max_iterations
)
{
migraphx
::
onnx_options
options
;
options
.
default_dim_value
=
default_dim_value
;
options
.
default_dyn_dim_value
=
default_dyn_dim_value
;
...
...
@@ -481,6 +494,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
options
.
skip_unknown_operators
=
skip_unknown_operators
;
options
.
print_program_on_error
=
print_program_on_error
;
options
.
max_loop_iterations
=
max_loop_iterations
;
options
.
limit_max_iterations
=
limit_max_iterations
;
return
migraphx
::
parse_onnx
(
filename
,
options
);
},
"Parse onnx file"
,
...
...
@@ -492,7 +506,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
migraphx
::
shape
::
dynamic_dimension
>>
(),
py
::
arg
(
"skip_unknown_operators"
)
=
false
,
py
::
arg
(
"print_program_on_error"
)
=
false
,
py
::
arg
(
"max_loop_iterations"
)
=
10
);
py
::
arg
(
"max_loop_iterations"
)
=
10
,
py
::
arg
(
"limit_max_iterations"
)
=
std
::
numeric_limits
<
uint16_t
>::
max
());
m
.
def
(
"parse_onnx_buffer"
,
...
...
src/quantization.cpp
View file @
a24ed87e
...
...
@@ -147,8 +147,8 @@ void quantize_int8(program& prog,
run_passes
(
prog
,
{
quantize_int8_pass
{
ins_names
,
*
int8_quant_params
},
optimize_module
{},
simplify_qdq
{},
optimize_module
{},
dead_code_elimination
{}});
}
...
...
src/register_target.cpp
View file @
a24ed87e
...
...
@@ -56,7 +56,11 @@ target make_target(const std::string& name)
{
if
(
not
contains
(
target_map
(),
name
))
{
#ifdef _WIN32
std
::
string
target_name
=
"migraphx_"
+
name
+
".dll"
;
#else
std
::
string
target_name
=
"libmigraphx_"
+
name
+
".so"
;
#endif
store_target_lib
(
dynamic_loader
(
target_name
));
}
const
auto
it
=
target_map
().
find
(
name
);
...
...
src/rewrite_pooling.cpp
View file @
a24ed87e
...
...
@@ -35,6 +35,110 @@
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
/// Replace a pooling instruction that covers the whole spatial extent with the
/// equivalent reduction over the spatial axes.
///
/// Average pooling becomes `reduce_mean` and max pooling becomes `reduce_max`.
/// Any other pooling mode (e.g. lpnorm) has no equivalent reduction here, so
/// the instruction is left unchanged instead of being turned into
/// `reduce_max`.
///
/// @param m   module that owns `ins`
/// @param ins pooling instruction to replace
static void replace_with_reduce(module& m, instruction_ref ins)
{
    auto&& s    = ins->inputs().front()->get_shape();
    auto&& pool = any_cast<op::pooling>(ins->get_operator());

    const bool is_average = (pool.mode == op::pooling_mode::average);
    const bool is_max     = (pool.mode == op::pooling_mode::max);
    // Only these two modes map onto a plain reduction.
    if(not is_average and not is_max)
        return;

    // Reduce over every axis after the first two.
    auto lens = s.lens();
    std::vector<std::int64_t> axes(lens.size() - 2);
    std::iota(axes.begin(), axes.end(), 2);

    if(is_average)
    {
        m.replace_instruction(ins, make_op("reduce_mean", {{"axes", axes}}), ins->inputs());
    }
    else
    {
        m.replace_instruction(ins, make_op("reduce_max", {{"axes", axes}}), ins->inputs());
    }
}
/// Rewrite a dilated pooling instruction as gathers followed by a pooling
/// without dilation.
///
/// For every spatial axis the indices touched by each pooling window are
/// listed window by window. Gathering along those indices puts the elements of
/// each window next to each other, so a pooling with stride equal to the
/// kernel size, no padding and dilation 1 produces the same windows. A
/// non-zero padding is applied up front with an explicit `pad` instruction.
///
/// The instruction is left unchanged when the rewrite is not applicable: a
/// kernel length or stride of zero, or a dilated window that does not fit
/// into the (padded) extent of an axis. Without this check the unsigned loop
/// bound would wrap around.
///
/// @param m   module that owns `ins`
/// @param ins pooling instruction with non-default dilations
static void replace_dilations_with_gather_pooling(module& m, instruction_ref ins)
{
    // TODO remove this when MIOpen supports dilated pooling
    auto&& s    = ins->inputs().front()->get_shape();
    auto&& pool = any_cast<op::pooling>(ins->get_operator());
    // Ignore N, C axes
    std::vector<size_t> dims = {s.lens().cbegin() + 2, s.lens().cend()};

    bool default_padding =
        std::all_of(pool.padding.cbegin(), pool.padding.cend(), [](auto i) { return i == 0; });

    if(not default_padding)
    {
        for(size_t idx{0}; idx < pool.padding.size(); ++idx)
        {
            // We need to pad both ends
            dims.at(idx) += pool.padding.at(idx) * 2;
        }
    }
    std::vector<size_t> kernels   = pool.lengths;
    std::vector<size_t> strides   = pool.stride;
    std::vector<size_t> dilations = pool.dilations;

    std::vector<std::vector<int>> axis_indices(dims.size());
    for(size_t idx{0}; idx < dims.size(); ++idx)
    {
        const size_t kernel = kernels.at(idx);
        const size_t stride = strides.at(idx);
        // A zero kernel would wrap `kernel - 1`; a zero stride would never advance.
        if(kernel == 0 or stride == 0)
            return;

        const size_t extent = dims.at(idx);
        // distance between the first and the last element of one window
        const size_t span = dilations.at(idx) * (kernel - 1);
        // The window does not fit: `extent - span` would wrap around.
        if(span >= extent)
            return;

        // Only consider if iw fits into the window
        for(size_t start{0}; start < extent - span; start += stride)
        {
            for(size_t step{0}; step < kernel; ++step)
            {
                axis_indices.at(idx).push_back(
                    static_cast<int>(start + dilations.at(idx) * step));
            }
        }
    }

    auto elements = ins->inputs().front();
    if(not default_padding)
    {
        // Pad supports asym, we need to provide both ends
        std::vector<size_t> padding(2 * s.lens().size(), 0);
        // Format will be e.g {N, C, P1, P2, N, C, P1, P2}
        for(size_t idx{0}; idx < pool.padding.size(); ++idx)
        {
            // Ignore N, C axes
            padding.at(2 + idx)                   = pool.padding.at(idx);
            padding.at(2 + idx + s.lens().size()) = pool.padding.at(idx);
        }

        // Default value needed for Max pooling
        elements = m.insert_instruction(
            ins,
            make_op("pad", {{"pads", padding}, {"value", std::numeric_limits<float>::lowest()}}),
            elements);
    }

    for(size_t idx{0}; idx < axis_indices.size(); ++idx)
    {
        migraphx::shape s_indices{migraphx::shape::int32_type, {axis_indices.at(idx).size()}};
        auto indices = m.add_literal(migraphx::literal{s_indices, axis_indices.at(idx)});
        // ignore N,C
        const int axis = static_cast<int>(idx) + 2;
        elements =
            m.insert_instruction(ins, make_op("gather", {{"axis", axis}}), elements, indices);
    }

    // Ignore padding
    std::vector<size_t> new_padding(kernels.size(), 0);
    // The kernel window elements are places next to each other. E.g. {x1, y1, x2, y2, ...}
    // We need to skip them to not overlap
    std::vector<size_t> new_strides(kernels);
    // Ignore dilations
    std::vector<size_t> new_dilations(kernels.size(), 1);
    m.replace_instruction(ins,
                          make_op("pooling",
                                  {{"mode", pool.mode},
                                   {"padding", new_padding},
                                   {"stride", new_strides},
                                   {"lengths", kernels},
                                   {"dilations", new_dilations}}),
                          elements);
}
void
rewrite_pooling
::
apply
(
module
&
m
)
const
{
for
(
auto
ins
:
iterator_for
(
m
))
...
...
@@ -43,26 +147,36 @@ void rewrite_pooling::apply(module& m) const
continue
;
if
(
ins
->
inputs
().
empty
())
continue
;
auto
&&
s
=
ins
->
inputs
().
front
()
->
get_shape
();
auto
&&
op
=
any_cast
<
op
::
pooling
>
(
ins
->
get_operator
());
if
(
not
std
::
all_of
(
op
.
padding
.
begin
(),
op
.
padding
.
end
(),
[](
auto
i
)
{
return
i
==
0
;
}))
continue
;
if
(
not
std
::
all_of
(
op
.
stride
.
begin
(),
op
.
stride
.
end
(),
[](
auto
i
)
{
return
i
==
1
;
}))
continue
;
auto
lens
=
s
.
lens
();
if
(
not
std
::
equal
(
lens
.
begin
()
+
2
,
lens
.
end
(),
op
.
lengths
.
begin
(),
op
.
lengths
.
end
()))
continue
;
std
::
vector
<
std
::
int64_t
>
axes
(
lens
.
size
()
-
2
);
std
::
iota
(
axes
.
begin
(),
axes
.
end
(),
2
);
// average pooling
if
(
op
.
mode
==
op
::
pooling_mode
::
average
)
auto
&&
s
=
ins
->
inputs
().
front
()
->
get_shape
();
auto
&&
op
=
any_cast
<
op
::
pooling
>
(
ins
->
get_operator
());
bool
same_kernel_as_shape
=
std
::
equal
(
s
.
lens
().
cbegin
()
+
2
,
s
.
lens
().
cend
(),
op
.
lengths
.
cbegin
(),
op
.
lengths
.
cend
());
bool
default_strides
=
std
::
all_of
(
op
.
stride
.
cbegin
(),
op
.
stride
.
cend
(),
[](
auto
i
)
{
return
i
==
1
;
});
bool
default_padding
=
std
::
all_of
(
op
.
padding
.
cbegin
(),
op
.
padding
.
cend
(),
[](
auto
i
)
{
return
i
==
0
;
});
bool
default_dilations
=
std
::
all_of
(
op
.
dilations
.
cbegin
(),
op
.
dilations
.
cend
(),
[](
auto
i
)
{
return
i
==
1
;
});
if
(
same_kernel_as_shape
and
default_strides
and
default_padding
and
default_dilations
)
{
m
.
replace_
instruction
(
ins
,
make_op
(
"reduce_mean"
,
{{
"axes"
,
axes
}}),
ins
->
inputs
()
);
replace_
with_reduce
(
m
,
ins
);
}
// max pooling
else
else
if
(
not
default_dilations
)
{
m
.
replace_instruction
(
ins
,
make_op
(
"reduce_max"
,
{{
"axes"
,
axes
}}),
ins
->
inputs
());
// Dilated AvgPool with padding is not supported
if
(
not
default_padding
and
op
.
mode
==
op
::
pooling_mode
::
average
)
{
continue
;
}
auto
size
=
std
::
accumulate
(
s
.
lens
().
cbegin
(),
s
.
lens
().
cend
(),
1
,
std
::
multiplies
<
size_t
>
());
// Can't handle too much size because of literal size
if
(
size
>
100000
)
{
continue
;
}
replace_dilations_with_gather_pooling
(
m
,
ins
);
}
}
}
...
...
src/rewrite_quantization.cpp
View file @
a24ed87e
...
...
@@ -47,7 +47,7 @@ void apply_quantizelinear(module& m, instruction_ref ins)
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
y_scale
->
get_shape
().
type
()}}),
x
);
}
auto
div
=
m
.
insert_instruction
(
ins
,
make_op
(
"div"
),
x
,
y_scale
);
auto
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"
round
"
),
div
);
auto
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"
nearbyint
"
),
div
);
if
(
ins
->
inputs
().
size
()
==
3
)
{
...
...
src/schedule.cpp
View file @
a24ed87e
...
...
@@ -27,7 +27,7 @@
#include <migraphx/iterator_for.hpp>
#include <migraphx/iterator.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/
simple_
par_for.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/dom_info.hpp>
...
...
@@ -461,7 +461,7 @@ struct stream_info
std
::
back_inserter
(
index_to_ins
),
[](
auto
&&
it
)
{
return
it
.
first
;
});
par_for
(
concur_ins
.
size
(),
[
&
](
auto
ins_index
,
auto
tid
)
{
simple_
par_for
(
concur_ins
.
size
(),
[
&
](
auto
ins_index
,
auto
tid
)
{
auto
merge_first
=
index_to_ins
[
ins_index
];
assert
(
concur_ins
.
count
(
merge_first
)
>
0
);
auto
&
merge_second
=
concur_ins
.
at
(
merge_first
);
...
...
src/simplify_algebra.cpp
View file @
a24ed87e
...
...
@@ -941,15 +941,6 @@ struct find_splits
{
auto
split
=
i
->
inputs
()[
split_idx
];
assert
(
split
->
name
()
==
"slice"
);
// Insert contiguous for reshapes
auto
outputs
=
i
->
outputs
();
for
(
auto
output
:
outputs
)
{
if
(
output
->
name
()
!=
"reshape"
)
continue
;
auto
x
=
m
.
insert_instruction
(
output
,
make_op
(
"contiguous"
),
i
);
m
.
replace_instruction
(
output
,
output
->
get_operator
(),
x
);
}
m
.
replace_instruction
(
i
,
split
->
get_operator
(),
c
);
}
...
...
@@ -1181,13 +1172,6 @@ struct find_conv_dot_horiz_fusion
for
(
auto
arg
:
range
(
start
,
last
))
{
auto
outputs
=
arg
->
outputs
();
for
(
auto
output
:
outputs
)
{
if
(
output
->
name
()
!=
"reshape"
)
continue
;
auto
x
=
m
.
insert_instruction
(
output
,
make_op
(
"contiguous"
),
arg
);
m
.
replace_instruction
(
output
,
output
->
get_operator
(),
x
);
}
int64_t
len
=
arg
->
get_shape
().
lens
()[
axis
];
m
.
replace_instruction
(
...
...
@@ -1487,11 +1471,6 @@ struct find_split_reshape
slc_axis_len
;
});
// insert the reshape instruction and add contiguous if needed
if
(
not
input
->
get_shape
().
standard
())
{
input
=
m
.
insert_instruction
(
std
::
next
(
input
),
make_op
(
"contiguous"
),
input
);
}
auto
rsp_ins
=
m
.
insert_instruction
(
std
::
next
(
input
),
make_op
(
"reshape"
,
{{
"dims"
,
rsp_out_lens
}}),
input
);
...
...
src/simplify_dyn_ops.cpp
View file @
a24ed87e
...
...
@@ -22,8 +22,10 @@
* THE SOFTWARE.
*/
#include <migraphx/simplify_dyn_ops.hpp>
#include <migraphx/op/slice.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/literal.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -32,6 +34,10 @@ inline namespace MIGRAPHX_INLINE_NS {
* Convert 2 input static shape broadcast/multibroadcast into 1 input version.
* Some compiler passes (ex. simplify_algebra) only support the 1 input versions
* of the broadcasting operators.
* From:
* broadcast_op(argument_with_static_shape, argument_with_static_shape)
* To:
* broadcast_op(argument_with_static_shape); broadcast_op.out_lens = constant_output_dims
*/
struct
find_static_2in_broadcasts
{
...
...
@@ -60,8 +66,65 @@ struct find_static_2in_broadcasts
};
/**
* Simplify slice with variable `starts` and `ends` to the constant version if
* the `input_starts` and `input_ends` inputs are constant.
* Simplify slice with 2 inputs to the 1 input version if inputs[1] is constant.
* From:
* slice(data, constant_input); two attributes set
* To:
* slice(data); slice.starts, slice.ends, slice.axes set
*/
struct find_const_2in_slice
{
    // Matches a two-argument "slice" whose second argument is constant.
    auto matcher() const
    {
        auto constant_second_arg = match::arg(1)(match::is_constant());
        return match::name("slice")(match::nargs(2), constant_second_arg);
    }

    // Rebuilds the matched slice as a one-input slice: two of starts/ends/axes
    // come from the operator's attributes, the remaining one is read from the
    // evaluated constant input.
    void apply(module& m, const match::matcher_result& mr) const
    {
        auto slice_ins = mr.result;
        auto args      = slice_ins->inputs();
        auto sop       = any_cast<op::slice>(slice_ins->get_operator());
        auto attrs_set = sop.get_set_attributes();

        // Start from the operator's attributes; the vector selected below is
        // overwritten from the constant input.
        std::vector<int64_t> starts_vec = sop.starts;
        std::vector<int64_t> ends_vec   = sop.ends;
        std::vector<int64_t> axes_vec   = sop.axes;

        // Default case: slice(data, axes)
        std::vector<int64_t>* from_input = &axes_vec;
        if(attrs_set == op::slice::ends_axes)
        {
            // slice(data, starts)
            from_input = &starts_vec;
        }
        else if(attrs_set == op::slice::starts_axes)
        {
            // slice(data, ends)
            from_input = &ends_vec;
        }

        args.at(1)->eval().visit(
            [&](auto view) { from_input->assign(view.begin(), view.end()); });

        m.replace_instruction(
            slice_ins,
            make_op("slice", {{"starts", starts_vec}, {"ends", ends_vec}, {"axes", axes_vec}}),
            args.at(0));
    }
};
/**
* Simplify slice with 3 inputs to the 1 input version if inputs[1:2] are constant.
* From:
* slice(data, constant_input1, constant_input2); one attribute set
* To:
* slice(data); slice.starts, slice.ends, slice.axes set
*/
struct
find_const_3in_slice
{
...
...
@@ -76,27 +139,51 @@ struct find_const_3in_slice
{
auto
ins
=
mr
.
result
;
auto
inputs
=
ins
->
inputs
();
argument
starts_arg
=
inputs
.
at
(
1
)
->
eval
();
argument
ends_arg
=
inputs
.
at
(
2
)
->
eval
();
if
(
not
starts_arg
.
empty
()
and
not
ends_arg
.
empty
())
auto
slice_op
=
any_cast
<
op
::
slice
>
(
ins
->
get_operator
());
auto
set_attrs
=
slice_op
.
get_set_attributes
();
std
::
vector
<
int64_t
>
starts_vec
;
std
::
vector
<
int64_t
>
ends_vec
;
std
::
vector
<
int64_t
>
axes_vec
;
if
(
set_attrs
==
op
::
slice
::
axes_only
)
{
std
::
vector
<
int64_t
>
starts_vec
;
std
::
vector
<
int64_t
>
ends_vec
;
starts_arg
.
visit
([
&
](
auto
output
)
{
starts_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
ends_arg
.
visit
([
&
](
auto
output
)
{
ends_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
auto
slice_val
=
ins
->
get_operator
().
to_value
();
auto
axes_vec
=
slice_val
.
at
(
"axes"
).
to_vector
<
int64_t
>
();
m
.
replace_instruction
(
ins
,
make_op
(
"slice"
,
{{
"starts"
,
starts_vec
},
{
"ends"
,
ends_vec
},
{
"axes"
,
axes_vec
}}),
inputs
.
at
(
0
));
// slice(data, starts, ends)
inputs
.
at
(
1
)
->
eval
().
visit
(
[
&
](
auto
output
)
{
starts_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
inputs
.
at
(
2
)
->
eval
().
visit
(
[
&
](
auto
output
)
{
ends_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
axes_vec
=
slice_op
.
axes
;
}
else
if
(
set_attrs
==
op
::
slice
::
ends_only
)
{
// slice(data, starts, axes)
inputs
.
at
(
1
)
->
eval
().
visit
(
[
&
](
auto
output
)
{
starts_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
inputs
.
at
(
2
)
->
eval
().
visit
(
[
&
](
auto
output
)
{
axes_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
ends_vec
=
slice_op
.
ends
;
}
else
{
// slice(data, ends, axes)
inputs
.
at
(
1
)
->
eval
().
visit
(
[
&
](
auto
output
)
{
ends_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
inputs
.
at
(
2
)
->
eval
().
visit
(
[
&
](
auto
output
)
{
axes_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
starts_vec
=
slice_op
.
starts
;
}
m
.
replace_instruction
(
ins
,
make_op
(
"slice"
,
{{
"starts"
,
starts_vec
},
{
"ends"
,
ends_vec
},
{
"axes"
,
axes_vec
}}),
inputs
.
at
(
0
));
}
};
/**
* Simplify slice with variable `starts`, `ends`, and `input_axes` to the constant version if
* the `input_starts`, `input_ends`, and `input_axes` inputs are constant.
* Simplify slice with 4 inputs to the 1 input version if inputs[1:3] are constant.
* From:
* slice(data, constant_starts, constant_ends, constant_axes)
* To:
* slice(data); slice.starts, slice.ends, slice.axes set
*/
struct
find_const_4in_slice
{
...
...
@@ -112,9 +199,9 @@ struct find_const_4in_slice
{
auto
ins
=
mr
.
result
;
auto
inputs
=
ins
->
inputs
();
argument
starts_arg
=
inputs
.
at
(
1
)
->
eval
();
argument
ends_arg
=
inputs
.
at
(
2
)
->
eval
();
argument
axes_arg
=
inputs
.
at
(
3
)
->
eval
();
argument
starts_arg
=
inputs
.
at
(
1
)
->
eval
(
false
);
argument
ends_arg
=
inputs
.
at
(
2
)
->
eval
(
false
);
argument
axes_arg
=
inputs
.
at
(
3
)
->
eval
(
false
);
if
(
not
starts_arg
.
empty
()
and
not
ends_arg
.
empty
()
and
not
axes_arg
.
empty
())
{
std
::
vector
<
int64_t
>
starts_vec
;
...
...
@@ -131,10 +218,116 @@ struct find_const_4in_slice
}
};
/**
* Simplify dimensions_of to a literal when the input argument has a static shape
* or the dynamic dimensions from `start` to `end` are fixed.
*/
struct find_static_dimensions_of
{
    // Matches every "dimensions_of" instruction.
    auto matcher() const { return match::name("dimensions_of")(); }

    // Replaces the matched dimensions_of with an int64 literal holding the
    // input's dimensions in [start, end). Does nothing when the input shape is
    // dynamic and any dimension in that range is not fixed.
    void apply(module& m, const match::matcher_result& mr) const
    {
        auto ins                 = mr.result;
        auto input               = ins->inputs().at(0);
        auto dimensions_of_value = ins->get_operator().to_value();
        auto start               = dimensions_of_value.at("start").to<std::size_t>();
        auto end                 = dimensions_of_value.at("end").to<std::size_t>();
        if(input->get_shape().dynamic())
        {
            // check if dynamic dimensions from start to end are fixed
            auto dds = input->get_shape().dyn_dims();
            if(std::any_of(dds.begin() + start, dds.begin() + end, [](auto dd) {
                   return not dd.is_fixed();
               }))
            {
                return;
            }
        }
        const std::size_t output_ndim = end - start;
        std::vector<int64_t> vec_shape(output_ndim);
        // NOTE(review): the meaning of the argument to to_static() is not visible
        // here; every dimension read below was checked to be fixed above.
        std::vector<std::size_t> input_lens = input->get_shape().to_static(1).lens();
        std::transform(input_lens.begin() + start,
                       input_lens.begin() + end,
                       vec_shape.begin(),
                       [](auto i) { return static_cast<int64_t>(i); });
        // 1-D int64 shape with one element per reported dimension.
        migraphx::shape output_shape{migraphx::shape::int64_type, {output_ndim}};
        auto lit_ins = m.add_literal(migraphx::literal{output_shape, vec_shape});
        m.replace_instruction(ins, lit_ins);
    }
};
/**
* Simplify allocate into 2 argument reshape that has constant output dimensions into a static 1
* argument reshape. Intended to simplify what ONNX parse_reshape creates for dynamic reshapes.
* This matcher can be generalized to matching reshape(data, static_shape_output_tensor).
* From:
* x = allocate(constant_output_dims) -> reshape(data, x)
* To:
* reshape(data); reshape.dims = constant_output_dims
*/
struct find_const_alloc_reshapes
{
    // Matches a two-argument "reshape" whose second argument is a constant
    // "allocate" instruction.
    auto matcher() const
    {
        auto const_alloc = match::name("allocate")(match::is_constant());
        return match::name("reshape")(match::nargs(2), match::arg(1)(const_alloc));
    }

    // Reads the output dimensions from the allocate's first input and rewrites
    // the reshape as a one-argument reshape carrying them in its "dims" attribute.
    void apply(module& m, const match::matcher_result& mr) const
    {
        auto rsp      = mr.result;
        auto rsp_args = rsp->inputs();
        auto alloc    = rsp_args.at(1);

        argument dims_arg = alloc->inputs().at(0)->eval(false);
        std::vector<int64_t> dims;
        dims_arg.visit([&](auto view) { dims.assign(view.begin(), view.end()); });

        // have dead_code_elimination remove the previous allocate
        m.replace_instruction(rsp, make_op("reshape", {{"dims", dims}}), rsp_args.at(0));
    }
};
/**
* Simplify allocate into fill operator that has constant output dimensions and constant value.
* The allocate into fill instructions is what is produced when parsing the ONNX
* ConstantOfShape operator. This replacement could be handled with propagate_constant, but
* would rather have the simplification happen earlier during compiling.
* This matcher can be generalized to matching fill(constant_value, static_shape_output_tensor).
* From:
* x = allocate(constant_output_dims) -> fill(constant_value, x)
* To:
* literal
*/
struct find_const_alloc_fill
{
    // Matches a "fill" whose first argument is constant and whose second
    // argument is a constant "allocate" instruction.
    auto matcher() const
    {
        auto const_value = match::arg(0)(match::is_constant());
        auto const_alloc = match::arg(1)(match::name("allocate")(match::is_constant()));
        return match::name("fill")(const_value, const_alloc);
    }

    // Evaluates the matched fill and substitutes a literal built from the
    // resulting shape and data.
    void apply(module& m, const match::matcher_result& mr) const
    {
        auto ins       = mr.result;
        auto evaluated = ins->eval(false);
        m.replace_instruction(ins, m.add_literal(evaluated.get_shape(), evaluated.data()));
    }
};
void
simplify_dyn_ops
::
apply
(
module
&
m
)
const
{
match
::
find_matches
(
m
,
find_static_2in_broadcasts
{},
find_const_3in_slice
{},
find_const_4in_slice
{});
match
::
find_matches
(
m
,
find_static_dimensions_of
{},
find_const_alloc_reshapes
{},
find_static_2in_broadcasts
{},
find_const_2in_slice
{},
find_const_3in_slice
{},
find_const_4in_slice
{},
find_const_alloc_fill
{});
}
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/simplify_qdq.cpp
View file @
a24ed87e
...
...
@@ -45,77 +45,145 @@ std::unordered_set<std::string> get_quantizable_op_names()
return
s
;
}
MIGRAPHX_PRED_MATCHER
(
has_same_value
,
instruction_ref
ins
)
struct
match_find_quantizable_ops
{
if
(
ins
->
name
()
!=
"@literal"
)
return
false
;
bool
all_same
=
false
;
ins
->
get_literal
().
visit
([
&
](
auto
s
)
{
all_same
=
std
::
all_of
(
s
.
begin
()
+
1
,
s
.
end
(),
[
&
](
const
auto
&
scale
)
{
return
float_equal
(
scale
,
s
.
front
());
static
bool
is_valid_scale
(
instruction_ref
scale
,
std
::
vector
<
std
::
size_t
>
lens
,
std
::
size_t
axis
)
{
return
scale
->
get_shape
().
scalar
()
or
scale
->
get_shape
().
elements
()
==
lens
.
at
(
axis
);
}
static
bool
is_valid_zero_point
(
instruction_ref
zp
)
{
if
(
not
zp
->
can_eval
())
return
false
;
bool
all_zeros
=
false
;
zp
->
eval
().
visit
([
&
](
auto
z
)
{
all_zeros
=
std
::
all_of
(
z
.
begin
(),
z
.
end
(),
[
&
](
auto
val
)
{
return
float_equal
(
val
,
0
);
});
});
});
return
all_same
;
}
return
all_zeros
;
}
struct
match_find_quantizable_ops
{
static
auto
scale_broadcast_op
(
instruction_ref
scale
,
std
::
vector
<
std
::
size_t
>
lens
,
std
::
size_t
axis
)
{
if
(
scale
->
get_shape
().
scalar
())
{
return
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
lens
}});
}
else
{
return
migraphx
::
make_op
(
"broadcast"
,
{{
"out_lens"
,
lens
},
{
"axis"
,
axis
}});
}
}
static
auto
dequantizelinear_op
(
const
std
::
string
&
name
,
const
std
::
string
&
scale
)
// Helper function to insert quantized versions of any broadcasts and transpose ops that
// occur between dequantizelinear and the quantized op
static
auto
propagate_quantized_ins
(
module
&
m
,
const
instruction_ref
dqins
,
const
instruction_ref
qop
)
{
auto
qinp
=
dqins
->
inputs
().
front
();
auto
next_ins
=
dqins
;
while
(
next_ins
!=
qop
)
{
if
(
next_ins
->
name
()
!=
"dequantizelinear"
)
{
qinp
=
m
.
insert_instruction
(
qop
,
next_ins
->
get_operator
(),
qinp
);
}
next_ins
=
next_ins
->
outputs
().
front
();
}
return
qinp
;
}
static
auto
dequantizelinear_op
(
const
std
::
string
&
scale
,
const
std
::
string
&
zp
)
{
return
match
::
name
(
"dequantizelinear"
)(
match
::
arg
(
0
)(
match
::
skip
(
match
::
name
(
"quantizelinear"
))(
match
::
any
()
.
bind
(
name
)
)),
match
::
arg
(
1
)(
match
::
skip_broadcasts
(
has_same_value
().
bind
(
scale
))),
match
::
arg
(
2
)(
match
::
skip_broadcasts
(
match
::
all_of
(
match
::
has_value
(
0
)
))));
match
::
arg
(
0
)(
match
::
skip
(
match
::
name
(
"quantizelinear"
))(
match
::
any
())),
match
::
arg
(
1
)(
match
::
skip_broadcasts
(
match
::
is_constant
().
bind
(
scale
))),
match
::
arg
(
2
)(
match
::
skip_broadcasts
(
match
::
is_constant
().
bind
(
zp
))));
}
auto
matcher
()
const
{
return
match
::
name
(
get_quantizable_op_names
())(
match
::
arg
(
0
)(
dequantizelinear_op
(
"x1"
,
"scale1"
)),
match
::
arg
(
1
)(
dequantizelinear_op
(
"x2"
,
"scale2"
)));
match
::
arg
(
0
)(
match
::
skip_broadcasts_transposes_contiguous
(
dequantizelinear_op
(
"scale1"
,
"zp1"
).
bind
(
"dq1"
))),
match
::
arg
(
1
)(
match
::
skip_broadcasts_transposes_contiguous
(
dequantizelinear_op
(
"scale2"
,
"zp2"
).
bind
(
"dq2"
))));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
qop
=
r
.
result
;
auto
q1
=
r
.
instructions
[
"
x
1"
];
auto
q2
=
r
.
instructions
[
"
x
2"
];
auto
d
q1
=
r
.
instructions
[
"
dq
1"
];
auto
d
q2
=
r
.
instructions
[
"
dq
2"
];
auto
scale1
=
r
.
instructions
[
"scale1"
];
auto
scale2
=
r
.
instructions
[
"scale2"
];
auto
zp1
=
r
.
instructions
[
"zp1"
];
auto
zp2
=
r
.
instructions
[
"zp2"
];
// Only INT8 type currently supported
if
(
q1
->
get_shape
().
type
()
!=
migraphx
::
shape
::
int8_type
or
q2
->
get_shape
().
type
()
!=
migraphx
::
shape
::
int8_type
)
if
(
d
q1
->
inputs
().
front
()
->
get_shape
().
type
()
!=
migraphx
::
shape
::
int8_type
or
d
q2
->
inputs
().
front
()
->
get_shape
().
type
()
!=
migraphx
::
shape
::
int8_type
)
return
;
double
scale
;
visit_all
(
scale1
->
get_literal
(),
scale2
->
get_literal
(
))
(
[
&
](
const
auto
s1
,
const
auto
s2
)
{
scale
=
s1
.
front
()
*
s2
.
front
();
})
;
// Only symmetric quantization supported (ie. non-zero zero_points not allowed)
if
(
not
(
is_valid_zero_point
(
zp1
)
and
is_valid_zero_point
(
zp2
))
)
return
;
// Only support scalar and 1D scales
if
(
scale1
->
get_shape
().
lens
().
size
()
!=
1
or
scale2
->
get_shape
().
lens
().
size
()
!=
1
)
return
;
// Propagate q1 and q2 through any broadcasts and transposes before qop
auto
qop_args
=
qop
->
inputs
();
qop_args
.
at
(
0
)
=
q1
;
qop_args
.
at
(
1
)
=
q2
;
qop_args
.
at
(
0
)
=
propagate_quantized_ins
(
m
,
dq1
,
qop
)
;
qop_args
.
at
(
1
)
=
propagate_quantized_ins
(
m
,
dq2
,
qop
)
;
instruction_ref
dq
;
instruction_ref
dq
_scale
;
instruction_ref
out
_scale
;
instruction_ref
zero_point
;
if
(
qop
->
name
()
==
"convolution"
)
{
auto
conv_val
=
qop
->
get_operator
().
to_value
();
dq
=
m
.
insert_instruction
(
qop
,
migraphx
::
make_op
(
"quant_convolution"
,
conv_val
),
qop_args
);
auto
out_lens
=
dq
->
get_shape
().
lens
();
// Input scale should always be scalar and weight scale can be scalar or 1D of the
// same lens as the output channel dim (dim 1 in the output)
if
(
not
(
is_valid_scale
(
scale1
,
out_lens
,
1
)
and
is_valid_scale
(
scale2
,
out_lens
,
1
)))
return
;
auto
s1_bcast
=
m
.
insert_instruction
(
qop
,
scale_broadcast_op
(
scale1
,
out_lens
,
1
),
scale1
);
auto
s2_bcast
=
m
.
insert_instruction
(
qop
,
scale_broadcast_op
(
scale2
,
out_lens
,
1
),
scale2
);
out_scale
=
m
.
insert_instruction
(
qop
,
migraphx
::
make_op
(
"mul"
),
s1_bcast
,
s2_bcast
);
}
else
if
(
qop
->
name
()
==
"dot"
)
{
dq
=
m
.
insert_instruction
(
qop
,
migraphx
::
make_op
(
"quant_dot"
),
qop_args
);
dq
=
m
.
insert_instruction
(
qop
,
migraphx
::
make_op
(
"quant_dot"
),
qop_args
);
auto
out_lens
=
dq
->
get_shape
().
lens
();
// For (..., M, N) x (..., N, K) dot, only support cases where quantization axis is M
// for input1 and K for input 2
if
(
not
(
is_valid_scale
(
scale1
,
out_lens
,
out_lens
.
size
()
-
2
)
and
is_valid_scale
(
scale2
,
out_lens
,
out_lens
.
size
()
-
1
)))
return
;
auto
s1_bcast
=
m
.
insert_instruction
(
qop
,
scale_broadcast_op
(
scale1
,
out_lens
,
out_lens
.
size
()
-
2
),
scale1
);
auto
s2_bcast
=
m
.
insert_instruction
(
qop
,
scale_broadcast_op
(
scale2
,
out_lens
,
out_lens
.
size
()
-
1
),
scale2
);
out_scale
=
m
.
insert_instruction
(
qop
,
migraphx
::
make_op
(
"mul"
),
s1_bcast
,
s2_bcast
);
}
auto
ins_type
=
qop
->
get_shape
().
type
();
dq_scale
=
m
.
add_literal
(
literal
({
ins_type
},
{
scale
}));
auto
lens
=
dq
->
get_shape
().
lens
();
auto
scale_mb
=
m
.
insert_instruction
(
qop
,
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
lens
}}),
dq_scale
);
dq
=
m
.
insert_instruction
(
qop
,
make_op
(
"dequantizelinear"
),
dq
,
scale_mb
);
dq
=
m
.
insert_instruction
(
qop
,
make_op
(
"dequantizelinear"
),
dq
,
out_scale
);
m
.
replace_instruction
(
qop
,
dq
);
}
};
...
...
src/simplify_reshapes.cpp
View file @
a24ed87e
...
...
@@ -103,8 +103,6 @@ struct find_reshaper
auto
input
=
mr
.
instructions
[
"x"
];
auto
dims
=
ins
->
get_shape
().
lens
();
if
(
not
input
->
get_shape
().
standard
())
input
=
m
.
insert_instruction
(
ins
,
make_op
(
"contiguous"
),
input
);
m
.
replace_instruction
(
ins
,
make_op
(
"reshape"
,
{{
"dims"
,
dims
}}),
input
);
}
};
...
...
@@ -475,9 +473,8 @@ struct find_resize
ins_rsp
,
migraphx
::
make_op
(
"reshape"
,
{{
"dims"
,
in_dims
}}),
in_rsp
);
auto
mb_rsp
=
m
.
insert_instruction
(
ins_rsp
,
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
out_dims
}}),
rsp_data
);
auto
std_mb
=
m
.
insert_instruction
(
ins
,
migraphx
::
make_op
(
"contiguous"
),
mb_rsp
);
std
::
vector
<
int64_t
>
rsp_dims
(
out_lens
.
begin
(),
out_lens
.
end
());
m
.
replace_instruction
(
ins
,
migraphx
::
make_op
(
"reshape"
,
{{
"dims"
,
rsp_dims
}}),
std_mb
);
m
.
replace_instruction
(
ins
,
migraphx
::
make_op
(
"reshape"
,
{{
"dims"
,
rsp_dims
}}),
mb_rsp
);
}
};
...
...
@@ -626,9 +623,8 @@ struct find_transpose_contiguous_reshaper_unary
auto
cont_ins
=
r
.
instructions
[
"cont_ins"
];
auto
unary_op_name
=
ins
->
get_operator
().
name
();
auto
unary_ins
=
m
.
insert_instruction
(
cont_ins
,
make_op
(
unary_op_name
),
trans_ins
);
auto
new_cont_ins
=
m
.
insert_instruction
(
cont_ins
,
make_op
(
"contiguous"
),
unary_ins
);
// older cont and reshape are removed by deadcode elimination
m
.
replace_instruction
(
ins
,
reshaper_ins
->
get_operator
(),
new_cont
_ins
);
m
.
replace_instruction
(
ins
,
reshaper_ins
->
get_operator
(),
unary
_ins
);
}
};
...
...
@@ -647,8 +643,8 @@ struct find_broadcast_transpose
{
auto
transpose
=
r
.
result
;
auto
transpose_lens
=
transpose
->
get_shape
().
lens
();
auto
bcast_ins
=
r
.
instructions
[
"bcast_ins"
];
auto
input
=
bcast_ins
->
inputs
().
front
();
auto
bcast_ins
=
r
.
instructions
[
"bcast_ins"
];
auto
input
=
bcast_ins
->
inputs
().
front
();
// scalar transformation does not need extra transpose
if
(
not
input
->
get_shape
().
scalar
())
{
...
...
src/targets/cpu/CMakeLists.txt
View file @
a24ed87e
...
...
@@ -74,21 +74,27 @@ if(MIGRAPHX_ENABLE_ZENDNN)
target_link_libraries
(
migraphx_cpu PRIVATE
${
BLIS_LIB
}
)
target_link_libraries
(
migraphx_cpu PRIVATE
${
ZENDNN_LIB
}
)
else
()
target_link_libraries
(
migraphx_cpu P
RIVATE
DNNL::dnnl
)
target_link_libraries
(
migraphx_cpu P
UBLIC
DNNL::dnnl
)
endif
()
target_link_libraries
(
migraphx_cpu PRIVATE migraphx
)
migraphx_generate_export_header
(
migraphx_cpu
)
find_package
(
OpenMP
)
target_link_libraries
(
migraphx_cpu PUBLIC OpenMP::OpenMP_CXX
)
# Add library path to rpath to workaround issues with our broken packages
foreach
(
LIBRARY
${
OpenMP_CXX_LIBRARIES
}
)
if
(
LIBRARY MATCHES
"libomp"
)
get_filename_component
(
LIBRARY_PATH
"
${
LIBRARY
}
"
PATH
)
target_link_libraries
(
migraphx_cpu PUBLIC -Wl,-rpath=
${
LIBRARY_PATH
}
-Wl,-rpath-link=
${
LIBRARY_PATH
}
)
endif
()
endforeach
()
if
(
WIN32
)
target_link_libraries
(
migraphx_cpu PUBLIC libomp
)
target_include_directories
(
migraphx_cpu PUBLIC
${
OpenMP_CXX_INCLUDE_DIRS
}
)
target_compile_options
(
migraphx_cpu PUBLIC
${
OpenMP_CXX_FLAGS
}
)
else
()
target_link_libraries
(
migraphx_cpu PUBLIC OpenMP::OpenMP_CXX
)
# Add library path to rpath to workaround issues with our broken packages
foreach
(
LIBRARY
${
OpenMP_CXX_LIBRARIES
}
)
if
(
LIBRARY MATCHES
"libomp"
)
get_filename_component
(
LIBRARY_PATH
"
${
LIBRARY
}
"
PATH
)
target_link_libraries
(
migraphx_cpu PUBLIC -Wl,-rpath=
${
LIBRARY_PATH
}
-Wl,-rpath-link=
${
LIBRARY_PATH
}
)
endif
()
endforeach
()
endif
()
rocm_install_targets
(
TARGETS migraphx_cpu
...
...
src/targets/cpu/dnnl.cpp
View file @
a24ed87e
...
...
@@ -68,6 +68,7 @@ dnnl::memory::data_type to_dnnl_memory_data_type(shape::type_t t)
case
st
::
int32_type
:
return
dt
::
s32
;
case
st
::
int8_type
:
return
dt
::
s8
;
case
st
::
uint8_type
:
return
dt
::
u8
;
case
st
::
fp8e4m3fnuz_type
:
MIGRAPHX_THROW
(
"fp8e4m3fnuz unsupported in DNNL"
);
default:
MIGRAPHX_THROW
(
"Unsupported data type"
);
}
}
...
...
src/targets/cpu/lowering.cpp
View file @
a24ed87e
...
...
@@ -340,7 +340,6 @@ struct cpu_apply
{
"reduce_min"
,
"reduction_min"
},
{
"reduce_sum"
,
"reduction_sum"
},
});
extend_op
(
"concat"
,
"dnnl::concat"
);
extend_op
(
"contiguous"
,
"dnnl::reorder"
);
extend_op
(
"convolution"
,
"dnnl::convolution"
);
...
...
@@ -376,6 +375,12 @@ struct cpu_apply
// Apply these operators first so the inputs can be const folded
for
(
auto
it
:
iterator_for
(
*
modl
))
{
// skip lowering if input has fp8 as one of the inputs since oneDNN doesn't have fp8
// supported yet.
if
(
std
::
any_of
(
it
->
inputs
().
begin
(),
it
->
inputs
().
end
(),
[](
const
auto
&
i
)
{
return
i
->
get_shape
().
type
()
==
migraphx
::
shape
::
fp8e4m3fnuz_type
;
}))
continue
;
if
(
it
->
name
()
==
"pow"
)
{
apply_pow
(
it
);
...
...
@@ -383,6 +388,12 @@ struct cpu_apply
}
for
(
auto
it
:
iterator_for
(
*
modl
))
{
// skip lowering if input has fp8 as one of the inputs since oneDNN doesn't have fp8
// supported yet.
if
(
std
::
any_of
(
it
->
inputs
().
begin
(),
it
->
inputs
().
end
(),
[](
const
auto
&
i
)
{
return
i
->
get_shape
().
type
()
==
migraphx
::
shape
::
fp8e4m3fnuz_type
;
}))
continue
;
if
(
it
->
name
()
==
"pooling"
)
{
apply_pooling
(
it
);
...
...
Prev
1
2
3
4
5
6
7
8
9
10
…
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment