gaoqiong / MIGraphX · Commits

Commit 20b1d690, authored Sep 20, 2019 by Paul
Merge branch 'develop' into tests
Parents: 17aaaa1e, ba729cfc

281 changes in this merge; showing 20 changed files with 1434 additions and 389 deletions (+1434 -389).
src/include/migraphx/target.hpp       +120  -0
src/include/migraphx/tensor_view.hpp  +5    -1
src/include/migraphx/verify.hpp       +1    -0
src/onnx/CMakeLists.txt               +2    -1
src/onnx/onnx.cpp                     +289  -92
src/opt/memory_coloring_impl.cpp      +3    -0
src/opt/memory_coloring_impl.hpp      +1    -1
src/pass_manager.cpp                  +0    -1
src/program.cpp                       +1    -1
src/propagate_constant.cpp            +3    -3
src/py/CMakeLists.txt                 +1    -6
src/py/migraphx_py.cpp                +18   -11
src/quantization.cpp                  +459  -26
src/rewrite_batchnorm.cpp             +57   -0
src/rewrite_pooling.cpp               +42   -0
src/rewrite_rnn.cpp                   +103  -177
src/shape.cpp                         +18   -0
src/simplify_algebra.cpp              +131  -13
src/simplify_reshapes.cpp             +179  -56
src/targets/cpu/CMakeLists.txt        +1    -0
src/include/migraphx/target.hpp

@@ -11,6 +11,8 @@
 #include <migraphx/context.hpp>
 #include <migraphx/pass.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/argument.hpp>
+#include <migraphx/rank.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -34,10 +36,86 @@ struct target
      * @return The context to be used during compilation and execution.
      */
     context get_context() const;
+
+    /**
+     * @brief copy an argument to the current target.
+     *
+     * @param arg Input argument to be copied to the target
+     * @return Argument in the target.
+     */
+    argument copy_to(const argument& arg) const;
+
+    /**
+     * @brief copy an argument from the current target.
+     *
+     * @param arg Input argument to be copied from the target
+     * @return Argument in the host.
+     */
+    argument copy_from(const argument& arg) const;
+
+    /**
+     * @brief Allocate an argument based on the input shape
+     *
+     * @param s Shape of the argument to be allocated in the target
+     * @return Allocated argument in the target.
+     */
+    argument allocate(const shape& s) const;
 };

 #else

+template <class T>
+auto target_allocate(rank<1>, T& x, const shape& s) -> decltype(x.allocate(s))
+{
+    return x.allocate(s);
+}
+
+template <class T>
+argument target_allocate(rank<0>, T& x, const shape&)
+{
+    std::string name = x.name();
+    MIGRAPHX_THROW("Not computable: " + name);
+}
+
+template <class T>
+argument target_allocate(T& x, const shape& s)
+{
+    return target_allocate(rank<1>{}, x, s);
+}
+
+template <class T>
+auto copy_to_target(rank<1>, T& x, const argument& arg) -> decltype(x.copy_to(arg))
+{
+    return x.copy_to(arg);
+}
+
+template <class T>
+argument copy_to_target(rank<0>, T&, const argument& arg)
+{
+    return arg;
+}
+
+template <class T>
+argument copy_to_target(T& x, const argument& arg)
+{
+    return copy_to_target(rank<1>{}, x, arg);
+}
+
+template <class T>
+auto copy_from_target(rank<1>, T& x, const argument& arg) -> decltype(x.copy_from(arg))
+{
+    return x.copy_from(arg);
+}
+
+template <class T>
+argument copy_from_target(rank<0>, T&, const argument& arg)
+{
+    return arg;
+}
+
+template <class T>
+argument copy_from_target(T& x, const argument& arg)
+{
+    return copy_from_target(rank<1>{}, x, arg);
+}
+
 /*
  * Type-erased interface for:
  *
@@ -46,6 +124,9 @@ struct target
  *      std::string name() const;
  *      std::vector<pass> get_passes(context& ctx) const;
  *      context get_context() const;
+ *      argument copy_to(const argument& input) const;
+ *      argument copy_from(const argument& input) const;
+ *      argument allocate(const shape& s) const;
  * };
  *
  */
@@ -125,6 +206,24 @@ struct target
         return (*this).private_detail_te_get_handle().get_context();
     }

+    argument copy_to(const argument& input) const
+    {
+        assert((*this).private_detail_te_handle_mem_var);
+        return (*this).private_detail_te_get_handle().copy_to(input);
+    }
+
+    argument copy_from(const argument& input) const
+    {
+        assert((*this).private_detail_te_handle_mem_var);
+        return (*this).private_detail_te_get_handle().copy_from(input);
+    }
+
+    argument allocate(const shape& s) const
+    {
+        assert((*this).private_detail_te_handle_mem_var);
+        return (*this).private_detail_te_get_handle().allocate(s);
+    }
+
     friend bool is_shared(const target& private_detail_x, const target& private_detail_y)
     {
         return private_detail_x.private_detail_te_handle_mem_var ==
@@ -141,6 +240,9 @@ struct target
         virtual std::string name() const                         = 0;
         virtual std::vector<pass> get_passes(context& ctx) const = 0;
         virtual context get_context() const                      = 0;
+        virtual argument copy_to(const argument& input) const    = 0;
+        virtual argument copy_from(const argument& input) const  = 0;
+        virtual argument allocate(const shape& s) const          = 0;
     };

     template <typename PrivateDetailTypeErasedT>
@@ -181,6 +283,24 @@ struct target
         context get_context() const override { return private_detail_te_value.get_context(); }

+        argument copy_to(const argument& input) const override
+        {
+            return copy_to_target(private_detail_te_value, input);
+        }
+
+        argument copy_from(const argument& input) const override
+        {
+            return copy_from_target(private_detail_te_value, input);
+        }
+
+        argument allocate(const shape& s) const override
+        {
+            return target_allocate(private_detail_te_value, s);
+        }
+
         PrivateDetailTypeErasedT private_detail_te_value;
     };
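Note on the helpers above: target_allocate, copy_to_target, and copy_from_target use rank-based tag dispatch. rank<1> converts to rank<0>, so the overload constrained by the trailing decltype wins whenever the member function exists (SFINAE), and the rank<0> overload is the fallback. A minimal self-contained sketch of the pattern, where the rank template is a stand-in for the one in migraphx/rank.hpp:

#include <iostream>
#include <string>

// rank<1> derives from rank<0>, so an overload taking rank<1> is a
// better match whenever it is viable.
template <int N> struct rank : rank<N - 1> {};
template <> struct rank<0> {};

struct with_alloc { int allocate(int s) { return s; } };
struct without_alloc { std::string name() const { return "cpu"; } };

// Preferred overload: participates only if x.allocate(s) compiles.
template <class T>
auto try_allocate(rank<1>, T& x, int s) -> decltype(x.allocate(s))
{
    return x.allocate(s);
}

// Fallback chosen when T has no allocate member.
template <class T>
int try_allocate(rank<0>, T&, int)
{
    return -1;
}

template <class T>
int try_allocate(T& x, int s) { return try_allocate(rank<1>{}, x, s); }

int main()
{
    with_alloc a;
    without_alloc b;
    std::cout << try_allocate(a, 16) << "\n"; // 16: member found
    std::cout << try_allocate(b, 16) << "\n"; // -1: fallback
}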
src/include/migraphx/tensor_view.hpp

@@ -132,7 +132,11 @@ struct tensor_view
         return m_data + this->size();
     }

-    std::vector<T> to_vector() const { return std::vector<T>(this->begin(), this->end()); }
+    template <class U = T>
+    std::vector<U> to_vector() const
+    {
+        return std::vector<U>(this->begin(), this->end());
+    }

     friend std::ostream& operator<<(std::ostream& os, const tensor_view<T>& x)
     {
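The default template argument keeps existing to_vector() call sites compiling unchanged while allowing an explicit element type (e.g. reading data out as a wider type). A small sketch of the same pattern on a simplified view type:

#include <iostream>
#include <vector>

template <class T>
struct view
{
    const T* data;
    std::size_t n;
    // U defaults to T, so old no-argument calls still work; an explicit
    // U converts element-by-element via the iterator-range constructor.
    template <class U = T>
    std::vector<U> to_vector() const { return std::vector<U>(data, data + n); }
};

int main()
{
    int raw[3] = {1, 2, 3};
    view<int> v{raw, 3};
    auto same  = v.to_vector();         // std::vector<int>
    auto wider = v.to_vector<double>(); // std::vector<double>
    std::cout << same[0] << " " << wider[2] << "\n"; // 1 3
}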
src/include/migraphx/verify.hpp

@@ -168,6 +168,7 @@ bool verify_range(R1&& r1, R2&& r2, double tolerance = 80, double* out_error = nullptr)
 {
     double threshold = std::numeric_limits<range_value<R1>>::epsilon() * tolerance;
     auto error       = rms_range(r1, r2);
+    // cppcheck-suppress uninitvar
     if(out_error != nullptr)
         *out_error = error;
     return error <= threshold;
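For scale, the acceptance threshold here is epsilon of the range's value type times the tolerance; a quick check of the number that produces for float inputs:

#include <iostream>
#include <limits>

int main()
{
    // With the default tolerance of 80 and float data, the accepted RMS
    // error is about 80 * 1.19e-7, roughly 9.5e-6.
    double threshold = std::numeric_limits<float>::epsilon() * 80;
    std::cout << threshold << "\n";
}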
src/onnx/CMakeLists.txt

@@ -9,6 +9,7 @@ set_target_properties(onnx-proto PROPERTIES POSITION_INDEPENDENT_CODE On)
 add_library(migraphx_onnx onnx.cpp)
 set_target_properties(migraphx_onnx PROPERTIES EXPORT_NAME onnx)
 rocm_set_soversion(migraphx_onnx ${PROJECT_VERSION})
+rocm_clang_tidy_check(migraphx_onnx)
 target_link_libraries(migraphx_onnx PRIVATE onnx-proto)
 target_link_libraries(migraphx_onnx PUBLIC migraphx)
@@ -19,7 +20,7 @@ rocm_install_targets(
 add_executable(read_onnx read_onnx.cpp)
 rocm_clang_tidy_check(read_onnx)
-target_link_libraries(read_onnx migraphx_onnx)
+target_link_libraries(read_onnx migraphx_cpu migraphx_onnx)
 if(MIGRAPHX_ENABLE_GPU)
src/onnx/onnx.cpp

@@ -40,6 +40,7 @@ struct onnx_parser
         add_generic_op("Sigmoid", op::sigmoid{});
         add_generic_op("Abs", op::abs{});
         add_generic_op("Exp", op::exp{});
+        add_generic_op("Erf", op::erf{});
         add_generic_op("Log", op::log{});
         // disable dropout for inference
         add_generic_op("Dropout", op::identity{});
@@ -53,21 +54,29 @@ struct onnx_parser
         add_generic_op("Asin", op::asin{});
         add_generic_op("Acos", op::acos{});
         add_generic_op("Atan", op::atan{});
         add_generic_op("Sqrt", op::sqrt{});
+        add_generic_op("Round", op::round{});
+        add_generic_op("Sign", op::sign{});
         add_binary_op("Add", op::add{});
         add_binary_op("Div", op::div{});
         add_binary_op("Mul", op::mul{});
         add_binary_op("Sub", op::sub{});
+        add_binary_op("Pow", op::pow{});
         add_variadic_op("Sum", op::add{});
         add_variadic_op("Max", op::max{});
         add_variadic_op("Min", op::min{});
+        add_mem_op("ArgMax", &onnx_parser::parse_arg_op<op::argmax>);
+        add_mem_op("ArgMin", &onnx_parser::parse_arg_op<op::argmin>);
+        add_mem_op("Cast", &onnx_parser::parse_cast);
         add_mem_op("Clip", &onnx_parser::parse_clip);
         add_mem_op("LRN", &onnx_parser::parse_lrn);
         add_mem_op("ImageScaler", &onnx_parser::parse_imagescaler);
         add_mem_op("LeakyRelu", &onnx_parser::parse_leaky_relu);
         add_mem_op("Elu", &onnx_parser::parse_elu);
+        add_mem_op("Expand", &onnx_parser::parse_expand);
         add_mem_op("Constant", &onnx_parser::parse_constant);
         add_mem_op("Conv", &onnx_parser::parse_conv);
         add_mem_op("MaxPool", &onnx_parser::parse_pooling);
@@ -79,8 +88,8 @@ struct onnx_parser
         add_mem_op("Gemm", &onnx_parser::parse_gemm);
         add_mem_op("MatMul", &onnx_parser::parse_matmul);
         add_mem_op("BatchNormalization", &onnx_parser::parse_batchnorm);
-        add_mem_op("Softmax", &onnx_parser::parse_softmax);
-        add_mem_op("LogSoftmax", &onnx_parser::parse_logsoftmax);
+        add_mem_op("Softmax", &onnx_parser::parse_softmax<op::softmax>);
+        add_mem_op("LogSoftmax", &onnx_parser::parse_softmax<op::logsoftmax>);
         add_mem_op("Squeeze", &onnx_parser::parse_squeeze);
         add_mem_op("Unsqueeze", &onnx_parser::parse_unsqueeze);
         add_mem_op("Slice", &onnx_parser::parse_slice);
@@ -88,11 +97,14 @@ struct onnx_parser
         add_mem_op("Gather", &onnx_parser::parse_gather);
         add_mem_op("Shape", &onnx_parser::parse_shape);
         add_mem_op("ConstantFill", &onnx_parser::parse_constant_fill);
+        add_mem_op("ConstantOfShape", &onnx_parser::parse_constant_of_shape);
         add_mem_op("Transpose", &onnx_parser::parse_transpose);
         add_mem_op("RNN", &onnx_parser::parse_rnn);
         add_mem_op("GRU", &onnx_parser::parse_gru);
         add_mem_op("LSTM", &onnx_parser::parse_lstm);
         add_mem_op("Pad", &onnx_parser::parse_pad);
+        add_mem_op("ReduceSum", &onnx_parser::parse_reduce_oper<op::reduce_sum>);
+        add_mem_op("ReduceMean", &onnx_parser::parse_reduce_oper<op::reduce_mean>);

         // init the activation function map
         init_actv_func();
@@ -100,6 +112,7 @@ struct onnx_parser
     void init_actv_func()
     {
+        // Support name format of all lower case or the first letter capital
         map_actv_funcs.insert(std::make_pair("tanh", op::tanh{}));
         map_actv_funcs.insert(std::make_pair("relu", op::relu{}));
         map_actv_funcs.insert(std::make_pair("sigmoid", op::sigmoid{}));
@@ -181,11 +194,29 @@ struct onnx_parser
         std::transform(s0.begin(), s0.end(), s1.begin() + offset, out_lens.begin() + offset,
-                       [](auto a, auto b) { return std::max(a, b); });
+                       [&](auto a, auto b) {
+                           if(a != b and a != 1 and b != 1)
+                           {
+                               MIGRAPHX_THROW("COMPUTE_BROADCASTLEN: shape {" +
+                                              to_string_range(s0) + "} and {" +
+                                              to_string_range(s1) + "} mismatch!");
+                           }
+                           return std::max(a, b);
+                       });

         return out_lens;
     }

+    instruction_ref make_contiguous(instruction_ref ins)
+    {
+        if(ins->get_shape().standard())
+        {
+            return ins;
+        }
+
+        return prog.add_instruction(op::contiguous{}, ins);
+    }
+
     template <class T>
     instruction_ref add_broadcastable_binary_op(instruction_ref arg0, instruction_ref arg1, T x)
     {
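The new lambda in compute_broadcasted_lens enforces the standard multidirectional-broadcast rule used by ONNX and numpy: after right-aligning the shapes, paired dimensions must be equal or one of them must be 1, and the output takes the maximum. A standalone sketch of that rule (names are illustrative):

#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <vector>

std::vector<std::size_t> broadcast_lens(std::vector<std::size_t> s0,
                                        std::vector<std::size_t> s1)
{
    // Right-align the shorter shape, as the parser does with `offset`.
    if(s0.size() < s1.size())
        s0.swap(s1);
    auto offset = s0.size() - s1.size();
    std::vector<std::size_t> out(s0);
    std::transform(s1.begin(), s1.end(), s0.begin() + offset, out.begin() + offset,
                   [](auto a, auto b) {
                       if(a != b and a != 1 and b != 1)
                           throw std::runtime_error("shape mismatch");
                       return std::max(a, b);
                   });
    return out;
}

int main()
{
    for(auto l : broadcast_lens({3, 4, 5}, {4, 1}))
        std::cout << l << ' '; // 3 4 5
    std::cout << '\n';
}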
@@ -242,27 +273,46 @@ struct onnx_parser
         return prog.add_instruction(op, std::move(args));
     }

-    instruction_ref parse_softmax(const std::string&,
-                                  const attribute_map&,
-                                  std::vector<instruction_ref> args)
+    template <class Op>
+    instruction_ref parse_softmax(const std::string&,
+                                  const attribute_map& attributes,
+                                  std::vector<instruction_ref> args)
     {
-        auto dims = args.front()->get_shape().lens();
-        auto r    = prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1]), 1, 1}},
-                                      args.front());
-        auto s    = prog.add_instruction(op::softmax{}, r);
-        return prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1])}}, s);
+        int axis = 1;
+        if(contains(attributes, "axis"))
+        {
+            axis = parse_value(attributes.at("axis")).at<int>();
+        }
+
+        return prog.add_instruction(Op{axis}, std::move(args));
     }

-    instruction_ref parse_logsoftmax(const std::string&,
-                                     const attribute_map& attributes,
-                                     std::vector<instruction_ref> args)
+    template <class Op>
+    instruction_ref parse_arg_op(const std::string&,
+                                 const attribute_map& attributes,
+                                 std::vector<instruction_ref> args)
     {
-        int axis = 1;
+        int64_t axis = 0;
         if(contains(attributes, "axis"))
         {
-            axis = parse_value(attributes.at("axis")).at<int>();
+            axis = static_cast<int64_t>(parse_value(attributes.at("axis")).at<int>());
         }

-        return prog.add_instruction(op::logsoftmax{axis}, std::move(args));
+        int keep_dims = 1;
+        if(contains(attributes, "keepdims"))
+        {
+            keep_dims = parse_value(attributes.at("keepdims")).at<int>();
+        }
+
+        if(keep_dims == 0)
+        {
+            auto ins = prog.add_instruction(Op{axis}, std::move(args));
+            return prog.add_instruction(op::squeeze{{axis}}, ins);
+        }
+        else
+        {
+            return prog.add_instruction(Op{axis}, std::move(args));
+        }
     }

     instruction_ref
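The keepdims handling in parse_arg_op follows the ONNX ArgMax/ArgMin spec: the reduced axis is kept as a length-1 dimension unless keepdims=0, in which case it is squeezed away. A small sketch of the shape arithmetic, with illustrative names:

#include <cstdint>
#include <iostream>
#include <vector>

// Output dims of an argmax-style reduction over `axis`:
// keepdims=1 keeps the axis as 1; keepdims=0 erases it (the squeeze).
std::vector<int64_t> argmax_dims(std::vector<int64_t> dims, int64_t axis, int keep_dims)
{
    if(keep_dims == 1)
        dims[axis] = 1;
    else
        dims.erase(dims.begin() + axis);
    return dims;
}

int main()
{
    for(auto d : argmax_dims({2, 3, 4}, 1, 1)) std::cout << d << ' '; // 2 1 4
    std::cout << '\n';
    for(auto d : argmax_dims({2, 3, 4}, 1, 0)) std::cout << d << ' '; // 2 4
    std::cout << '\n';
}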
@@ -274,7 +324,11 @@ struct onnx_parser
     {
         if(contains(attributes, "auto_pad"))
         {
-            MIGRAPHX_THROW("auto_pad and padding cannot be specified simultaneously");
+            auto s = attributes["auto_pad"].s();
+            if(contains(attributes, "pads") and to_upper(s) != "NOTSET")
+            {
+                MIGRAPHX_THROW("auto_pad and padding cannot be specified simultaneously");
+            }
         }

         std::vector<std::int64_t> padding;
         copy(attributes["pads"].ints(), std::back_inserter(padding));
@@ -322,7 +376,7 @@ struct onnx_parser
         if(args.size() == 3)
         {
             uint64_t axis = 1;
-            auto l1       = prog.add_instruction(op, args[0], args[1]);
+            auto l1       = prog.add_instruction(op, l0, args[1]);
             auto l2 = prog.add_instruction(op::broadcast{axis, l1->get_shape().lens()}, args[2]);
             return prog.add_instruction(op::add{}, l1, l2);
         }
@@ -352,7 +406,8 @@ struct onnx_parser
         {
             // insert zeros for pad op (args[0] has 4 dims)
             padding = {0, 0, padding[0], padding[1], 0, 0, padding[2], padding[3]};
-            l0      = prog.add_instruction(op::pad{padding}, l0);
+            l0      = prog.add_instruction(
+                op::pad{padding, std::numeric_limits<float>::lowest()}, l0);
         }
         else
         {
@@ -393,11 +448,11 @@ struct onnx_parser
         if(args.size() == 2)
         {
             auto s = args[1]->eval();
-            if(s.empty())
-                MIGRAPHX_THROW("Dynamic shape is not supported.");
+            check_arg_empty(s, "Reshape: dynamic shape is not supported");
             s.visit([&](auto v) { copy(v, std::back_inserter(op.dims)); });
         }

-        return prog.add_instruction(op, args[0]);
+        return prog.add_instruction(op, make_contiguous(args[0]));
     }

     instruction_ref
@@ -445,23 +500,33 @@ struct onnx_parser
         {
             axis = parse_value(attributes.at("axis")).at<int>();
         }

         op::gather op{axis};
-        return prog.add_instruction(op, std::move(args));
+        return prog.add_instruction(op, make_contiguous(args[0]), make_contiguous(args[1]));
     }

     instruction_ref
     parse_slice(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
         op::slice op;
+        std::vector<size_t> dims = args[0]->get_shape().lens();
+        size_t num_dims          = dims.size();

         if(contains(attributes, "axes"))
         {
             literal s = parse_value(attributes.at("axes"));
             s.visit([&](auto v) { copy(v, std::back_inserter(op.axes)); });
         }
+        else
+        {
+            op.axes = std::vector<int64_t>(num_dims);
+            std::iota(op.axes.begin(), op.axes.end(), 0);
+        }

         if(contains(attributes, "ends"))
         {
-            literal s = parse_value(attributes.at("ends"));
-            s.visit([&](auto v) { copy(v, std::back_inserter(op.ends)); });
+            op.ends = get_indices(attributes.at("ends"));
         }

         if(contains(attributes, "starts"))
         {
             literal s = parse_value(attributes.at("starts"));
             s.visit([&](auto v) { copy(v, std::back_inserter(op.starts)); });
@@ -473,7 +538,13 @@ struct onnx_parser
     parse_constant(const std::string&,
                    attribute_map attributes,
                    const std::vector<instruction_ref>&)
     {
         literal v = parse_value(attributes.at("value"));
+        // return empty literal
+        if(v.get_shape().elements() == 0)
+        {
+            return prog.add_literal(literal{});
+        }
+
         auto dim_size = attributes.at("value").t().dims_size();
         // if dim_size is 0, it is a scalar
         if(dim_size == 0)
@@ -604,7 +675,6 @@ struct onnx_parser
         float epsilon                                     = 1e-5f;
         float momentum                                    = 0.9f;
         op::batch_norm_inference::bn_infer_mode_t bn_mode = op::batch_norm_inference::spatial;
-        bool is_test                                      = false;
         if(contains(attributes, "epsilon"))
         {
             epsilon = parse_value(attributes.at("epsilon")).at<float>();
@@ -613,17 +683,12 @@ struct onnx_parser
         {
             momentum = parse_value(attributes.at("momentum")).at<float>();
         }
-        if(contains(attributes, "is_test"))
-        {
-            is_test = parse_value(attributes.at("is_test")).at<uint64_t>() > 0;
-        }
         if(contains(attributes, "spatial"))
         {
             bn_mode = (parse_value(attributes.at("spatial")).at<uint64_t>() > 0)
                           ? op::batch_norm_inference::spatial
                           : op::batch_norm_inference::per_activation;
         }
-        (void)is_test;
         op::batch_norm_inference op{epsilon, momentum, bn_mode};
         return prog.add_instruction(op, std::move(args));
     }
@@ -770,7 +835,7 @@ struct onnx_parser
         {
             dtype = parse_value(attributes.at("dtype")).at<int>();
         }
-        migraphx::shape::type_t type = get_type(dtype);
+        shape::type_t type = get_type(dtype);
         if(contains(attributes, "input_as_shape"))
         {
@@ -801,10 +866,7 @@ struct onnx_parser
             }
             migraphx::argument in = args[0]->eval();
-            if(in.empty())
-            {
-                MIGRAPHX_THROW("ConstantFill: cannot handle dynamic shape as input");
-            }
-
+            check_arg_empty(in, "ConstantFill: dynamic shape is not supported");
             std::vector<std::size_t> dims;
             in.visit([&](auto input) { dims.assign(input.begin(), input.end()); });
@@ -832,6 +894,73 @@ struct onnx_parser
         }
     }

+    instruction_ref parse_constant_of_shape(const std::string&,
+                                            attribute_map attributes,
+                                            std::vector<instruction_ref> args)
+    {
+        literal l_val{};
+        if(contains(attributes, "value"))
+        {
+            l_val = parse_value(attributes.at("value"));
+            if(l_val.get_shape().elements() != 1)
+            {
+                MIGRAPHX_THROW("ConstantOfShape: attribute value can contain only 1 elements!");
+            }
+        }
+        else
+        {
+            l_val = literal({shape::float_type, {1}, {0}}, {0.0f});
+        }
+
+        // input is empty, output is a scalar
+        auto type = l_val.get_shape().type();
+
+        if(args.empty())
+        {
+            MIGRAPHX_THROW("ConstantOfShape : must have 1 input!");
+        }
+        else
+        {
+            migraphx::shape s;
+            // empty input tensor, output is a scalar
+            if(args[0]->get_shape().elements() == 0)
+            {
+                s = migraphx::shape{type, {1}, {0}};
+            }
+            else
+            {
+                migraphx::argument in = args[0]->eval();
+                check_arg_empty(in, "ConstantOfShape: dynamic shape is not supported");
+
+                std::vector<std::size_t> dims;
+                in.visit([&](auto input) { dims.assign(input.begin(), input.end()); });
+                s = migraphx::shape{type, dims};
+            }
+
+            literal l_out{};
+            l_val.visit([&](auto val) {
+                using val_type = std::remove_cv_t<typename decltype(val)::value_type>;
+                // l_val contains only one element
+                std::vector<val_type> out_vec(s.elements(), *val.begin());
+                l_out = literal(s, out_vec);
+            });
+
+            return prog.add_literal(l_out);
+        }
+    }
+
+    instruction_ref parse_expand(const std::string&,
+                                 const attribute_map&,
+                                 std::vector<instruction_ref> args)
+    {
+        auto in_lens             = args[0]->get_shape().lens();
+        migraphx::argument arg_s = args[1]->eval();
+        check_arg_empty(arg_s, "Expand: dynamic shape is not supported");
+        std::vector<std::size_t> dims;
+        arg_s.visit([&](auto input) { dims.assign(input.begin(), input.end()); });
+        auto out_lens = compute_broadcasted_lens(in_lens, dims);
+        return prog.add_instruction(op::multibroadcast{out_lens}, args[0]);
+    }
+
     std::vector<instruction_ref>
     parse_rnn(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
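parse_constant_of_shape materializes the result eagerly: it reads the shape tensor, then fills a literal of that shape with the single attribute value (defaulting to float 0). A sketch of the fill step it performs via literal(s, out_vec):

#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// Fill a buffer of shape `dims` with one repeated value.
template <class T>
std::vector<T> constant_of_shape(const std::vector<std::size_t>& dims, T value)
{
    auto elements = std::accumulate(
        dims.begin(), dims.end(), std::size_t{1}, std::multiplies<std::size_t>{});
    return std::vector<T>(elements, value);
}

int main()
{
    auto out = constant_of_shape<float>({2, 3}, 1.5f);
    std::cout << out.size() << '\n'; // 6
}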
@@ -870,7 +999,9 @@ struct onnx_parser
             auto names = attributes.at("activations").strings();
             vec_names.clear();
             vec_names.resize(names.size());
-            std::copy(names.begin(), names.end(), vec_names.begin());
+            std::transform(names.begin(), names.end(), vec_names.begin(), [](auto name) {
+                return to_lower(name);
+            });
         }

         auto name_it = std::find_if(vec_names.begin(), vec_names.end(), [&](auto& name) {
@@ -894,9 +1025,10 @@ struct onnx_parser
         }

         std::vector<operation> vec_actv_funcs(vec_names.size());
-        std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(),
-                       [&](auto& fn) { return map_actv_funcs[fn]; });
+        std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(),
+                       [&](const auto& fn) { return map_actv_funcs[fn]; });

         // To be added later
         float clip = 0.0;
@@ -961,7 +1093,9 @@ struct onnx_parser
             auto names = attributes.at("activations").strings();
             vec_names.clear();
             vec_names.resize(names.size());
-            std::copy(names.begin(), names.end(), vec_names.begin());
+            std::transform(names.begin(), names.end(), vec_names.begin(), [](auto name) {
+                return to_lower(name);
+            });
         }

         // need 4 activation functions
@@ -1008,9 +1142,10 @@ struct onnx_parser
         }

         std::vector<operation> vec_actv_funcs(vec_names.size());
-        std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(),
-                       [&](auto& name) { return map_actv_funcs[name]; });
+        std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(),
+                       [&](const auto& name) { return map_actv_funcs[name]; });

         float clip = 0.0;
         if(contains(attributes, "clip"))
@@ -1088,7 +1223,9 @@ struct onnx_parser
             auto names = attributes.at("activations").strings();
             vec_names.clear();
             vec_names.resize(names.size());
-            std::copy(names.begin(), names.end(), vec_names.begin());
+            std::transform(names.begin(), names.end(), vec_names.begin(), [](auto name) {
+                return to_lower(name);
+            });
         }

         // need 6 activation functions for bidirectional directions
@@ -1178,9 +1315,10 @@ struct onnx_parser
         }

         std::vector<operation> vec_actv_funcs(vec_names.size());
-        std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(),
-                       [&](auto& name) { return map_actv_funcs[name]; });
+        std::transform(vec_names.begin(), vec_names.end(), vec_actv_funcs.begin(),
+                       [&](const auto& name) { return map_actv_funcs[name]; });

         float clip = 0.0;
         if(contains(attributes, "clip"))
@@ -1214,6 +1352,53 @@ struct onnx_parser
         return {hidden_states, last_output, last_cell_output};
     }

+    template <class T>
+    instruction_ref parse_reduce_oper(const std::string&,
+                                      attribute_map attributes,
+                                      std::vector<instruction_ref> args)
+    {
+        std::size_t n_dim = args.front()->get_shape().lens().size();
+
+        // default to reduce over all dimensions
+        std::vector<int64_t> axes(n_dim);
+        std::iota(axes.begin(), axes.end(), 0);
+        if(contains(attributes, "axes"))
+        {
+            axes.clear();
+            auto&& attr_axes = attributes["axes"].ints();
+            axes             = std::vector<int64_t>(attr_axes.begin(), attr_axes.end());
+        }
+
+        int keep_dims = 1;
+        if(contains(attributes, "keepdims"))
+        {
+            keep_dims = parse_value(attributes.at("keepdims")).at<int>();
+        }
+
+        if(keep_dims == 1)
+        {
+            return prog.add_instruction(T{axes}, std::move(args));
+        }
+        else
+        {
+            auto ins = prog.add_instruction(T{axes}, std::move(args));
+            return prog.add_instruction(op::squeeze{axes}, ins);
+        }
+    }
+
+    instruction_ref
+    parse_cast(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
+    {
+        if(!contains(attributes, "to"))
+        {
+            MIGRAPHX_THROW("PARSE_CAST: missing to type attribute!");
+        }
+
+        int to_type        = parse_value(attributes.at("to")).at<int>();
+        shape::type_t type = get_type(to_type);
+        return prog.add_instruction(op::convert{type}, std::move(args));
+    }
+
     void parse_from(std::istream& is)
     {
         onnx::ModelProto model;
@@ -1349,6 +1534,20 @@ struct onnx_parser
         return result;
     }

+    static std::vector<int64_t> get_indices(const onnx::AttributeProto& attr)
+    {
+        std::vector<int64_t> result;
+        literal s = parse_value(attr);
+        s.visit([&](auto v) { copy(v, std::back_inserter(result)); });
+        // Clamp large indices to -1
+        std::replace_if(
+            result.begin(),
+            result.end(),
+            [](auto x) { return x > int64_t{std::numeric_limits<std::int32_t>::max()} / 2; },
+            -1);
+        return result;
+    }
+
     template <class T>
     static literal from_repeated(shape::type_t t, const T& r)
     {
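ONNX models routinely use INT64_MAX (or similarly huge sentinels) in Slice "ends" to mean "through the end of the axis"; get_indices normalizes anything above INT32_MAX/2 to -1 so the slice op receives a value it can handle. A quick check of the clamp on its own:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

int main()
{
    std::vector<int64_t> ends = {4, std::numeric_limits<int64_t>::max(), 2};
    // Same predicate as get_indices: values beyond INT32_MAX/2 are
    // treated as "to the end" sentinels and replaced with -1.
    std::replace_if(
        ends.begin(),
        ends.end(),
        [](auto x) { return x > int64_t{std::numeric_limits<std::int32_t>::max()} / 2; },
        int64_t{-1});
    for(auto e : ends)
        std::cout << e << ' '; // 4 -1 2
    std::cout << '\n';
}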
@@ -1360,16 +1559,16 @@ struct onnx_parser
     {
         switch(attr.type())
         {
-        case onnx::AttributeProto::UNDEFINED: return {};
         case onnx::AttributeProto::FLOAT: return literal{attr.f()};
         case onnx::AttributeProto::INT: return literal{attr.i()};
-        case onnx::AttributeProto::STRING: return {};
         case onnx::AttributeProto::TENSOR: return parse_tensor(attr.t());
-        case onnx::AttributeProto::GRAPH: return {};
         case onnx::AttributeProto::FLOATS: return from_repeated(shape::float_type, attr.floats());
         case onnx::AttributeProto::INTS: return from_repeated(shape::int64_type, attr.ints());
-        case onnx::AttributeProto::STRINGS: return {};
-        case onnx::AttributeProto::TENSORS: return {};
+        case onnx::AttributeProto::UNDEFINED:
+        case onnx::AttributeProto::GRAPH:
+        case onnx::AttributeProto::STRING:
+        case onnx::AttributeProto::STRINGS:
+        case onnx::AttributeProto::TENSORS:
+        case onnx::AttributeProto::GRAPHS: return {};
         }
         MIGRAPHX_THROW("Invalid attribute type");
@@ -1383,47 +1582,41 @@ struct onnx_parser
         const std::string& s = t.raw_data();
         switch(t.data_type())
         {
-        case onnx::TensorProto::UNDEFINED: throw std::runtime_error("");
         case onnx::TensorProto::FLOAT: return create_literal(shape::float_type, dims, s.data());
-        case onnx::TensorProto::UINT8: throw std::runtime_error("");
-        case onnx::TensorProto::INT8: return create_literal(shape::int32_type, dims, s.data());
-        case onnx::TensorProto::UINT16: return create_literal(shape::int32_type, dims, s.data());
-        case onnx::TensorProto::INT16: return create_literal(shape::int32_type, dims, s.data());
-        case onnx::TensorProto::INT32: return create_literal(shape::int32_type, dims, s.data());
-        case onnx::TensorProto::INT64: return create_literal(shape::int64_type, dims, s.data());
-        case onnx::TensorProto::STRING: throw std::runtime_error("");
-        case onnx::TensorProto::BOOL: return create_literal(shape::int32_type, dims, s.data());
         case onnx::TensorProto::FLOAT16: return create_literal(shape::half_type, dims, s.data());
         case onnx::TensorProto::DOUBLE: return create_literal(shape::double_type, dims, s.data());
-        case onnx::TensorProto::UINT32: throw std::runtime_error("");
-        case onnx::TensorProto::UINT64: throw std::runtime_error("");
-        case onnx::TensorProto::COMPLEX64: throw std::runtime_error("");
+        case onnx::TensorProto::INT64: return create_literal(shape::int64_type, dims, s.data());
+        case onnx::TensorProto::INT8:
+        case onnx::TensorProto::UINT16:
+        case onnx::TensorProto::INT16:
+        case onnx::TensorProto::INT32:
+        case onnx::TensorProto::BOOL: return create_literal(shape::int32_type, dims, s.data());
+        case onnx::TensorProto::UINT8:
+        case onnx::TensorProto::STRING:
+        case onnx::TensorProto::UNDEFINED:
+        case onnx::TensorProto::UINT32:
+        case onnx::TensorProto::UINT64:
+        case onnx::TensorProto::COMPLEX64:
+        case onnx::TensorProto::COMPLEX128: throw std::runtime_error("");
         }
         MIGRAPHX_THROW("Invalid tensor type");
     }
     switch(t.data_type())
     {
-    case onnx::TensorProto::UNDEFINED: throw std::runtime_error("");
-    case onnx::TensorProto::FLOAT: return create_literal(shape::float_type, dims, t.float_data());
-    case onnx::TensorProto::UINT8: throw std::runtime_error("");
-    case onnx::TensorProto::INT8: return create_literal(shape::int32_type, dims, t.int32_data());
-    case onnx::TensorProto::UINT16: return create_literal(shape::int32_type, dims, t.int32_data());
-    case onnx::TensorProto::INT16: return create_literal(shape::int32_type, dims, t.int32_data());
-    case onnx::TensorProto::STRING: throw std::runtime_error("");
-    case onnx::TensorProto::BOOL: return create_literal(shape::int32_type, dims, t.int32_data());
+    case onnx::TensorProto::INT8:
+    case onnx::TensorProto::UINT16:
+    case onnx::TensorProto::INT16:
+    case onnx::TensorProto::INT32:
+    case onnx::TensorProto::BOOL: return create_literal(shape::int32_type, dims, t.int32_data());
     case onnx::TensorProto::INT64: return create_literal(shape::int64_type, dims, t.int64_data());
+    case onnx::TensorProto::DOUBLE: return create_literal(shape::double_type, dims, t.double_data());
+    case onnx::TensorProto::FLOAT: return create_literal(shape::float_type, dims, t.float_data());
     case onnx::TensorProto::FLOAT16:
     {
         std::vector<uint16_t> data_uint16(t.int32_data().begin(), t.int32_data().end());
@@ -1434,11 +1627,12 @@ struct onnx_parser
         std::vector<half> data_half;
         std::transform(data_uint16.begin(), data_uint16.end(), std::back_inserter(data_half),
                        [](uint16_t raw_val) { return *reinterpret_cast<half*>(&raw_val); });
         return create_literal(shape::half_type, dims, data_half);
     }
-    case onnx::TensorProto::DOUBLE: return create_literal(shape::double_type, dims, t.double_data());
-    case onnx::TensorProto::UINT32: throw std::runtime_error("");
-    case onnx::TensorProto::UINT64: throw std::runtime_error("");
-    case onnx::TensorProto::COMPLEX64: throw std::runtime_error("");
+    case onnx::TensorProto::UNDEFINED:
+    case onnx::TensorProto::UINT8:
+    case onnx::TensorProto::STRING:
+    case onnx::TensorProto::UINT32:
+    case onnx::TensorProto::UINT64:
+    case onnx::TensorProto::COMPLEX64:
+    case onnx::TensorProto::COMPLEX128: throw std::runtime_error("");
     }
     MIGRAPHX_THROW("Invalid tensor type");
@@ -1466,28 +1660,23 @@ struct onnx_parser
     shape::type_t shape_type{};
     switch(t.tensor_type().elem_type())
     {
-    case onnx::TensorProto::UNDEFINED: break; // throw std::runtime_error("Unsupported type UNDEFINED");
     case onnx::TensorProto::FLOAT: shape_type = shape::float_type; break;
-    case onnx::TensorProto::UINT8: break; // throw std::runtime_error("Unsupported type UINT8");
     case onnx::TensorProto::INT8: shape_type = shape::int8_type; break;
     case onnx::TensorProto::UINT16: shape_type = shape::uint16_type; break;
     case onnx::TensorProto::INT16: shape_type = shape::int16_type; break;
     case onnx::TensorProto::INT32: shape_type = shape::int32_type; break;
     case onnx::TensorProto::INT64: shape_type = shape::int64_type; break;
-    case onnx::TensorProto::STRING: break; // throw std::runtime_error("Unsupported type STRING");
-    case onnx::TensorProto::BOOL: break; // throw std::runtime_error("Unsupported type BOOL");
     case onnx::TensorProto::FLOAT16: shape_type = shape::half_type; break;
     case onnx::TensorProto::DOUBLE: shape_type = shape::double_type; break;
     case onnx::TensorProto::UINT32: shape_type = shape::uint32_type; break;
     case onnx::TensorProto::UINT64: shape_type = shape::uint64_type; break;
-    case onnx::TensorProto::COMPLEX64: break; // throw std::runtime_error("Unsupported type COMPLEX64");
-    case onnx::TensorProto::COMPLEX128: break; // throw std::runtime_error("Unsupported type COMPLEX128");
+    case onnx::TensorProto::UINT8:
+    case onnx::TensorProto::STRING:
+    case onnx::TensorProto::BOOL:
+    case onnx::TensorProto::UNDEFINED:
+    case onnx::TensorProto::COMPLEX64:
+    case onnx::TensorProto::COMPLEX128:
+        break; // throw std::runtime_error("Unsupported type");
     }

     std::vector<std::size_t> dims;
     auto&& tensor_dims = t.tensor_type().shape().dim();
@@ -1526,6 +1715,14 @@ struct onnx_parser
             }
         }
     }

+    void check_arg_empty(const argument& arg, const std::string& msg)
+    {
+        if(arg.empty())
+        {
+            MIGRAPHX_THROW(msg);
+        }
+    }
 };

 program parse_onnx(const std::string& name)
src/opt/memory_coloring_impl.cpp

@@ -85,6 +85,9 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
             offset += (element_size - (offset % element_size));
         conflict_queue.pop();
     }

+    // when int8 type is used, the offset could be any number
+    // if not 4-byte aligned, miopen int8 convolution can crash
+    offset         = (offset + 3) / 4 * 4;
     segment.offset = offset;
     MIGRAPHX_DEBUG(segment.dump());
     required_bytes = std::max(required_bytes, offset + segment.size);
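The added line is the usual integer round-up-to-multiple idiom: (offset + 3) / 4 * 4 with truncating division, a no-op when the offset is already 4-byte aligned. A quick check of the values it produces:

#include <iostream>

int main()
{
    for(unsigned offset : {0u, 1u, 4u, 5u, 7u, 8u})
    {
        // Round up to the next multiple of 4, matching the int8
        // alignment fix in memory_coloring_impl.
        unsigned aligned = (offset + 3) / 4 * 4;
        std::cout << offset << " -> " << aligned << '\n'; // 0 4 4 8 8 8
    }
}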
src/opt/memory_coloring_impl.hpp

@@ -107,7 +107,7 @@ struct memory_coloring_impl
         return ins->name() == "check_context";
     }

-    static bool is_disjoin(live_range& range1, live_range& range2)
+    static bool is_disjoin(const live_range& range1, const live_range& range2)
     {
         if((range1.size == 0) || (range2.size == 0))
             return false;
src/pass_manager.cpp

@@ -2,7 +2,6 @@
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/instruction.hpp>
-#include <migraphx/operators.hpp>
 #include <migraphx/target.hpp>
 #include <migraphx/env.hpp>
 #include <migraphx/ranges.hpp>
src/program.cpp

@@ -241,7 +241,7 @@ instruction_ref program::remove_instructions(instruction_ref first, instruction_ref last)
     // TODO: Check every element
     assert(has_instruction(first));
     std::for_each(first, last, [&](instruction& ins) { ins.clear_arguments(); });
-    assert(std::all_of(first, last, [&](instruction& ins) { return ins.outputs().empty(); }));
+    assert(std::all_of(first, last, [&](const instruction& ins) { return ins.outputs().empty(); }));
     return impl->instructions.erase(first, last);
 }
src/propagate_constant.cpp

@@ -10,8 +10,8 @@ inline namespace MIGRAPHX_INLINE_NS {
 bool skip_propogate(instruction_ref ins)
 {
-    if(ins->name() == "@literal")
-        return true;
+    if(ins->name() == "contiguous")
+        return skip_propogate(ins->inputs().front());
     auto&& s = ins->get_shape();
     if(s.broadcasted() and not s.scalar())
         return true;
@@ -33,7 +33,7 @@ void propagate_constant::apply(program& p) const
                             ins->outputs().end());
         for(auto child : children)
         {
-            if(skip_propogate(child))
+            if(child->name() == "@literal" or skip_propogate(child))
             {
                 self(child);
                 continue;
src/py/CMakeLists.txt

@@ -12,12 +12,7 @@ if(MIGRAPHX_ENABLE_PYTHON)
         C_VISIBILITY_PRESET hidden
         CXX_VISIBILITY_PRESET hidden
     )
-    if(MIGRAPHX_ENABLE_TF)
-        target_link_libraries(migraphx_py PRIVATE migraphx migraphx_tf migraphx_cpu)
-        target_compile_definitions(migraphx_py PRIVATE -DENABLE_TF)
-    else()
-        target_link_libraries(migraphx_py PRIVATE migraphx migraphx_onnx migraphx_cpu)
-    endif()
+    target_link_libraries(migraphx_py PRIVATE migraphx migraphx_tf migraphx_onnx migraphx_cpu)
     if(MIGRAPHX_ENABLE_GPU)
         target_link_libraries(migraphx_py PRIVATE migraphx_gpu)
         target_compile_definitions(migraphx_py PRIVATE -DHAVE_GPU)
src/py/migraphx_py.cpp

@@ -6,11 +6,9 @@
 #include <migraphx/generate.hpp>
 #include <migraphx/cpu/target.hpp>
 #include <migraphx/stringutils.hpp>
-#ifdef ENABLE_TF
 #include <migraphx/tf.hpp>
-#else
 #include <migraphx/onnx.hpp>
-#endif
 #include <migraphx/type_name.hpp>
 #ifdef HAVE_GPU
 #include <migraphx/gpu/target.hpp>
@@ -104,8 +102,13 @@ migraphx::shape to_shape(const py::buffer_info& info)
             t = as.type_enum();
             n = sizeof(as());
         }
     });

+    if(n == 0)
+    {
+        MIGRAPHX_THROW("MIGRAPHX PYTHON: Unsupported data type" + info.format);
+    }
+
     auto strides = info.strides;
     std::transform(strides.begin(), strides.end(), strides.begin(),
                    [&](auto i) -> std::size_t { return n > 0 ? i / n : 0;
@@ -153,6 +156,7 @@ PYBIND11_MODULE(migraphx, m)
     py::class_<migraphx::target>(m, "target");

     py::class_<migraphx::program>(m, "program")
+        .def("clone", [](migraphx::program& p) { return *(new migraphx::program(p)); })
        .def("get_parameter_shapes", &migraphx::program::get_parameter_shapes)
        .def("get_shape", &migraphx::program::get_shape)
        .def("compile", [](migraphx::program& p, const migraphx::target& t) { p.compile(t); })
@@ -161,16 +165,13 @@ PYBIND11_MODULE(migraphx, m)
        .def("__ne__", std::not_equal_to<migraphx::program>{})
        .def("__repr__", [](const migraphx::program& p) { return migraphx::to_string(p); });

-#ifdef ENABLE_TF
     m.def("parse_tf",
           &migraphx::parse_tf,
           "Parse tf protobuf (default format is nhwc)",
           py::arg("filename"),
           py::arg("is_nhwc") = true);
-#else
     m.def("parse_onnx", &migraphx::parse_onnx);
-#endif

     m.def("get_target", [](const std::string& name) -> migraphx::target {
         if(name == "cpu")
             return migraphx::cpu::target{};
@@ -182,10 +183,16 @@ PYBIND11_MODULE(migraphx, m)
     });
     m.def("generate_argument", &migraphx::generate_argument, py::arg("s"), py::arg("seed") = 0);
-    m.def("quantize", [](migraphx::program& p, std::vector<std::string>& ins_names) {
-        migraphx::quantize(p, ins_names);
-    });
-    m.def("quantize", [](migraphx::program& p) { migraphx::quantize(p, {"all"}); });
+    m.def("quantize_fp16",
+          &migraphx::quantize_fp16,
+          py::arg("prog"),
+          py::arg("ins_names") = std::vector<std::string>{"all"});
+    m.def("quantize_int8",
+          &migraphx::quantize_int8,
+          py::arg("prog"),
+          py::arg("t"),
+          py::arg("calibration") = std::vector<migraphx::program::parameter_map>{},
+          py::arg("ins_names")   = std::vector<std::string>{"dot", "convolution"});
 #ifdef HAVE_GPU
     m.def("allocate_gpu", &migraphx::gpu::allocate_gpu, py::arg("s"), py::arg("host") = false);
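The new bindings lean on pybind11 default arguments so Python callers can invoke quantize_fp16(prog) without spelling out ins_names. A minimal sketch of the same pattern (assumes pybind11 is available; the module and function names here are illustrative only):

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <cstddef>
#include <string>
#include <vector>

namespace py = pybind11;

// Stand-in for quantize_fp16, just to show the binding shape.
std::size_t quantize_demo(int prog, const std::vector<std::string>& ins_names)
{
    (void)prog;
    return ins_names.size();
}

PYBIND11_MODULE(demo, m)
{
    // Same default-argument pattern as the quantize_fp16 binding:
    // Python callers may omit ins_names and get {"all"}.
    m.def("quantize_demo",
          &quantize_demo,
          py::arg("prog"),
          py::arg("ins_names") = std::vector<std::string>{"all"});
}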
src/quantization.cpp
@@ -3,33 +3,104 @@
 #include <migraphx/instruction.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/op/convert.hpp>
+#include <migraphx/op/clip.hpp>
+#include <migraphx/op/round.hpp>
+#include <migraphx/op/dot.hpp>
+#include <migraphx/op/mul.hpp>
+#include <migraphx/op/add.hpp>
+#include <migraphx/op/quant_dot.hpp>
+#include <migraphx/op/capture.hpp>
+#include <migraphx/op/convolution.hpp>
+#include <migraphx/op/quant_convolution.hpp>
+#include <migraphx/op/multibroadcast.hpp>
 #include <migraphx/stringutils.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/target.hpp>
+#include <utility>
+#include <set>
+#include <iomanip>
+#include <fstream>
+#include <algorithm>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-instruction_ref insert_fp16(program& prog,
-                            instruction_ref& ins,
-                            shape::type_t type,
-                            std::unordered_map<instruction_ref, instruction_ref>& map_fp16)
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_INT8_QUANTIZATION_PARAMS)
+
+instruction_ref insert_quant_ins(program& prog,
+                                 instruction_ref& ins,
+                                 shape::type_t type,
+                                 std::unordered_map<instruction_ref, instruction_ref>& map_ins,
+                                 float scale = 1.0f,
+                                 float shift = 0.0f)
 {
-    if(map_fp16.count(ins) > 0)
+    if(map_ins.count(ins) > 0)
     {
-        return map_fp16[ins];
+        return map_ins[ins];
     }
+
+    if(ins->name() == "undefined")
+    {
+        return ins;
+    }
+
+    assert(ins->get_shape().type() == shape::float_type or
+           ins->get_shape().type() == shape::double_type or
+           ins->get_shape().type() == shape::int32_type or
+           ins->get_shape().type() == shape::half_type);
+    instruction_ref quant_ins{};
+    auto insert_loc = std::next(ins);
+    if(type == shape::int8_type)
+    {
+        auto scaled_ins = ins;
+        if(scale != 1.0f)
+        {
+            auto float_ins = scaled_ins;
+            if(scaled_ins->get_shape().type() != shape::float_type)
+            {
+                float_ins = prog.insert_instruction(
+                    insert_loc, op::convert{shape::float_type}, scaled_ins);
+            }
+            std::vector<float> vec_scale(scaled_ins->get_shape().elements(), scale);
+            auto l_scale = prog.add_literal(literal(float_ins->get_shape(), vec_scale));
+            scaled_ins   = prog.insert_instruction(insert_loc, op::mul{}, l_scale, float_ins);
+        }
+
+        auto shifted_ins = scaled_ins;
+        if(shift != 0.0f)
+        {
+            auto float_ins = shifted_ins;
+            if(shifted_ins->get_shape().type() != shape::float_type)
+            {
+                float_ins = prog.insert_instruction(
+                    insert_loc, op::convert{shape::float_type}, shifted_ins);
+            }
+            std::vector<float> vec_shift(shifted_ins->get_shape().elements(), shift);
+            auto l_shift = prog.add_literal(literal(float_ins->get_shape(), vec_shift));
+            shifted_ins  = prog.insert_instruction(insert_loc, op::add{}, l_shift, float_ins);
+        }
+
+        auto rounded_ins = prog.insert_instruction(insert_loc, op::round{}, shifted_ins);
+        auto clipped_ins =
+            prog.insert_instruction(insert_loc, op::clip{127.0f, -128.0f}, rounded_ins);
+        quant_ins = prog.insert_instruction(insert_loc, op::convert{type}, clipped_ins);
+    }
+    else
+    {
+        quant_ins = prog.insert_instruction(insert_loc, op::convert{type}, ins);
+    }

-    assert(ins->get_shape().type() == shape::float_type ||
-           ins->get_shape().type() == shape::double_type);
-    instruction_ref ins_fp16{};
-    ins_fp16 = prog.insert_instruction(std::next(ins), op::convert{type}, ins);
-    map_fp16[ins] = ins_fp16;
-    return ins_fp16;
+    map_ins[ins] = quant_ins;
+    return quant_ins;
 }

-void quantize(program& prog, const std::vector<std::string>& ins_names)
+// This function is to convert any instructions specified in the input
+// from double or float to float16 by inserting a convert operator.
+// For the conversion, there could be cases of overflowing, but it
+// is very rare in the area of deep learning, so we just do a
+// truncation of the input to get the fp16.
+void quantize_fp16(program& prog, const std::vector<std::string>& ins_names)
 {
     std::unordered_map<instruction_ref, instruction_ref> map_fp16;
     for(auto ins : iterator_for(prog))
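insert_quant_ins builds the int8 conversion as mul, add, round, clip, convert; numerically that is v_int8 = clamp(round(fp * scale + shift), -128, 127). A scalar sketch of the same arithmetic:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

int8_t quantize_value(float fp, float scale, float shift)
{
    // Same chain as the inserted instructions: scale, shift, round,
    // clip to the int8 range, then convert.
    float v = std::round(fp * scale + shift);
    v       = std::min(127.0f, std::max(-128.0f, v));
    return static_cast<int8_t>(v);
}

int main()
{
    // With scale = 127 / max_abs (here max_abs = 2.0), 2.0 maps to 127.
    std::cout << int(quantize_value(2.0f, 63.5f, 0.0f)) << '\n';  // 127
    std::cout << int(quantize_value(-3.0f, 63.5f, 0.0f)) << '\n'; // -128 (clipped)
}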
@@ -53,13 +124,14 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
             // if the input is a convert operator, uses its input
             // as its current input
             instruction_ref input_fp16{};
-            if(input->name() == "convert")
+            if(input->name() == "convert" and
+               input->inputs().front()->get_shape().type() == shape::half_type)
             {
                 input_fp16 = input->inputs().front();
             }
             else
             {
-                input_fp16 = insert_fp16(prog, input, shape::half_type, map_fp16);
+                input_fp16 = insert_quant_ins(prog, input, shape::half_type, map_fp16);
             }
             converted_inputs.push_back(input_fp16);
         }
@@ -79,29 +151,390 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
auto
ins_shape
=
compute_shape
(
op
,
converted_inputs
);
if
(
ins_shape
.
type
()
!=
orig_type
)
{
// insert another convert instruction to convert it back
if
(
ins
==
std
::
prev
(
prog
.
end
()))
// check the dead code case to avoid assert
bool
output_empty
=
ins
->
outputs
().
empty
();
auto
ins_orig_type
=
prog
.
insert_instruction
(
std
::
next
(
ins
),
op
::
convert
{
orig_type
},
ins
);
if
(
!
output_empty
)
{
prog
.
replace_instruction
(
ins
,
ins_orig_type
);
}
}
prog
.
replace_instruction
(
ins
,
op
,
converted_inputs
);
}
}
static
void
ins_quantize_int8
(
program
&
prog
,
instruction_ref
ins
,
std
::
vector
<
instruction_ref
>&
converted_inputs
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>>&
ins_quant_params
)
{
auto
orig_type
=
ins
->
get_shape
().
type
();
auto
inputs
=
ins
->
inputs
();
if
(
ins
->
name
()
==
"dot"
)
{
auto
dot_op
=
any_cast
<
op
::
dot
>
(
ins
->
get_operator
());
float
new_alpha
=
dot_op
.
alpha
/
(
ins_quant_params
[
0
].
first
*
ins_quant_params
[
1
].
first
);
float
new_beta
=
dot_op
.
beta
;
// We need additional checking about the quant_alpha value. If
// abs(quant_alpha) > 50 (some tmp value set here), we can convert
// it to an integer as the new_alpha in the quant_dot
float
threshold
=
50.0
f
;
if
(
fabs
(
new_alpha
)
>=
threshold
&&
fabs
(
new_beta
)
>=
threshold
)
{
int32_t
quant_alpha
=
static_cast
<
int32_t
>
(
std
::
round
(
new_alpha
));
int32_t
quant_beta
=
static_cast
<
int32_t
>
(
std
::
round
(
new_beta
));
if
(
shape
::
int32_type
==
orig_type
)
{
prog
.
add_instruction
(
op
::
convert
{
orig_type
},
ins
);
prog
.
replace_instruction
(
ins
,
op
::
quant_dot
{
quant_alpha
,
quant_beta
},
converted_inputs
);
}
else
{
// check the dead code case to avoid assert
bool
output_empty
=
ins
->
outputs
().
empty
();
auto
ins_orig_type
=
prog
.
insert_instruction
(
std
::
next
(
ins
),
op
::
convert
{
orig_type
},
ins
);
if
(
!
output_empty
)
auto
quant_dot
=
prog
.
insert_instruction
(
ins
,
op
::
quant_dot
{
quant_alpha
,
quant_beta
},
converted_inputs
);
prog
.
replace_instruction
(
ins
,
op
::
convert
{
orig_type
},
quant_dot
);
}
}
// either alpha or beta cannot be quantized because of too big
// relative rounding error
else
{
if
(
converted_inputs
.
size
()
==
3
)
{
converted_inputs
.
pop_back
();
}
auto
q_dot
=
prog
.
insert_instruction
(
ins
,
op
::
quant_dot
{
1
,
0
},
converted_inputs
);
auto
f_dot
=
prog
.
insert_instruction
(
ins
,
op
::
convert
{
shape
::
float_type
},
q_dot
);
auto
c_shape
=
q_dot
->
get_shape
();
std
::
vector
<
float
>
vec_alpha
(
c_shape
.
elements
(),
new_alpha
);
auto
l_alpha
=
prog
.
add_literal
(
literal
({
shape
::
float_type
,
c_shape
.
lens
()},
vec_alpha
));
if
(
inputs
.
size
()
==
3
and
dot_op
.
beta
!=
0.0
f
)
{
auto
alpha_ab
=
prog
.
insert_instruction
(
ins
,
op
::
mul
{},
l_alpha
,
f_dot
);
std
::
vector
<
float
>
vec_beta
(
c_shape
.
elements
(),
dot_op
.
beta
);
auto
l_beta
=
prog
.
add_literal
(
literal
({
shape
::
float_type
,
c_shape
.
lens
()},
vec_beta
));
instruction_ref
beta_c
{};
if
(
orig_type
!=
shape
::
float_type
)
{
prog
.
replace_instruction
(
ins
,
ins_orig_type
);
auto
fp32_c
=
prog
.
insert_instruction
(
ins
,
op
::
convert
{
shape
::
float_type
},
inputs
.
back
());
beta_c
=
prog
.
insert_instruction
(
ins
,
op
::
mul
{},
l_beta
,
fp32_c
);
}
else
{
beta_c
=
prog
.
insert_instruction
(
ins
,
op
::
mul
{},
l_beta
,
inputs
.
back
());
}
if
(
orig_type
==
shape
::
float_type
)
{
prog
.
replace_instruction
(
ins
,
op
::
add
{},
alpha_ab
,
beta_c
);
}
else
{
auto
f_res
=
prog
.
insert_instruction
(
ins
,
op
::
add
{},
alpha_ab
,
beta_c
);
prog
.
replace_instruction
(
ins
,
op
::
convert
{
orig_type
},
f_res
);
}
}
else
{
if
(
orig_type
==
shape
::
float_type
)
{
prog
.
replace_instruction
(
ins
,
op
::
mul
{},
l_alpha
,
f_dot
);
}
else
{
auto
alpha_ab
=
prog
.
insert_instruction
(
ins
,
op
::
mul
{},
l_alpha
,
f_dot
);
prog
.
replace_instruction
(
ins
,
op
::
convert
{
orig_type
},
alpha_ab
);
}
}
}
}
else
if
(
ins
->
name
()
==
"convolution"
)
{
// Current MIOpen convolution does not support alpha and beta,
// so we need a separate multiply to adjust the output
auto
conv_op
=
any_cast
<
op
::
convolution
>
(
ins
->
get_operator
());
auto
padding
=
conv_op
.
padding
;
auto
stride
=
conv_op
.
stride
;
auto
dilation
=
conv_op
.
dilation
;
auto
padding_mode
=
conv_op
.
padding_mode
;
auto
group
=
conv_op
.
group
;
auto
adjust_factor
=
1.0
f
/
(
ins_quant_params
[
0
].
first
*
ins_quant_params
[
1
].
first
);
prog
.
replace_instruction
(
ins
,
op
,
converted_inputs
);
auto
quant_conv
=
prog
.
insert_instruction
(
ins
,
op
::
quant_convolution
{
padding
,
stride
,
dilation
,
padding_mode
,
group
},
converted_inputs
);
float
threshold
=
50.0
f
;
std
::
vector
<
float
>
vec_factor
(
quant_conv
->
get_shape
().
elements
(),
adjust_factor
);
if
(
quant_conv
->
get_shape
().
type
()
==
orig_type
and
adjust_factor
>=
threshold
)
{
auto
l_factor
=
prog
.
add_literal
(
literal
(
quant_conv
->
get_shape
(),
vec_factor
.
begin
(),
vec_factor
.
end
()));
prog
.
replace_instruction
(
ins
,
op
::
mul
{},
quant_conv
,
l_factor
);
}
// convert quant_conv output to float type, multiply the factor and
// conver back to original type
else
{
auto
float_conv
=
prog
.
insert_instruction
(
ins
,
op
::
convert
{
shape
::
float_type
},
quant_conv
);
auto
l_factor
=
prog
.
add_literal
(
literal
(
float_conv
->
get_shape
(),
vec_factor
));
if
(
orig_type
==
shape
::
float_type
)
{
prog
.
replace_instruction
(
ins
,
op
::
mul
{},
l_factor
,
float_conv
);
}
else
{
auto
adjusted_conv
=
prog
.
insert_instruction
(
ins
,
op
::
mul
{},
l_factor
,
float_conv
);
prog
.
replace_instruction
(
ins
,
op
::
convert
{
orig_type
},
adjusted_conv
);
}
}
}
else
{
MIGRAPHX_THROW
(
"QUANTIZE_INT8: does not support operator "
+
ins
->
name
());
}
}
void
quantize
(
program
&
prog
)
{
quantize
(
prog
,
{
"all"
});
}
// int8 quantization is different from fp16 since int8 can only handle value
// -128 ~ 127. To convert the float or double to int8, we need a scale and
// a shift, then the convert can be done as v_int8 = fp * scale + shift.
// To simplify the changes, we consider shift as 0.0f for now.
void
quantize_int8_impl
(
program
&
prog
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>>&
quant_params
,
const
std
::
vector
<
std
::
string
>&
ins_names
)
{
if
(
enabled
(
MIGRAPHX_INT8_QUANTIZATION_PARAMS
{}))
{
for
(
std
::
size_t
i
=
0
;
i
<
quant_params
.
size
();
++
i
)
{
auto
param
=
quant_params
.
at
(
i
);
std
::
cout
<<
"ins_index = "
<<
i
<<
", scale = "
<<
param
.
first
<<
", shift = "
<<
param
.
second
<<
std
::
endl
;
}
std
::
cout
<<
std
::
endl
;
}
// For now, we only support the int8 quantization of gemm and convolution
std
::
set
<
std
::
string
>
op_names
=
{
"convolution"
,
"dot"
};
std
::
set
<
std
::
string
>
input_ins_names
(
ins_names
.
begin
(),
ins_names
.
end
());
if
(
!
std
::
includes
(
op_names
.
begin
(),
op_names
.
end
(),
input_ins_names
.
begin
(),
input_ins_names
.
end
()))
{
MIGRAPHX_THROW
(
"QUANTIZE_INT8: only support DOT and CONVOLUTION operation"
);
}
std
::
size_t
quant_param_index
=
0
;
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_quant_ins
;
std
::
unordered_map
<
instruction_ref
,
std
::
size_t
>
map_ins_index
;
for
(
auto
ins
:
iterator_for
(
prog
))
{
if
(
not
contains
(
ins_names
,
ins
->
name
()))
{
continue
;
}
// for the dot operator, there could be 2 or 3 input arguments
// if the 3rd argument is available, convert it to an int32.
std
::
vector
<
instruction_ref
>
converted_inputs
;
// process all inputs, if input is a fp32 or fp64, convert it
// to a int8 type by adding a convert operator and replace
// the operator with the corresponding int8 version
auto
inputs
=
ins
->
inputs
();
std
::
vector
<
std
::
pair
<
float
,
float
>>
ins_quant_params
;
for
(
auto
input
:
inputs
)
{
// calculate the index of each instruction to be quantized
std
::
size_t
ins_index
=
(
map_ins_index
.
count
(
input
)
>
0
)
?
map_ins_index
[
input
]
:
quant_param_index
++
;
map_ins_index
[
input
]
=
ins_index
;
auto
param
=
quant_params
[
map_ins_index
[
input
]];
ins_quant_params
.
push_back
(
param
);
// In general, the target_type is int8, but for the dot
// operation, if it has 3 inputs, then the last one should
// be converted to int32_type
shape
::
type_t
quant_type
=
shape
::
int8_type
;
if
((
ins
->
name
()
==
"dot"
)
and
(
inputs
.
size
()
==
3
)
and
(
input
==
inputs
.
back
()))
{
quant_type
=
shape
::
int32_type
;
}
auto
s
=
input
->
get_shape
();
if
((
s
.
type
()
==
shape
::
float_type
or
s
.
type
()
==
shape
::
double_type
or
s
.
type
()
==
shape
::
half_type
or
s
.
type
()
==
shape
::
int32_type
)
and
s
.
type
()
!=
quant_type
)
{
// if the input is a convert operator, uses its input
// as its current input
instruction_ref
quant_input
{};
if
(
input
->
name
()
==
"convert"
and
input
->
inputs
().
front
()
->
get_shape
().
type
()
==
quant_type
)
{
quant_input
=
input
->
inputs
().
front
();
// the scale in this case is not used, so tune the scale
// to 1.0f for this parameter
ins_quant_params
.
back
()
=
std
::
pair
<
float
,
float
>
(
1.0
f
,
0.0
f
);
}
else
{
quant_input
=
insert_quant_ins
(
prog
,
input
,
quant_type
,
map_quant_ins
,
param
.
first
,
param
.
second
);
}
converted_inputs
.
push_back
(
quant_input
);
}
else
{
converted_inputs
.
push_back
(
input
);
}
}
// no change for the input, go to the next instruction
if
(
inputs
==
converted_inputs
)
{
continue
;
}
ins_quantize_int8
(
prog
,
ins
,
converted_inputs
,
ins_quant_params
);
}
if
(
quant_param_index
!=
quant_params
.
size
())
{
MIGRAPHX_THROW
(
"QUANTIZE_INT8: number of scales does not match"
);
}
}
void
quantize_int8
(
program
&
prog
,
const
target
&
t
,
const
std
::
vector
<
program
::
parameter_map
>&
calibration
,
const
std
::
vector
<
std
::
string
>&
ins_names
)
{
// insert capture operator
auto
cap_prog
=
prog
;
auto
int8_quant_params
=
capture_arguments
(
cap_prog
,
t
,
ins_names
);
// use the calibration data to compute the quantization scale
cap_prog
.
compile
(
t
);
// use all calibration data to run the program to calculate the
// quantization scale and shift
for
(
auto
&&
arg
:
calibration
)
{
program
::
parameter_map
m
;
for
(
auto
&&
x
:
cap_prog
.
get_parameter_shapes
())
{
if
(
arg
.
count
(
x
.
first
)
>
0
)
{
assert
(
x
.
second
==
arg
.
at
(
x
.
first
).
get_shape
());
m
[
x
.
first
]
=
t
.
copy_to
(
arg
.
at
(
x
.
first
));
}
else
{
m
[
x
.
first
]
=
t
.
allocate
(
x
.
second
);
}
}
cap_prog
.
eval
(
m
);
}
quantize_int8_impl
(
prog
,
*
int8_quant_params
,
ins_names
);
}
// For the input of each input argument, we need to insert a
// capture operator to compute the scale and shift
std
::
size_t
capture_arguments
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
,
const
std
::
function
<
void
(
std
::
size_t
,
std
::
vector
<
argument
>
)
>&
func
)
{
size_t
num_quant_params
=
0
;
// the int8 quantization only support dot and convolution
std
::
set
<
std
::
string
>
op_names
=
{
"dot"
,
"convolution"
};
std
::
set
<
std
::
string
>
input_ins_names
(
ins_names
.
begin
(),
ins_names
.
end
());
if
(
!
std
::
includes
(
op_names
.
begin
(),
op_names
.
end
(),
input_ins_names
.
begin
(),
input_ins_names
.
end
()))
{
MIGRAPHX_THROW
(
"CAPTURE_ARGUMENTS: input operator is not supported"
);
}
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
ins_map
;
for
(
auto
ins
:
iterator_for
(
prog
))
{
if
(
not
contains
(
ins_names
,
ins
->
name
()))
{
continue
;
}
auto
inputs
=
ins
->
inputs
();
std
::
vector
<
instruction_ref
>
new_args
;
for
(
auto
input
:
inputs
)
{
instruction_ref
new_ins
{};
if
(
ins_map
.
count
(
input
)
>
0
)
{
new_ins
=
ins_map
[
input
];
}
else
{
new_ins
=
prog
.
insert_instruction
(
std
::
next
(
input
),
op
::
capture
{
num_quant_params
++
,
func
},
input
);
ins_map
[
input
]
=
new_ins
;
}
new_args
.
push_back
(
new_ins
);
}
instruction
::
replace
(
ins
,
ins
->
get_operator
(),
ins
->
get_shape
(),
new_args
);
}
return
num_quant_params
;
}
std::shared_ptr<std::vector<std::pair<float, float>>>
capture_arguments_impl(program& prog, const target& t, const std::vector<std::string>& ins_names)
{
    std::shared_ptr<std::vector<std::pair<float, float>>> int8_quant_params =
        std::make_shared<std::vector<std::pair<float, float>>>();
    std::shared_ptr<std::vector<float>> max_abs_vals = std::make_shared<std::vector<float>>();

    auto calc_quant_params = [int8_quant_params, max_abs_vals, &t](std::size_t ins_index,
                                                                   std::vector<argument> args) {
        std::pair<float, float> param_pair{64.0f, 0.0f};

        // scale and shift are needed only for the int8 type, and we do not
        // consider shift, so set shift to 0
        std::vector<float> vec_val;
        argument arg = t.copy_from(args.front());
        arg.visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
        auto max_val                = *std::max_element(vec_val.begin(), vec_val.end());
        auto min_val                = *std::min_element(vec_val.begin(), vec_val.end());
        auto max_abs                = std::max(std::fabs(max_val), std::fabs(min_val));
        max_abs_vals->at(ins_index) = std::max(max_abs_vals->at(ins_index), max_abs);

        // if all values are 0, no need to do scaling
        if(max_abs_vals->at(ins_index) == 0.0f)
        {
            param_pair.first = 1.0f;
        }
        else
        {
            param_pair.first = 127.0f / max_abs_vals->at(ins_index);
        }
        int8_quant_params->at(ins_index) = param_pair;
    };

    auto num_params = capture_arguments(prog, ins_names, calc_quant_params);

    int8_quant_params->resize(num_params, std::pair<float, float>(64.0f, 0.0f));
    max_abs_vals->resize(num_params, 0.0f);

    return int8_quant_params;
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
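A worked example of the scale formula used by calc_quant_params above: with a running maximum magnitude of 3.2, the scale 127/3.2 maps the observed range onto the full int8 interval, and a quantize/dequantize round trip costs at most half a quantization step. A standalone sketch with illustrative values:

#include <cmath>
#include <cstdint>
#include <iostream>

int main()
{
    float max_abs = 3.2f;
    float scale   = 127.0f / max_abs; // ≈ 39.6875
    float x       = 1.234f;
    auto q        = static_cast<std::int8_t>(std::round(x * scale)); // 49
    float back    = q / scale;                                       // ≈ 1.2346
    std::cout << "scale = " << scale << ", q = " << int(q) << ", back = " << back << "\n";
}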
src/fwd_conv_batchnorm_rewrite.cpp → src/rewrite_batchnorm.cpp
View file @ 20b1d690
-#include <migraphx/fwd_conv_batchnorm_rewrite.hpp>
+#include <migraphx/rewrite_batchnorm.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/op/batch_norm.hpp>
 #include <migraphx/op/broadcast.hpp>
 #include <migraphx/op/add.hpp>
 #include <migraphx/op/mul.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/dfor.hpp>
...
@@ -11,7 +12,7 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-void fwd_conv_batchnorm_rewrite::apply(program& p) const
+void rewrite_batchnorm::apply(program& p) const
 {
     for(auto ins : iterator_for(p))
     {
...
@@ -25,46 +26,30 @@ void fwd_conv_batchnorm_rewrite::apply(program& p) const
         if(any_of({gamma, bias, mean, variance}, [](auto arg) { return arg.empty(); }))
             continue;

-        auto conv_ins = ins->inputs()[0];
-        if(conv_ins->name() != "convolution")
-            continue;
-        // Get convolution weights
-        auto weights = conv_ins->inputs()[1]->eval();
-        if(weights.empty())
-            continue;
+        auto s = shape{ins->get_shape().type(), {ins->get_shape().lens()[1]}};

         // Get epsilon
         auto bn_op   = any_cast<op::batch_norm_inference>(ins->get_operator());
         auto epsilon = bn_op.epsilon;
-        // Get convolution op
-        auto conv_op      = conv_ins->get_operator();
-        auto weights_lens = weights.get_shape().lens();
-        auto conv_lens    = conv_ins->get_shape().lens();
-        argument new_weights{weights.get_shape()};
-        argument new_bias{{bias.get_shape().type(), {bias.get_shape().elements()}}};
-        visit_all(weights, gamma, bias, mean, variance, new_weights, new_bias)(
-            [&](auto weights2,
-                auto gamma2,
-                auto bias2,
-                auto mean2,
-                auto variance2,
-                auto new_weights2,
-                auto new_bias2) {
-                dfor(weights_lens[0], weights_lens[1], weights_lens[2], weights_lens[3])(
-                    [&](std::size_t k, std::size_t c, std::size_t h, std::size_t w) {
-                        new_weights2(k, c, h, w) =
-                            gamma2[k] / std::sqrt(variance2[k] + epsilon) * weights2(k, c, h, w);
-                    });
-                dfor(new_bias.get_shape().elements())([&](std::size_t c) {
-                    new_bias2[c] =
-                        bias2[c] - (gamma2[c] * mean2[c] / std::sqrt(variance2[c] + epsilon));
+        argument a{s};
+        argument b{s};
+        visit_all(gamma, bias, mean, variance, a, b)(
+            [&](auto gamma2, auto bias2, auto mean2, auto variance2, auto a2, auto b2) {
+                dfor(a.get_shape().elements())([&](std::size_t c) {
+                    a2[c] = gamma2[c] / std::sqrt(variance2[c] + epsilon);
+                });
+                dfor(b.get_shape().elements())([&](std::size_t c) {
+                    b2[c] =
+                        bias2[c] - (gamma2[c] * mean2[c] / std::sqrt(variance2[c] + epsilon));
                 });
             });
-        // Replace convolution instruction with updated weights
-        auto l_weights = p.add_literal({weights.get_shape(), new_weights.data()});
-        auto l_bias    = p.add_literal({new_bias.get_shape(), new_bias.data()});
-        auto c = p.replace_instruction(conv_ins, conv_op, {conv_ins->inputs()[0], l_weights});
-        auto b = p.insert_instruction(ins, op::broadcast{1, c->get_shape().lens()}, l_bias);
-        p.replace_instruction(ins, op::add{}, {c, b});
+        auto broadcast   = op::broadcast{1, ins->get_shape().lens()};
+        auto a_ins       = p.add_literal({a.get_shape(), a.data()});
+        auto a_broadcast = p.insert_instruction(ins, broadcast, a_ins);
+        auto mul = p.insert_instruction(ins, op::mul{}, ins->inputs().front(), a_broadcast);
+        auto b_ins       = p.add_literal({b.get_shape(), b.data()});
+        auto b_broadcast = p.insert_instruction(ins, broadcast, b_ins);
+        auto add         = p.insert_instruction(ins, op::add{}, mul, b_broadcast);
+        p.replace_instruction(ins, add);
     }
 }
...
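The rewrite above relies on the algebraic fold of inference-time batch normalization, bn(x) = gamma*(x - mean)/sqrt(variance + epsilon) + bias, into a single per-channel affine map a*x + b. A standalone numeric check with made-up values:

#include <cmath>
#include <iostream>

int main()
{
    float gamma = 1.5f, bias = 0.2f, mean = 0.7f, variance = 4.0f, epsilon = 1e-5f;
    float x = 2.0f;

    float bn = gamma * (x - mean) / std::sqrt(variance + epsilon) + bias;

    // the folded coefficients computed by the pass
    float a = gamma / std::sqrt(variance + epsilon);
    float b = bias - gamma * mean / std::sqrt(variance + epsilon);

    std::cout << bn << " == " << a * x + b << "\n"; // both ≈ 1.175
}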
src/rewrite_pooling.cpp 0 → 100644
View file @ 20b1d690
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/reduce_mean.hpp>
#include <migraphx/program.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

void rewrite_pooling::apply(program& prog) const
{
    for(auto ins : iterator_for(prog))
    {
        if(ins->name() != "pooling")
            continue;
        if(ins->get_shape().lens().size() != 4)
            continue;
        if(ins->inputs().empty())
            continue;
        auto&& s  = ins->inputs().front()->get_shape();
        auto&& op = any_cast<op::pooling>(ins->get_operator());
        if(op.mode != "average")
            continue;
        if(op.padding[0] != 0 and op.padding[1] != 0)
            continue;
        if(op.stride[0] != 1 and op.stride[1] != 1)
            continue;
        if(s.lens()[2] != op.lengths[0] and s.lens()[3] != op.lengths[1])
            continue;
        std::int64_t n = s.lens()[0];
        std::int64_t c = s.lens()[1];
        auto reshape =
            prog.insert_instruction(ins, op::reshape{{n * c, -1}}, ins->inputs().front());
        auto pooling = prog.insert_instruction(ins, op::reduce_mean{{1}}, reshape);
        prog.replace_instruction(ins, op::reshape{{n, c, 1, 1}}, pooling);
    }
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
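The conditions checked above (average mode, no padding, stride 1, window equal to the full HxW plane) are exactly what makes the pooling a global average, so it can be computed as a row-wise mean over an (N*C, H*W) reshape. A standalone check with a tiny 2x2 plane:

#include <iostream>
#include <numeric>
#include <vector>

int main()
{
    // one image (N=1), one channel (C=1), H=W=2
    std::vector<float> plane = {1.0f, 2.0f, 3.0f, 4.0f};

    // average pooling whose window covers the whole plane yields one value,
    // the same as reduce_mean over axis 1 of the (1, 4) reshape
    float pooled = std::accumulate(plane.begin(), plane.end(), 0.0f) / plane.size();

    std::cout << pooled << "\n"; // 2.5
}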
src/rewrite_rnn.cpp
View file @ 20b1d690
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/broadcast.hpp>
#include <migraphx/op/concat.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/gru.hpp>
#include <migraphx/op/lstm.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/rnn.hpp>
#include <migraphx/op/rnn_last_output.hpp>
#include <migraphx/op/slice.hpp>
#include <migraphx/op/squeeze.hpp>
#include <migraphx/op/sub.hpp>
#include <migraphx/op/transpose.hpp>
#include <migraphx/op/unsqueeze.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/common.hpp>
...
@@ -204,17 +217,19 @@ std::vector<instruction_ref> rewrite_rnn::vanilla_rnn_cell(bool is_forward,
     auto tran_sr = prog.insert_instruction(ins, op::transpose{perm}, sr);

     // initial hidden state
-    auto sih = prog.insert_instruction(ins, op::squeeze{{0}}, ih);
+    auto sih      = prog.insert_instruction(ins, op::squeeze{{0}}, ih);
+    auto sih_lens = sih->get_shape().lens();

     // bias
+    instruction_ref bb{};
     if(bias != prog.end())
     {
-        long hs    = r->get_shape().lens()[2];
+        long hs    = static_cast<long>(r->get_shape().lens()[2]);
         auto sbias = prog.insert_instruction(ins, op::squeeze{{0}}, bias);
         auto wb    = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sbias);
         auto rb    = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sbias);
-        auto b     = prog.insert_instruction(ins, op::add{}, wb, rb);
-        bias       = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape().lens()}, b);
+        auto wrb   = prog.insert_instruction(ins, op::add{}, wb, rb);
+        bb         = prog.insert_instruction(ins, op::broadcast{1, sih_lens}, wrb);
     }

     instruction_ref hidden_out = prog.end();
...
@@ -228,20 +243,15 @@ std::vector<instruction_ref> rewrite_rnn::vanilla_rnn_cell(bool is_forward,
         xt = prog.insert_instruction(ins, op::squeeze{{0}}, xt);

         auto xt_wi = prog.insert_instruction(ins, op::dot{}, xt, tran_sw);
         auto ht_ri = prog.insert_instruction(ins, op::dot{}, sih, tran_sr);
-        auto xt_ht = prog.insert_instruction(ins, op::add{}, xt_wi, ht_ri);
-        instruction_ref ht;
         if(bias != prog.end())
         {
-            ht = prog.insert_instruction(ins, op::add{}, xt_ht, bias);
-        }
-        else
-        {
-            ht = xt_ht;
+            xt_wi = prog.insert_instruction(ins, op::add{}, xt_wi, bb);
         }
+        auto xt_ht = prog.insert_instruction(ins, op::add{}, xt_wi, ht_ri);

         // apply activation function
-        ht  = prog.insert_instruction(ins, actv_func, ht);
-        sih = ht;
+        auto ht = prog.insert_instruction(ins, actv_func, xt_ht);
+        sih     = ht;

         // add the dimensions of sequence length (axis 0 for sequence length,
         // axis 1 for num_directions)
...
@@ -485,62 +495,41 @@ std::vector<instruction_ref> rewrite_rnn::gru_cell(bool is_forward,
     long hs = static_cast<long>(r_shape.lens()[2]);
     migraphx::shape s(seq_shape.type(), {seq_shape.lens()[1], r_shape.lens()[2]});
-    std::vector<int> data(s.elements(), 1);
+    std::vector<float> data(s.elements(), 1.0f);
     auto l1 = prog.add_literal(migraphx::literal{s, data});

-    // weight matrix
+    // w matrix, squeeze to 2-dim and do a transpose
     std::vector<int64_t> perm{1, 0};
-    auto sw      = prog.insert_instruction(ins, op::squeeze{{0}}, w);
-    auto wz      = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sw);
-    auto tran_wz = prog.insert_instruction(ins, op::transpose{perm}, wz);
-    auto wr      = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sw);
-    auto tran_wr = prog.insert_instruction(ins, op::transpose{perm}, wr);
-    auto wh      = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, sw);
-    auto tran_wh = prog.insert_instruction(ins, op::transpose{perm}, wh);
+    auto sw = prog.insert_instruction(ins, op::squeeze{{0}}, w);
+    auto tw = prog.insert_instruction(ins, op::transpose{perm}, sw);

-    auto sr      = prog.insert_instruction(ins, op::squeeze{{0}}, r);
-    auto rz      = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sr);
-    auto tran_rz = prog.insert_instruction(ins, op::transpose{perm}, rz);
-    auto rr      = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sr);
-    auto tran_rr = prog.insert_instruction(ins, op::transpose{perm}, rr);
-    auto rh      = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, sr);
-    auto tran_rh = prog.insert_instruction(ins, op::transpose{perm}, rh);
+    // r slide to two part, zr and h
+    auto sr   = prog.insert_instruction(ins, op::squeeze{{0}}, r);
+    auto rzr  = prog.insert_instruction(ins, op::slice{{0}, {0}, {2 * hs}}, sr);
+    auto trzr = prog.insert_instruction(ins, op::transpose{perm}, rzr);
+    auto rh   = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, sr);
+    auto trh  = prog.insert_instruction(ins, op::transpose{perm}, rh);

     // initial states
-    auto sih = prog.insert_instruction(ins, op::squeeze{{0}}, ih);
+    auto sih  = prog.insert_instruction(ins, op::squeeze{{0}}, ih);
+    size_t bs = ih->get_shape().lens()[1];

     // bias
-    instruction_ref brcst_bz{};
-    instruction_ref brcst_br{};
-    instruction_ref brcst_wbh{};
-    instruction_ref brcst_rbh{};
-    instruction_ref brcst_bh{};
+    instruction_ref bwb{};
+    instruction_ref brb_zr{};
+    instruction_ref brb_h{};
     if(bias != prog.end())
     {
-        auto broadcast_lens = sih->get_shape().lens();
-        auto sbias = prog.insert_instruction(ins, op::squeeze{{0}}, bias);
-        auto wbz   = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sbias);
-        auto wbr   = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sbias);
-        auto wbh   = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, sbias);
-        brcst_wbh  = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, wbh);
-        auto rbz   = prog.insert_instruction(ins, op::slice{{0}, {3 * hs}, {4 * hs}}, sbias);
-        auto rbr   = prog.insert_instruction(ins, op::slice{{0}, {4 * hs}, {5 * hs}}, sbias);
-        auto rbh   = prog.insert_instruction(ins, op::slice{{0}, {5 * hs}, {6 * hs}}, sbias);
-        brcst_rbh  = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, rbh);
-        auto bz    = prog.insert_instruction(ins, op::add{}, wbz, rbz);
-        brcst_bz   = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, bz);
-        auto br    = prog.insert_instruction(ins, op::add{}, wbr, rbr);
-        brcst_br   = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, br);
-        auto bh    = prog.insert_instruction(ins, op::add{}, wbh, rbh);
-        brcst_bh   = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, bh);
+        auto sbias = prog.insert_instruction(ins, op::squeeze{{0}}, bias);
+        auto wb    = prog.insert_instruction(ins, op::slice{{0}, {0}, {3 * hs}}, sbias);
+        bwb        = prog.insert_instruction(
+            ins, op::broadcast{1, {bs, static_cast<size_t>(3 * hs)}}, wb);
+        auto rb_zr = prog.insert_instruction(ins, op::slice{{0}, {3 * hs}, {5 * hs}}, sbias);
+        auto rb_h  = prog.insert_instruction(ins, op::slice{{0}, {5 * hs}, {6 * hs}}, sbias);
+        brb_zr     = prog.insert_instruction(
+            ins, op::broadcast{1, {bs, static_cast<size_t>(2 * hs)}}, rb_zr);
+        brb_h = prog.insert_instruction(
+            ins, op::broadcast{1, {bs, static_cast<size_t>(hs)}}, rb_h);
     }

     for(long i = 0; i < seq_len; i++)
...
@@ -549,56 +538,51 @@ std::vector<instruction_ref> rewrite_rnn::gru_cell(bool is_forward,
         auto xt = prog.insert_instruction(
             ins, op::slice{{0}, {seq_index}, {seq_index + 1}}, seq);
         xt = prog.insert_instruction(ins, op::squeeze{{0}}, xt);

-        // equation f(xt*(Wz^T) + Ht-1 * (Rz^T) + Wbz + Rbz)
-        auto xt_wz = prog.insert_instruction(ins, op::dot{}, xt, tran_wz);
-        auto ht_rz = prog.insert_instruction(ins, op::dot{}, sih, tran_rz);
-        auto xht_z = prog.insert_instruction(ins, op::add{}, xt_wz, ht_rz);
+        auto xt_w    = prog.insert_instruction(ins, op::dot{}, xt, tw);
+        auto ih1_rzr = prog.insert_instruction(ins, op::dot{}, sih, trzr);
         if(bias != prog.end())
         {
-            xht_z = prog.insert_instruction(ins, op::add{}, xht_z, brcst_bz);
+            xt_w    = prog.insert_instruction(ins, op::add{}, xt_w, bwb);
+            ih1_rzr = prog.insert_instruction(ins, op::add{}, ih1_rzr, brb_zr);
         }
-        auto zt = prog.insert_instruction(ins, actv_func1, xht_z);

-        // equation f(Xt*(Wr^T) + Ht-1*(Rr^T) + Wbr + Rbr)
-        auto xt_wr = prog.insert_instruction(ins, op::dot{}, xt, tran_wr);
-        auto ht_rr = prog.insert_instruction(ins, op::dot{}, sih, tran_rr);
-        auto xht_r = prog.insert_instruction(ins, op::add{}, xt_wr, ht_rr);
-        if(bias != prog.end())
-        {
-            xht_r = prog.insert_instruction(ins, op::add{}, xht_r, brcst_br);
-        }
-        auto rt = prog.insert_instruction(ins, actv_func1, xht_r);
-        instruction_ref xht_h;
+        auto xw_z = prog.insert_instruction(ins, op::slice{{1}, {0}, {hs}}, xt_w);
+        auto xw_r = prog.insert_instruction(ins, op::slice{{1}, {hs}, {2 * hs}}, xt_w);
+        auto xw_h = prog.insert_instruction(ins, op::slice{{1}, {2 * hs}, {3 * hs}}, xt_w);
+        auto hr_z = prog.insert_instruction(ins, op::slice{{1}, {0}, {hs}}, ih1_rzr);
+        auto hr_r = prog.insert_instruction(ins, op::slice{{1}, {hs}, {2 * hs}}, ih1_rzr);
+        auto xw_hr_z = prog.insert_instruction(ins, op::add{}, xw_z, hr_z);
+        auto zt      = prog.insert_instruction(ins, actv_func1, xw_hr_z);
+        auto xw_hr_r = prog.insert_instruction(ins, op::add{}, xw_r, hr_r);
+        auto rt      = prog.insert_instruction(ins, actv_func1, xw_hr_r);
+        instruction_ref hr_h{};
         if(linear_before_reset == 0)
         {
             // equation g(Xt*(Wh^T) + (rt (.) Ht-1)*(Rh^T) + Rbh + Wbh)
-            auto xt_wh  = prog.insert_instruction(ins, op::dot{}, xt, tran_wh);
             auto rt_ht1 = prog.insert_instruction(ins, op::mul{}, rt, sih);
-            auto rt_rh  = prog.insert_instruction(ins, op::dot{}, rt_ht1, tran_rh);
-            xht_h       = prog.insert_instruction(ins, op::add{}, xt_wh, rt_rh);
+            hr_h        = prog.insert_instruction(ins, op::dot{}, rt_ht1, trh);
             if(bias != prog.end())
             {
-                xht_h = prog.insert_instruction(ins, op::add{}, xht_h, brcst_bh);
+                hr_h = prog.insert_instruction(ins, op::add{}, hr_h, brb_h);
             }
         }
         else
         {
             // equation ht = g(Xt*(Wh^T) + (rt (.) (Ht-1*(Rh^T) + Rbh)) + Wbh)
-            auto xt_wh  = prog.insert_instruction(ins, op::dot{}, xt, tran_wh);
-            auto ht1_rh = prog.insert_instruction(ins, op::dot{}, sih, tran_rh);
-            if(bias != prog.end())
-            {
-                ht1_rh = prog.insert_instruction(ins, op::add{}, ht1_rh, brcst_rbh);
-            }
-            auto rt_rh = prog.insert_instruction(ins, op::mul{}, rt, ht1_rh);
-            xht_h      = prog.insert_instruction(ins, op::add{}, xt_wh, rt_rh);
+            auto ht1_rh = prog.insert_instruction(ins, op::dot{}, sih, trh);
             if(bias != prog.end())
             {
-                xht_h = prog.insert_instruction(ins, op::add{}, xht_h, brcst_wbh);
+                ht1_rh = prog.insert_instruction(ins, op::add{}, ht1_rh, brb_h);
             }
+            hr_h = prog.insert_instruction(ins, op::mul{}, rt, ht1_rh);
         }
-        auto ht = prog.insert_instruction(ins, actv_func2, xht_h);
+        auto xw_hr_h = prog.insert_instruction(ins, op::add{}, xw_h, hr_h);
+        auto ht      = prog.insert_instruction(ins, actv_func2, xw_hr_h);

         // equation Ht = (1 - zt) (.) ht + zt (.) Ht-1
         auto one_minus_zt = prog.insert_instruction(ins, op::sub{}, l1, zt);
...
@@ -683,7 +667,6 @@ void rewrite_rnn::apply_lstm(program& prog, instruction_ref ins) const
     std::vector<float> ihc_data(ihc_shape.elements(), 0.0);

     migraphx::shape pph_shape{type, {1, 3 * hidden_size}};
     std::vector<float> pph_data(pph_shape.elements(), 0.0);

     auto actv_funcs = lstm_actv_funcs(ins);
     auto lstm_op    = any_cast<op::lstm>(ins->get_operator());
...
@@ -913,35 +896,16 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
     migraphx::shape r_shape = r->get_shape();
     long seq_len = static_cast<long>(seq_shape.lens()[0]);
     long hs      = static_cast<long>(r_shape.lens()[2]);
+    auto bs      = ih->get_shape().lens()[1];
     std::vector<int64_t> perm{1, 0};

-    // w matrix
-    auto sw      = prog.insert_instruction(ins, op::squeeze{{0}}, w);
-    auto wi      = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sw);
-    auto tran_wi = prog.insert_instruction(ins, op::transpose{perm}, wi);
-    auto wo      = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sw);
-    auto tran_wo = prog.insert_instruction(ins, op::transpose{perm}, wo);
-    auto wf      = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, sw);
-    auto tran_wf = prog.insert_instruction(ins, op::transpose{perm}, wf);
-    auto wc      = prog.insert_instruction(ins, op::slice{{0}, {3 * hs}, {4 * hs}}, sw);
-    auto tran_wc = prog.insert_instruction(ins, op::transpose{perm}, wc);
+    // w matrix, squeeze and transpose
+    auto sw  = prog.insert_instruction(ins, op::squeeze{{0}}, w);
+    auto tsw = prog.insert_instruction(ins, op::transpose{perm}, sw);

-    // r matrix
-    auto sr      = prog.insert_instruction(ins, op::squeeze{{0}}, r);
-    auto ri      = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sr);
-    auto tran_ri = prog.insert_instruction(ins, op::transpose{perm}, ri);
-    auto ro      = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sr);
-    auto tran_ro = prog.insert_instruction(ins, op::transpose{perm}, ro);
-    auto rf      = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, sr);
-    auto tran_rf = prog.insert_instruction(ins, op::transpose{perm}, rf);
-    auto rc      = prog.insert_instruction(ins, op::slice{{0}, {3 * hs}, {4 * hs}}, sr);
-    auto tran_rc = prog.insert_instruction(ins, op::transpose{perm}, rc);
+    // r matrix, squeeze and transpose
+    auto sr  = prog.insert_instruction(ins, op::squeeze{{0}}, r);
+    auto tsr = prog.insert_instruction(ins, op::transpose{perm}, sr);

     // initial hidden state
     auto sih = prog.insert_instruction(ins, op::squeeze{{0}}, ih);
...
@@ -951,40 +915,23 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
     auto ic_lens = sic->get_shape().lens();

     // bias
-    instruction_ref bi_brcst{};
-    instruction_ref bo_brcst{};
-    instruction_ref bf_brcst{};
-    instruction_ref bc_brcst{};
+    instruction_ref wrb{};
     if(bias != prog.end())
     {
-        auto sbias = prog.insert_instruction(ins, op::squeeze{{0}}, bias);
-        auto bxi   = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sbias);
-        auto bhi   = prog.insert_instruction(ins, op::slice{{0}, {4 * hs}, {5 * hs}}, sbias);
-        auto bi    = prog.insert_instruction(ins, op::add{}, bxi, bhi);
-        bi_brcst   = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, bi);
-        auto bxo   = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sbias);
-        auto bho   = prog.insert_instruction(ins, op::slice{{0}, {5 * hs}, {6 * hs}}, sbias);
-        auto bo    = prog.insert_instruction(ins, op::add{}, bxo, bho);
-        bo_brcst   = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, bo);
-        auto bxf   = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, sbias);
-        auto bhf   = prog.insert_instruction(ins, op::slice{{0}, {6 * hs}, {7 * hs}}, sbias);
-        auto bf    = prog.insert_instruction(ins, op::add{}, bxf, bhf);
-        bf_brcst   = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, bf);
-        auto bxc   = prog.insert_instruction(ins, op::slice{{0}, {3 * hs}, {4 * hs}}, sbias);
-        auto bhc   = prog.insert_instruction(ins, op::slice{{0}, {7 * hs}, {8 * hs}}, sbias);
-        auto bc    = prog.insert_instruction(ins, op::add{}, bxc, bhc);
-        bc_brcst   = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, bc);
+        auto sbias  = prog.insert_instruction(ins, op::squeeze{{0}}, bias);
+        auto ub_wb  = prog.insert_instruction(ins, op::slice{{0}, {0}, {4 * hs}}, sbias);
+        auto ub_rb  = prog.insert_instruction(ins, op::slice{{0}, {4 * hs}, {8 * hs}}, sbias);
+        auto ub_wrb = prog.insert_instruction(ins, op::add{}, ub_wb, ub_rb);
+        wrb         = prog.insert_instruction(
+            ins, op::broadcast{1, {bs, 4 * static_cast<size_t>(hs)}}, ub_wrb);
    }

     // peep hole
     instruction_ref pphi_brcst{};
     instruction_ref ppho_brcst{};
     instruction_ref pphf_brcst{};
     if(pph != prog.end())
     {
         auto spph = prog.insert_instruction(ins, op::squeeze{{0}}, pph);
...
@@ -1004,44 +951,31 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
         auto xt = prog.insert_instruction(
             ins, op::slice{{0}, {seq_index}, {seq_index + 1}}, seq);
         xt = prog.insert_instruction(ins, op::squeeze{{0}}, xt);

-        // equation it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)
-        auto xt_wi          = prog.insert_instruction(ins, op::dot{}, xt, tran_wi);
-        auto ht_ri          = prog.insert_instruction(ins, op::dot{}, sih, tran_ri);
-        auto it_before_actv = prog.insert_instruction(ins, op::add{}, xt_wi, ht_ri);
-        if(pph != prog.end())
-        {
-            auto pphi_ct   = prog.insert_instruction(ins, op::mul{}, pphi_brcst, sic);
-            it_before_actv = prog.insert_instruction(ins, op::add{}, it_before_actv, pphi_ct);
-        }
+        auto xt_tsw  = prog.insert_instruction(ins, op::dot{}, xt, tsw);
+        auto sih_tsr = prog.insert_instruction(ins, op::dot{}, sih, tsr);
+        auto xt_sih  = prog.insert_instruction(ins, op::add{}, xt_tsw, sih_tsr);
         if(bias != prog.end())
         {
-            it_before_actv = prog.insert_instruction(ins, op::add{}, it_before_actv, bi_brcst);
+            xt_sih = prog.insert_instruction(ins, op::add{}, xt_sih, wrb);
         }
-        auto it = prog.insert_instruction(ins, actv_func1, it_before_actv);

-        // equation ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)
-        auto xt_wf          = prog.insert_instruction(ins, op::dot{}, xt, tran_wf);
-        auto ht_rf          = prog.insert_instruction(ins, op::dot{}, sih, tran_rf);
-        auto ft_before_actv = prog.insert_instruction(ins, op::add{}, xt_wf, ht_rf);
+        auto it_before_actv = prog.insert_instruction(ins, op::slice{{1}, {0}, {hs}}, xt_sih);
+        auto ot_before_actv = prog.insert_instruction(ins, op::slice{{1}, {hs}, {2 * hs}}, xt_sih);
+        auto ft_before_actv =
+            prog.insert_instruction(ins, op::slice{{1}, {2 * hs}, {3 * hs}}, xt_sih);
+        auto ct_before_actv =
+            prog.insert_instruction(ins, op::slice{{1}, {3 * hs}, {4 * hs}}, xt_sih);
         if(pph != prog.end())
         {
+            auto pphi_ct   = prog.insert_instruction(ins, op::mul{}, pphi_brcst, sic);
+            it_before_actv = prog.insert_instruction(ins, op::add{}, it_before_actv, pphi_ct);
             auto pphf_ct   = prog.insert_instruction(ins, op::mul{}, pphf_brcst, sic);
             ft_before_actv = prog.insert_instruction(ins, op::add{}, ft_before_actv, pphf_ct);
         }
-        if(bias != prog.end())
-        {
-            ft_before_actv = prog.insert_instruction(ins, op::add{}, ft_before_actv, bf_brcst);
-        }
+        auto it = prog.insert_instruction(ins, actv_func1, it_before_actv);
         auto ft = prog.insert_instruction(ins, actv_func1, ft_before_actv);

-        // equation ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
-        auto xt_wc          = prog.insert_instruction(ins, op::dot{}, xt, tran_wc);
-        auto ht_rc          = prog.insert_instruction(ins, op::dot{}, sih, tran_rc);
-        auto ct_before_actv = prog.insert_instruction(ins, op::add{}, xt_wc, ht_rc);
-        if(bias != prog.end())
-        {
-            ct_before_actv = prog.insert_instruction(ins, op::add{}, ct_before_actv, bc_brcst);
-        }
         auto ct = prog.insert_instruction(ins, actv_func2, ct_before_actv);

         // equation Ct = ft (.) Ct-1 + it (.) ct
...
@@ -1050,19 +984,11 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
         auto cellt       = prog.insert_instruction(ins, op::add{}, ft_cell, it_ct);
         last_cell_output = cellt;

-        // ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)
-        auto xt_wo          = prog.insert_instruction(ins, op::dot{}, xt, tran_wo);
-        auto ht_ro          = prog.insert_instruction(ins, op::dot{}, sih, tran_ro);
-        auto ot_before_actv = prog.insert_instruction(ins, op::add{}, xt_wo, ht_ro);
         if(pph != prog.end())
         {
             auto ppho_cellt = prog.insert_instruction(ins, op::mul{}, ppho_brcst, cellt);
             ot_before_actv  = prog.insert_instruction(ins, op::add{}, ot_before_actv, ppho_cellt);
         }
-        if(bias != prog.end())
-        {
-            ot_before_actv = prog.insert_instruction(ins, op::add{}, ot_before_actv, bo_brcst);
-        }
         auto ot = prog.insert_instruction(ins, actv_func1, ot_before_actv);

         // Ht = ot (.) h(Ct)
...
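A recurring trick in the rewritten GRU and LSTM cells above is to multiply by the whole transposed weight once and then slice the product per gate, instead of issuing one dot per gate. Slicing columns of the fused product gives the same values as the separate multiplies. A standalone sketch (plain C++, illustrative sizes, hs = 2 with two gates):

#include <cstddef>
#include <iostream>
#include <vector>

// y[j] = sum_i x[i] * w[i][j], with w a 2 x cols matrix stored row-major
std::vector<float> matvec(const std::vector<float>& x, const std::vector<float>& w, int cols)
{
    std::vector<float> y(cols, 0.0f);
    for(std::size_t i = 0; i < x.size(); i++)
        for(int j = 0; j < cols; j++)
            y[j] += x[i] * w[i * cols + j];
    return y;
}

int main()
{
    int hs = 2;
    std::vector<float> x = {1.0f, 2.0f};      // input of size 2
    std::vector<float> w = {1, 2, 3, 4,       // 2 x (2*hs): columns [0,hs) gate z,
                            5, 6, 7, 8};      //             columns [hs,2*hs) gate r

    auto full = matvec(x, w, 2 * hs); // one multiply, then slice the columns per gate
    std::cout << full[0] << " " << full[1] << " | " << full[2] << " " << full[3] << "\n";
    // prints: 11 14 | 17 20 — identical to multiplying by each hs-column block separately
}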
src/shape.cpp
View file @ 20b1d690
...
@@ -138,6 +138,24 @@ std::size_t shape::index(std::size_t i) const
         return result;
     }
 }

+std::vector<std::size_t> shape::multi(std::size_t i) const
+{
+    assert(this->standard());
+    std::vector<std::size_t> indices(lens().size());
+    std::transform(strides().begin(),
+                   strides().end(),
+                   lens().begin(),
+                   indices.begin(),
+                   [&](std::size_t stride, std::size_t len) {
+                       assert(len > 0 and stride > 0);
+                       return (i / stride) % len;
+                   });
+    return indices;
+}

 bool shape::packed() const { return this->elements() == this->element_space(); }

 bool shape::transposed() const
...
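A worked example of the arithmetic in shape::multi: for lens {2, 3, 4} a standard (row-major) shape has strides {12, 4, 1}, and flat index 17 decomposes as (17/12)%2 = 1, (17/4)%3 = 1, (17/1)%4 = 1, i.e. 17 = 1*12 + 1*4 + 1*1. A standalone check:

#include <iostream>
#include <vector>

int main()
{
    std::vector<std::size_t> lens    = {2, 3, 4};
    std::vector<std::size_t> strides = {12, 4, 1}; // row-major strides for {2, 3, 4}

    std::size_t i = 17;
    for(std::size_t k = 0; k < lens.size(); k++)
        std::cout << (i / strides[k]) % lens[k] << " "; // prints: 1 1 1
    std::cout << "\n";
}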
src/simplify_algebra.cpp
View file @ 20b1d690
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/program.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/broadcast.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/literal.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
-struct find_add_lit_broadcast
+auto lit_broadcast() { return match::any_of(match::is_constant(), match::name("broadcast")); }
+
+auto not_lit_broadcast() { return match::none_of(match::is_constant(), match::name("broadcast")); }
+
+auto op_lit_broadcast(std::string op, std::string x, std::string y)
 {
-    auto lit_broadcast() const
-    {
-        return match::any_of(match::name("@literal"), match::name("broadcast"));
-    }
+    return match::name(std::move(op))(match::either_arg(0, 1)(
+        lit_broadcast().bind(std::move(x)), not_lit_broadcast().bind(std::move(y))));
+}

-    auto not_lit_broadcast() const
-    {
-        return match::none_of(match::name("@literal"), match::name("broadcast"));
-    }
+auto conv_const_weights()
+{
+    return match::name("convolution")(match::used_once(),
+                                      match::args(match::any(), match::is_constant().bind("w")));
+}

-    auto add_lit_broadcast(std::string x, std::string y) const
-    {
-        return match::name("add")(match::either_arg(0, 1)(lit_broadcast().bind(std::move(x)),
-                                                          not_lit_broadcast().bind(std::move(y))));
-    }
+struct find_mul_conv
+{
+    auto matcher() const
+    {
+        return match::name("mul")(match::either_arg(0, 1)(conv_const_weights().bind("conv"),
+                                                          match::name("broadcast").bind("a")));
+    }
+
+    void apply(program& p, match::matcher_result r) const
+    {
+        auto ins      = r.result;
+        auto conv_ins = r.instructions["conv"];
+        auto a_ins    = r.instructions["a"];
+        auto w_ins    = r.instructions["w"];
+
+        auto broadcast_op = any_cast<op::broadcast>(a_ins->get_operator());
+        if(broadcast_op.axis != 1)
+            return;
+
+        auto new_a = p.insert_instruction(
+            ins, op::broadcast{0, w_ins->get_shape().lens()}, a_ins->inputs().front());
+        auto new_mul  = p.insert_instruction(ins, op::mul{}, new_a, w_ins);
+        auto new_conv = p.insert_instruction(
+            ins, conv_ins->get_operator(), conv_ins->inputs().front(), new_mul);
+        p.replace_instruction(ins, new_conv);
+    }
+};
+
+// a * (x + b) => a * x + a * b
+struct find_mul_add
+{
+    auto matcher() const
+    {
+        return match::name("mul")(match::either_arg(0, 1)(
+            match::name("add")(
+                match::either_arg(0, 1)(
+                    match::any().bind("x"),
+                    match::any_of(conv_const_weights(), match::is_constant()).bind("b")),
+                match::none_of(match::args(match::is_constant(), match::is_constant())),
+                match::used_once()),
+            match::is_constant().bind("a")));
+    }
+
+    void apply(program& p, match::matcher_result r) const
+    {
+        auto ins   = r.result;
+        auto a_ins = r.instructions["a"];
+        auto b_ins = r.instructions["b"];
+        auto x_ins = r.instructions["x"];
+        assert(x_ins != b_ins);
+
+        auto ax_ins = p.insert_instruction(ins, op::mul{}, a_ins, x_ins);
+        auto ab_ins = p.insert_instruction(ins, op::mul{}, a_ins, b_ins);
+        p.replace_instruction(ins, op::add{}, ax_ins, ab_ins);
+    }
+};

+struct find_add_lit_broadcast
+{
     auto matcher() const
     {
-        return match::name("add")(
-            match::args(add_lit_broadcast("a", "x"), add_lit_broadcast("b", "y")));
+        return match::name("add")(match::either_arg(0, 1)(op_lit_broadcast("add", "a", "x"),
+                                                          lit_broadcast().bind("b")));
     }

+    void apply(program& p, match::matcher_result r) const
+    {
+        auto ins   = r.result;
+        auto x_ins = r.instructions["x"];
+        auto a_ins = r.instructions["a"];
+        auto b_ins = r.instructions["b"];
+
+        auto sumab = p.insert_instruction(ins, op::add{}, a_ins, b_ins);
+        p.replace_instruction(ins, op::add{}, x_ins, sumab);
+    }
+};
+
+struct find_double_add_lit_broadcast
+{
+    auto matcher() const
+    {
+        return match::name("add")(
+            match::args(op_lit_broadcast("add", "a", "x"), op_lit_broadcast("add", "b", "y")));
+    }
+
     void apply(program& p, match::matcher_result r) const
...
@@ -36,11 +117,9 @@ struct find_add_lit_broadcast
         auto a_ins = r.instructions["a"];
         auto b_ins = r.instructions["b"];
-        if(a_ins->name() != b_ins->name())
-            return;
         instruction_ref sumab;
-        if(a_ins->name() == "broadcast")
+        if(a_ins->name() == "broadcast" and b_ins->name() == "broadcast")
         {
             if(a_ins->inputs().at(0)->get_shape() != b_ins->inputs().at(0)->get_shape())
                 return;
...
@@ -59,7 +138,46 @@ struct find_add_lit_broadcast
     }
 };

-void simplify_algebra::apply(program& p) const
-{
-    match::find_matches(p, find_add_lit_broadcast{});
-}
+struct find_inner_broadcast
+{
+    auto matcher() const
+    {
+        return match::name("mul", "add")(match::args(match::name("broadcast").bind("x"),
+                                                     match::name("broadcast").bind("y")));
+    }
+
+    void apply(program& p, match::matcher_result r) const
+    {
+        auto ins   = r.result;
+        auto x_ins = r.instructions["x"];
+        auto y_ins = r.instructions["y"];
+
+        auto xbroadcast = any_cast<op::broadcast>(x_ins->get_operator());
+        auto ybroadcast = any_cast<op::broadcast>(y_ins->get_operator());
+        if(xbroadcast.axis != ybroadcast.axis)
+            return;
+
+        auto op = p.insert_instruction(
+            ins, ins->get_operator(), x_ins->inputs().front(), y_ins->inputs().front());
+        p.replace_instruction(ins, xbroadcast, op);
+    }
+};
+
+void simplify_algebra::apply(program& p) const
+{
+    // Run simplifications multiple times
+    for(int i = 0; i < 4; i++)
+    {
+        match::find_matches(p,
+                            find_inner_broadcast{},
+                            find_double_add_lit_broadcast{},
+                            find_add_lit_broadcast{},
+                            find_mul_conv{},
+                            find_mul_add{});
+        dead_code_elimination{}.apply(p);
+    }
+}

 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
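The identity behind find_inner_broadcast above: when both operands are broadcast along the same axis, the elementwise op commutes with the broadcast, so broadcast(x) op broadcast(y) can be computed as broadcast(x op y) on the small inputs. A standalone numeric check (plain C++) for add along axis 0:

#include <iostream>
#include <vector>

int main()
{
    std::vector<float> x = {1.0f, 2.0f};
    std::vector<float> y = {10.0f, 20.0f};
    const int rows = 2, cols = 3;

    // original form: broadcast each vector along axis 0 to a rows x cols grid, then add
    std::vector<float> big(rows * cols);
    for(int i = 0; i < rows; i++)
        for(int j = 0; j < cols; j++)
            big[i * cols + j] = x[i] + y[i];

    // simplified form: add the small vectors first, broadcast the sum once
    std::vector<float> xy = {x[0] + y[0], x[1] + y[1]};
    bool same = true;
    for(int i = 0; i < rows; i++)
        for(int j = 0; j < cols; j++)
            same = same and (big[i * cols + j] == xy[i]);

    std::cout << (same ? "equal" : "different") << "\n"; // prints: equal
}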
src/simplify_reshapes.cpp
View file @ 20b1d690
...
@@ -2,14 +2,17 @@
 #include <migraphx/program.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/op/as_shape.hpp>
+#include <migraphx/op/transpose.hpp>
+#include <migraphx/op/concat.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/ranges.hpp>
+#include <migraphx/matcher.hpp>
 #include <unordered_set>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-bool is_reshaper(instruction_ref ins)
+const auto& reshaper_names()
 {
     // clang-format off
     static const std::unordered_set<std::string> names = {
...
@@ -19,17 +22,10 @@ bool is_reshaper(instruction_ref ins)
         "unsqueeze"
     };
     // clang-format on
-    return contains(names, ins->name());
+    return names;
 }

-bool is_transpose_output(instruction_ref ins)
-{
-    if(ins->outputs().size() != 1)
-        return false;
-    if(ins->outputs().front()->name() == "contiguous")
-        return is_transpose_output(ins->outputs().front());
-    return ins->outputs().front()->name() == "transpose";
-}
+bool is_reshaper(instruction_ref ins) { return contains(reshaper_names(), ins->name()); }

 instruction_ref find_transpose_input(instruction_ref ins)
 {
...
@@ -42,62 +38,189 @@ instruction_ref find_transpose_input(instruction_ref ins)
     return ins;
 }

-void simplify_reshapes::apply(program& p) const
+auto get_transpose_dims(instruction_ref ins)
 {
-    auto end = std::prev(p.end());
-    for(auto ins : iterator_for(p))
+    return any_cast<const op::transpose&>(ins->get_operator()).dims;
+}
+
+std::vector<int64_t> reorder_dims(std::vector<int64_t> dims, std::vector<int64_t> permutation)
+{
+    std::vector<int64_t> result(dims.size());
+    assert(dims.size() == permutation.size());
+    for(std::size_t i = 0; i < dims.size(); i++)
     {
-        if(ins == end and ins->name() == "contiguous")
-            continue;
-        // Skip possible dead instructions
-        if(ins->outputs().empty() and ins != end)
-            continue;
-        if(is_reshaper(ins))
+        result[i] = dims[permutation[i]];
     }
+    return result;
+}
+bool is_no_transpose(const std::vector<int64_t>& dims)
+{
+    if(dims.empty())
+        return true;
+    if(dims.front() != 0)
+        return false;
+    return std::adjacent_find(dims.begin(), dims.end(), [](auto x, auto y) {
+               return (y - x) != 1;
+           }) == dims.end();
+}
+
+template <class Vector, class Op>
+std::vector<int64_t> sort_permutation(const Vector& data, Op op)
+{
+    std::vector<std::int64_t> result(data.size());
+    std::iota(result.begin(), result.end(), 0);
+    std::sort(result.begin(), result.end(), [&](auto x, auto y) { return op(data[x], data[y]); });
+    return result;
+}
+
+std::vector<int64_t> invert_permutation(const std::vector<int64_t>& permutation)
+{
+    return sort_permutation(permutation, std::less<>{});
+}
+
+std::vector<int64_t> find_permutation(const shape& s)
+{
+    return sort_permutation(s.strides(), std::greater<>{});
+}
+struct find_reshaper
+{
+    auto matcher() const
+    {
+        return match::name(reshaper_names())(
+            match::any_of[match::outputs()](match::name(reshaper_names())));
+    }
+
+    void apply(program& p, const match::matcher_result& mr) const
+    {
+        auto ins = mr.result;
+        std::vector<instruction_ref> reshapes{ins};
+        while(is_reshaper(reshapes.back()))
         {
-            if(std::any_of(ins->outputs().begin(), ins->outputs().end(), &is_reshaper))
-                continue;
-            // Gather reshapes
-            std::vector<instruction_ref> reshapes{ins};
-            while(is_reshaper(reshapes.back()))
-            {
-                assert(!reshapes.back()->inputs().empty());
-                assert(p.has_instruction(reshapes.back()->inputs().front()));
-                auto input = reshapes.back()->inputs().front();
-                reshapes.push_back(input);
-            }
+            assert(!reshapes.back()->inputs().empty());
+            assert(p.has_instruction(reshapes.back()->inputs().front()));
+            auto input = reshapes.back()->inputs().front();
+            reshapes.push_back(input);
+        }

-            std::pair<instruction_ref, instruction_ref> r{p.end(), p.end()};
-            for(auto start : iterator_for(reshapes))
-            {
-                auto last = std::find_if(reshapes.rbegin(), reshapes.rend(), [&](auto&& i) {
-                    return i->get_shape() == (*start)->get_shape() and i != (*start);
-                });
-                if(last != reshapes.rend())
-                {
-                    r = std::make_pair(*start, *last);
-                    break;
-                }
-            }
-            if(r.first != r.second)
+        std::pair<instruction_ref, instruction_ref> r{p.end(), p.end()};
+        for(auto start : iterator_for(reshapes))
+        {
+            auto last = std::find_if(reshapes.rbegin(), reshapes.rend(), [&](auto&& i) {
+                return i->get_shape() == (*start)->get_shape() and i != (*start);
+            });
+            if(last != reshapes.rend())
             {
-                p.replace_instruction(r.first, r.second);
+                r = std::make_pair(*start, *last);
+                break;
             }
         }
-        else if(ins->name() == "transpose")
+        if(r.first != r.second)
         {
+            p.replace_instruction(r.first, r.second);
         }
+    }
+};
+struct find_nop_reshapes
+{
+    auto matcher() const
+    {
+        auto reshapes = reshaper_names();
+        reshapes.insert("transpose");
+        reshapes.insert("slice");
+        return match::name(reshapes)(match::same_shape(match::arg(0)));
+    }
+
+    void apply(program& p, const match::matcher_result& mr) const
+    {
+        auto ins = mr.result;
+        p.replace_instruction(ins, ins->inputs().front());
+    }
+};
+struct find_transpose
+{
+    auto matcher() const
+    {
+        return match::name("transpose")(match::none_of(
+            match::skip_output(match::name("contiguous"))(match::name("transpose"))));
+    }
+
+    void apply(program& p, const match::matcher_result& mr) const
+    {
+        auto ins = mr.result;
+        auto x   = ins;
+        auto t   = ins;
+        std::vector<std::int64_t> dims(ins->get_shape().lens().size());
+        std::iota(dims.begin(), dims.end(), 0);
+        do
+        {
+            dims = reorder_dims(get_transpose_dims(t), dims);
+            x    = t;
+            t    = find_transpose_input(x);
+        } while(x != t and t->name() == "transpose");
+        if(t == ins or t->name() != "transpose")
+            return;
+        if(is_no_transpose(dims))
+        {
-            if(is_transpose_output(ins))
-                continue;
-            auto x = ins;
-            auto t = ins;
-            do
-            {
-                x = t;
-                t = find_transpose_input(x);
-            } while(x != t and t->name() == "transpose");
-            if(t == ins or t->name() != "transpose")
-                continue;
             p.replace_instruction(ins, t->inputs().front());
         }
+        else
+        {
+            p.replace_instruction(ins, op::transpose{{dims}}, t->inputs().front());
+        }
     }
 };
+struct find_concat_transpose
+{
+    auto matcher() const
+    {
+        return match::name("concat")(match::same_input_shapes(),
+                                     match::all_of[match::inputs()](match::transpose_shape()));
+    }
+
+    void apply(program& p, const match::matcher_result& mr) const
+    {
+        auto ins = mr.result;
+        auto s   = ins->inputs().front()->get_shape();
+        assert(s.transposed());
+        auto op           = any_cast<op::concat>(ins->get_operator());
+        auto permutation  = find_permutation(s);
+        auto ipermutation = invert_permutation(permutation);
+        op.axis           = ipermutation[op.axis];
+
+        std::vector<instruction_ref> inputs;
+        std::transform(
+            ins->inputs().begin(), ins->inputs().end(), std::back_inserter(inputs), [&](auto i) {
+                if(i->name() == "transpose" and i->inputs().front()->get_shape().standard())
+                    return i->inputs().front();
+                return p.insert_instruction(ins, op::transpose{permutation}, i);
+            });
+        auto concat = p.insert_instruction(ins, op, inputs);
+        auto t      = p.insert_instruction(ins, op::transpose{ipermutation}, concat);
+        assert(ins->get_shape().lens() == t->get_shape().lens());
+        p.replace_instruction(ins, t);
+    }
+};
+void simplify_reshapes::apply(program& p) const
+{
+    auto end = std::prev(p.end());
+    for(auto ins : iterator_for(p))
+    {
+        if(ins == end and ins->name() == "contiguous")
+            continue;
+        // Skip possible dead instructions
+        if(ins->outputs().empty() and ins != end)
+            continue;
+        match::find_matches(
+            p, ins, find_nop_reshapes{}, find_reshaper{}, find_transpose{}, find_concat_transpose{});
+    }
+}
...
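find_transpose above folds chains of transposes by accumulating their permutations with reorder_dims; when the accumulated permutation is the identity, is_no_transpose fires and the whole chain is dropped. A standalone sketch (reorder_dims copied from the code above; the call order here is illustrative):

#include <cstdint>
#include <iostream>
#include <vector>

std::vector<std::int64_t> reorder_dims(std::vector<std::int64_t> dims,
                                       std::vector<std::int64_t> permutation)
{
    std::vector<std::int64_t> result(dims.size());
    for(std::size_t i = 0; i < dims.size(); i++)
        result[i] = dims[permutation[i]];
    return result;
}

int main()
{
    std::vector<std::int64_t> perm = {2, 0, 1}; // one transpose
    std::vector<std::int64_t> inv  = {1, 2, 0}; // the opposite transpose

    auto composed = reorder_dims(inv, perm); // result[i] = inv[perm[i]]
    for(auto d : composed)
        std::cout << d << " "; // prints: 0 1 2 — the identity, i.e. no transpose at all
    std::cout << "\n";
}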
src/targets/cpu/CMakeLists.txt
View file @ 20b1d690
...
@@ -5,6 +5,7 @@ add_library(migraphx_cpu
     gemm.cpp
 )
 set_target_properties(migraphx_cpu PROPERTIES EXPORT_NAME cpu)
+rocm_set_soversion(migraphx_cpu ${PROJECT_VERSION})
 find_path(BLAZE_INCLUDE blaze/Blaze.h)
 find_package(Threads)
...