gaoqiong / MIGraphX

Commit 3a848f0d, authored Mar 19, 2020 by Paul

Merge branch 'develop' into doc2

Parents: 64e8e30a, d1e945da
Changes: 208

Showing 20 changed files with 1259 additions and 232 deletions
src/include/migraphx/program.hpp          +4    -2
src/include/migraphx/schedule_model.hpp   +13   -7
src/include/migraphx/target.hpp           +13   -7
src/include/migraphx/tf.hpp               +8    -1
src/instruction.cpp                       +8    -0
src/onnx/cifar10.cpp                      +4    -3
src/onnx/mnist.cpp                        +3    -2
src/onnx/onnx.cpp                         +471  -106
src/onnx/onnx.proto                       +356  -74
src/program.cpp                           +68   -9
src/py/migraphx_py.cpp                    +13   -4
src/quantization.cpp                      +6    -0
src/simplify_algebra.cpp                  +79   -1
src/targets/cpu/lowering.cpp              +79   -13
src/targets/gpu/CMakeLists.txt            +6    -0
src/targets/gpu/argmax.cpp                +3    -1
src/targets/gpu/argmin.cpp                +3    -1
src/targets/gpu/deconvolution.cpp         +103  -0
src/targets/gpu/device/acos.cpp           +1    -1
src/targets/gpu/device/acosh.cpp          +18   -0
src/include/migraphx/program.hpp

@@ -87,13 +87,15 @@ struct program
     instruction_ref add_parameter(std::string name, shape s);
+    instruction_ref add_return(std::vector<instruction_ref> args);
     shape get_parameter_shape(std::string name) const;
     instruction_ref get_parameter(std::string name) const;
     std::unordered_map<std::string, shape> get_parameter_shapes() const;
-    argument eval(parameter_map params) const;
+    std::vector<argument> eval(parameter_map params) const;
     bool has_instruction(instruction_ref ins) const;
@@ -101,7 +103,7 @@ struct program
     instruction_ref begin() const;
     instruction_ref end() const;
-    shape get_shape() const;
+    std::vector<shape> get_output_shapes() const;
     context& get_context() const;
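With `eval` now returning one `argument` per graph output, callers take the vector and typically read the last entry. A minimal sketch of the new call pattern (the model path, parameter fill, and the `onnx_options` default are illustrative assumptions):

    #include <migraphx/onnx.hpp>
    #include <migraphx/program.hpp>

    int main()
    {
        // Hypothetical model file; two-argument parse_onnx per this commit.
        migraphx::program prog = migraphx::parse_onnx("model.onnx", migraphx::onnx_options{});
        migraphx::program::parameter_map params;
        // ... fill params for each entry of prog.get_parameter_shapes() ...
        std::vector<migraphx::argument> outputs = prog.eval(params); // one per output
        migraphx::argument last = outputs.back();                    // final graph output
    }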
src/include/migraphx/schedule_model.hpp

@@ -69,11 +69,17 @@ struct schedule_model
     template <typename PrivateDetailTypeErasedT>
     schedule_model& operator=(PrivateDetailTypeErasedT value)
     {
-        if(private_detail_te_handle_mem_var.unique())
-            *private_detail_te_handle_mem_var = std::forward<PrivateDetailTypeErasedT>(value);
-        else if(!private_detail_te_handle_mem_var)
-            private_detail_te_handle_mem_var = std::make_shared<PrivateDetailTypeErasedT>(
-                std::forward<PrivateDetailTypeErasedT>(value));
+        using std::swap;
+        auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
+        if(derived and private_detail_te_handle_mem_var.unique())
+        {
+            *derived = std::forward<PrivateDetailTypeErasedT>(value);
+        }
+        else
+        {
+            schedule_model rhs(value);
+            swap(private_detail_te_handle_mem_var, rhs.private_detail_te_handle_mem_var);
+        }
         return *this;
     }
@@ -81,7 +87,7 @@ struct schedule_model
     template <typename PrivateDetailTypeErasedT>
     PrivateDetailTypeErasedT* any_cast()
     {
-        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
+        return this->type_id() == typeid(PrivateDetailTypeErasedT)
                    ? std::addressof(static_cast<private_detail_te_handle_type<
                          typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                          private_detail_te_get_handle())
@@ -92,7 +98,7 @@ struct schedule_model
     template <typename PrivateDetailTypeErasedT>
     const typename std::remove_cv<PrivateDetailTypeErasedT>::type* any_cast() const
     {
-        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
+        return this->type_id() == typeid(PrivateDetailTypeErasedT)
                    ? std::addressof(static_cast<const private_detail_te_handle_type<
                          typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                          private_detail_te_get_handle())
src/include/migraphx/target.hpp

@@ -115,11 +115,17 @@ struct target
     template <typename PrivateDetailTypeErasedT>
     target& operator=(PrivateDetailTypeErasedT value)
     {
-        if(private_detail_te_handle_mem_var.unique())
-            *private_detail_te_handle_mem_var = std::forward<PrivateDetailTypeErasedT>(value);
-        else if(!private_detail_te_handle_mem_var)
-            private_detail_te_handle_mem_var = std::make_shared<PrivateDetailTypeErasedT>(
-                std::forward<PrivateDetailTypeErasedT>(value));
+        using std::swap;
+        auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
+        if(derived and private_detail_te_handle_mem_var.unique())
+        {
+            *derived = std::forward<PrivateDetailTypeErasedT>(value);
+        }
+        else
+        {
+            target rhs(value);
+            swap(private_detail_te_handle_mem_var, rhs.private_detail_te_handle_mem_var);
+        }
         return *this;
     }
@@ -127,7 +133,7 @@ struct target
     template <typename PrivateDetailTypeErasedT>
     PrivateDetailTypeErasedT* any_cast()
    {
-        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
+        return this->type_id() == typeid(PrivateDetailTypeErasedT)
                    ? std::addressof(static_cast<private_detail_te_handle_type<
                          typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                          private_detail_te_get_handle())
@@ -138,7 +144,7 @@ struct target
     template <typename PrivateDetailTypeErasedT>
     const typename std::remove_cv<PrivateDetailTypeErasedT>::type* any_cast() const
     {
-        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
+        return this->type_id() == typeid(PrivateDetailTypeErasedT)
                    ? std::addressof(static_cast<const private_detail_te_handle_type<
                          typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                          private_detail_te_get_handle())
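Both `schedule_model` and `target` use the same generated type-erasure wrapper, so the assignment fix is identical in each: write in place only when the shared handle is uniquely owned and already holds the requested derived type, otherwise build a fresh wrapper and swap handles. A self-contained toy version of that copy-on-write assignment (simplified names, a sketch rather than the generated code):

    #include <memory>
    #include <typeinfo>
    #include <utility>

    struct holder_base
    {
        virtual ~holder_base() = default;
        virtual const std::type_info& type() const = 0;
    };
    template <class T>
    struct holder : holder_base
    {
        T value;
        explicit holder(T v) : value(std::move(v)) {}
        const std::type_info& type() const override { return typeid(T); }
    };

    struct erased
    {
        std::shared_ptr<holder_base> h;
        template <class T>
        erased(T v) : h(std::make_shared<holder<T>>(std::move(v))) {}
        // Non-null only if the stored type matches, mirroring any_cast above.
        template <class T>
        T* any_cast() { return h and h->type() == typeid(T) ? &static_cast<holder<T>&>(*h).value : nullptr; }
        template <class T>
        erased& operator=(T v)
        {
            using std::swap;
            auto* derived = this->any_cast<T>();
            if(derived and h.unique())
                *derived = std::move(v); // sole owner, same type: safe in-place write
            else
            {
                erased rhs(std::move(v)); // otherwise rebuild and swap handles
                swap(h, rhs.h);
            }
            return *this;
        }
    };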
src/include/migraphx/tf.hpp

@@ -7,8 +7,15 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+/// struct to pass in tf options to parser
+struct tf_options
+{
+    bool is_nhwc            = false;
+    unsigned int batch_size = 1;
+};
+
 /// Create a program from a tf pb file (default is nhwc format)
-program parse_tf(const std::string& name, bool is_nhwc);
+program parse_tf(const std::string& name, tf_options = tf_options{});

 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
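The boolean flag is folded into a `tf_options` struct so more knobs (here, a batch size) can ride along without further signature churn. A minimal usage sketch of the new entry point (the file name is only an example):

    #include <migraphx/tf.hpp>

    int main()
    {
        migraphx::tf_options options;
        options.is_nhwc    = true; // frozen graph stored in NHWC layout
        options.batch_size = 4;    // substituted for the unknown batch dimension
        auto prog = migraphx::parse_tf("resnet50_frozen.pb", options);
    }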
src/instruction.cpp

@@ -22,6 +22,9 @@ void instruction::replace(const shape& r)
     result = r;
     for(auto&& ins : output)
     {
+        if(ins->name() == "@return")
+            continue;
+
         assert(ins->name().front() != '@');
         ins->recompute_shape();
     }
@@ -70,6 +73,10 @@ bool instruction::valid() const
     {
         computed = result;
     }
+    else if(op.name() == "@return")
+    {
+        computed = {};
+    }
     else
     {
         try
@@ -81,6 +88,7 @@ bool instruction::valid() const
             return false;
         }
     }
+
     return result == computed &&
            std::all_of(output.begin(), output.end(), [&](instruction_ref i) {
                return std::find(i->inputs().begin(), i->inputs().end(), *this) !=
                       i->inputs().end();
            });
src/onnx/cifar10.cpp

@@ -73,8 +73,9 @@ int main(int argc, char const* argv[])
     for(int i = 0; i < 10; i++)
     {
         std::cout << "label: " << static_cast<uint32_t>(labels[i]) << " ----> ";
-        m["0"]      = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[3072 * i]});
-        auto result = migraphx::gpu::from_gpu(prog.eval(m));
+        m["0"]          = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[3072 * i]});
+        auto gpu_result = prog.eval(m).back();
+        auto result     = migraphx::gpu::from_gpu(gpu_result);
         std::vector<float> logits;
         result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
         std::vector<float> probs = softmax<float>(logits);
@@ -95,7 +96,7 @@ int main(int argc, char const* argv[])
         std::cout << "label: " << static_cast<uint32_t>(labels[i]) << " ----> ";
         auto input3 = migraphx::argument{s, &ptr[3072 * i]};
-        auto result = prog.eval({{"0", input3}});
+        auto result = prog.eval({{"0", input3}}).back();
         std::vector<float> logits;
         result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
         std::vector<float> probs = softmax<float>(logits);
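Both this driver and the MNIST one below convert the returned logits with a `softmax` helper defined elsewhere in each file. A numerically stable version consistent with that use would look like this (a sketch, not the repository's exact helper):

    #include <algorithm>
    #include <cmath>
    #include <vector>

    template <class T>
    std::vector<T> softmax(const std::vector<T>& logits)
    {
        // Subtract the max first so exp() cannot overflow.
        T m = *std::max_element(logits.begin(), logits.end());
        std::vector<T> probs(logits.size());
        T sum = 0;
        for(std::size_t i = 0; i < logits.size(); i++)
        {
            probs[i] = std::exp(logits[i] - m);
            sum += probs[i];
        }
        for(auto& p : probs)
            p /= sum;
        return probs;
    }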
src/onnx/mnist.cpp

@@ -130,8 +130,9 @@ int main(int argc, char const* argv[])
     for(int i = 0; i < 20; i++)
     {
         std::cout << "label: " << labels[i] << " ----> ";
-        m["0"]      = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[784 * i]});
-        auto result = migraphx::gpu::from_gpu(prog.eval(m));
+        m["0"]       = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[784 * i]});
+        auto results = prog.eval(m).back();
+        auto result  = migraphx::gpu::from_gpu(results);
         std::vector<float> logits;
         result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
         std::vector<float> probs = softmax(logits);
src/onnx/onnx.cpp

@@ -16,6 +16,7 @@
 #include <migraphx/instruction.hpp>
 #include <migraphx/config.hpp>
 #include <migraphx/onnx.hpp>
+#include <migraphx/pad_calc.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -28,87 +29,102 @@ struct onnx_parser
         std::function<std::vector<instruction_ref>(attribute_map, std::vector<instruction_ref>)>;
     node_map nodes;
     std::unordered_map<std::string, instruction_ref> instructions;
-    program prog    = program();
-    bool is_pytorch = false;
+    program prog            = program();
+    bool is_pytorch         = false;
+    unsigned int batch_size = 1;

     std::unordered_map<std::string, op_func> ops;
     std::unordered_map<std::string, operation> map_actv_funcs;

     onnx_parser()
     {
-        add_generic_op("Relu", op::relu{});
-        add_generic_op("Sigmoid", op::sigmoid{});
-        add_generic_op("Abs", op::abs{});
-        add_generic_op("Exp", op::exp{});
-        add_generic_op("Log", op::log{});
-        // disable dropout for inference
-        add_generic_op("Dropout", op::identity{});
-        add_generic_op("Identity", op::identity{});
-        add_generic_op("Sin", op::sin{});
-        add_generic_op("Cos", op::cos{});
-        add_generic_op("Tan", op::tan{});
-        add_generic_op("Sinh", op::sinh{});
-        add_generic_op("Cosh", op::cosh{});
-        add_generic_op("Tanh", op::tanh{});
-        add_generic_op("Asin", op::asin{});
-        add_generic_op("Acos", op::acos{});
-        add_generic_op("Atan", op::atan{});
-        add_generic_op("Sqrt", op::sqrt{});
-        add_generic_op("Round", op::round{});
-        add_generic_op("Sign", op::sign{});
-        add_generic_op("Ceil", op::ceil{});
-        add_generic_op("Floor", op::floor{});
+        // sort onnx operator alphabetically through name
+        add_generic_op("Abs", op::abs{});
+        add_generic_op("Acos", op::acos{});
+        add_generic_op("Acosh", op::acosh{});
+        add_generic_op("Asin", op::asin{});
+        add_generic_op("Asinh", op::asinh{});
+        add_generic_op("Atan", op::atan{});
+        add_generic_op("Atanh", op::atanh{});
+        add_generic_op("Ceil", op::ceil{});
+        add_generic_op("Cos", op::cos{});
+        add_generic_op("Cosh", op::cosh{});
+        // disable dropout for inference
+        add_generic_op("Dropout", op::identity{});
+        add_generic_op("Erf", op::erf{});
+        add_generic_op("Exp", op::exp{});
+        add_generic_op("Floor", op::floor{});
+        add_generic_op("Identity", op::identity{});
+        add_generic_op("Log", op::log{});
+        add_generic_op("Relu", op::relu{});
+        add_generic_op("Round", op::round{});
+        add_generic_op("Sigmoid", op::sigmoid{});
+        add_generic_op("Sign", op::sign{});
+        add_generic_op("Sin", op::sin{});
+        add_generic_op("Sinh", op::sinh{});
+        add_generic_op("Sqrt", op::sqrt{});
+        add_generic_op("Tan", op::tan{});
+        add_generic_op("Tanh", op::tanh{});

         add_binary_op("Add", op::add{});
         add_binary_op("Div", op::div{});
         add_binary_op("Mul", op::mul{});
-        add_binary_op("Sub", op::sub{});
         add_binary_op("Pow", op::pow{});
         add_binary_op("PRelu", op::prelu{});
+        add_binary_op("Sub", op::sub{});

         add_variadic_op("Sum", op::add{});
         add_variadic_op("Max", op::max{});
         add_variadic_op("Min", op::min{});

-        add_mem_op("Constant", &onnx_parser::parse_constant);
-        add_mem_op("Conv", &onnx_parser::parse_conv);
-        add_mem_op("MaxPool", &onnx_parser::parse_pooling);
-        add_mem_op("AveragePool", &onnx_parser::parse_pooling);
-        add_mem_op("GlobalMaxPool", &onnx_parser::parse_pooling);
-        add_mem_op("GlobalAveragePool", &onnx_parser::parse_pooling);
-        add_mem_op("Reshape", &onnx_parser::parse_reshape);
-        add_mem_op("Flatten", &onnx_parser::parse_flatten);
-        add_mem_op("Gather", &onnx_parser::parse_gather);
-        add_mem_op("MatMul", &onnx_parser::parse_matmul);
-        add_mem_op("BatchNormalization", &onnx_parser::parse_batchnorm);
-        add_mem_op("Softmax", &onnx_parser::parse_softmax<op::softmax>);
-        add_mem_op("Squeeze", &onnx_parser::parse_squeeze);
-        add_mem_op("Unsqueeze", &onnx_parser::parse_unsqueeze);
-        add_mem_op("Slice", &onnx_parser::parse_slice);
-        add_mem_op("Concat", &onnx_parser::parse_concat);
-        add_mem_op("LRN", &onnx_parser::parse_lrn);
-        add_mem_op("ImageScaler", &onnx_parser::parse_imagescaler);
-        add_mem_op("LeakyRelu", &onnx_parser::parse_leaky_relu);
-        add_mem_op("ConstantFill", &onnx_parser::parse_constant_fill);
-        add_mem_op("ConstantOfShape", &onnx_parser::parse_constant_of_shape);
-        add_mem_op("RNN", &onnx_parser::parse_rnn);
-        add_mem_op("GRU", &onnx_parser::parse_gru);
-        add_mem_op("Pad", &onnx_parser::parse_pad);
-        add_mem_op("ReduceSum", &onnx_parser::parse_reduce_oper<op::reduce_sum>);
-        add_mem_op("ReduceMean", &onnx_parser::parse_reduce_oper<op::reduce_mean>);
-        add_mem_op("ReduceMin", &onnx_parser::parse_reduce_oper<op::reduce_min>);
-        add_mem_op("ReduceMax", &onnx_parser::parse_reduce_oper<op::reduce_max>);
+        add_mem_op("ArgMax", &onnx_parser::parse_arg_op<op::argmax>);
+        add_mem_op("ArgMin", &onnx_parser::parse_arg_op<op::argmin>);
+        add_mem_op("AveragePool", &onnx_parser::parse_pooling);
+        add_mem_op("BatchNormalization", &onnx_parser::parse_batchnorm);
+        add_mem_op("Cast", &onnx_parser::parse_cast);
+        add_mem_op("Clip", &onnx_parser::parse_clip);
+        add_mem_op("Concat", &onnx_parser::parse_concat);
+        add_mem_op("Constant", &onnx_parser::parse_constant);
+        add_mem_op("ConstantFill", &onnx_parser::parse_constant_fill);
+        add_mem_op("ConstantOfShape", &onnx_parser::parse_constant_of_shape);
+        add_mem_op("Conv", &onnx_parser::parse_conv<op::convolution>);
+        add_mem_op("ConvInteger", &onnx_parser::parse_conv<op::quant_convolution>);
+        add_mem_op("ConvTranspose", &onnx_parser::parse_conv_transpose);
+        add_mem_op("Elu", &onnx_parser::parse_elu);
+        add_mem_op("Expand", &onnx_parser::parse_expand);
+        add_mem_op("Flatten", &onnx_parser::parse_flatten);
+        add_mem_op("Gather", &onnx_parser::parse_gather);
+        add_mem_op("Gemm", &onnx_parser::parse_gemm);
+        add_mem_op("GlobalAveragePool", &onnx_parser::parse_pooling);
+        add_mem_op("GlobalMaxPool", &onnx_parser::parse_pooling);
+        add_mem_op("GRU", &onnx_parser::parse_gru);
+        add_mem_op("ImageScaler", &onnx_parser::parse_imagescaler);
+        add_mem_op("InstanceNormalization", &onnx_parser::parse_instancenorm);
+        add_mem_op("LeakyRelu", &onnx_parser::parse_leaky_relu);
+        add_mem_op("LogSoftmax", &onnx_parser::parse_softmax<op::logsoftmax>);
+        add_mem_op("LRN", &onnx_parser::parse_lrn);
+        add_mem_op("LSTM", &onnx_parser::parse_lstm);
+        add_mem_op("MatMul", &onnx_parser::parse_matmul<op::dot>);
+        add_mem_op("MatMulInteger", &onnx_parser::parse_matmul<op::quant_dot>);
+        add_mem_op("MaxPool", &onnx_parser::parse_pooling);
+        add_mem_op("Pad", &onnx_parser::parse_pad);
+        add_mem_op("ReduceL1", &onnx_parser::parse_reduce_l1);
+        add_mem_op("ReduceL2", &onnx_parser::parse_reduce_l2);
+        add_mem_op("ReduceLogSum", &onnx_parser::parse_reduce_log_sum);
+        add_mem_op("ReduceLogSumExp", &onnx_parser::parse_reduce_log_sum_exp);
+        add_mem_op("ReduceMax", &onnx_parser::parse_reduce_oper<op::reduce_max>);
+        add_mem_op("ReduceMean", &onnx_parser::parse_reduce_oper<op::reduce_mean>);
+        add_mem_op("ReduceMin", &onnx_parser::parse_reduce_oper<op::reduce_min>);
+        add_mem_op("ReduceProd", &onnx_parser::parse_reduce_oper<op::reduce_prod>);
+        add_mem_op("ReduceSum", &onnx_parser::parse_reduce_oper<op::reduce_sum>);
+        add_mem_op("ReduceSumSquare", &onnx_parser::parse_reduce_sum_square);
+        add_mem_op("Reshape", &onnx_parser::parse_reshape);
+        add_mem_op("RNN", &onnx_parser::parse_rnn);
+        add_mem_op("Shape", &onnx_parser::parse_shape);
+        add_mem_op("Slice", &onnx_parser::parse_slice);
+        add_mem_op("Softmax", &onnx_parser::parse_softmax<op::softmax>);
+        add_mem_op("Squeeze", &onnx_parser::parse_squeeze);
+        add_mem_op("Transpose", &onnx_parser::parse_transpose);
+        add_mem_op("Unsqueeze", &onnx_parser::parse_unsqueeze);

         // init the activation function map
         init_actv_func();
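The registration tables map ONNX op names either to a ready-made migraphx operation (`add_generic_op`) or to a parser member function (`add_mem_op`). The file's actual helpers are outside this hunk; the sketch below only illustrates the general member-function dispatch pattern with simplified types:

    #include <functional>
    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    struct parser
    {
        using op_func = std::function<int(std::vector<int>)>;
        std::unordered_map<std::string, op_func> ops;

        int parse_relu(std::vector<int> args) { return args.empty() ? 0 : args[0]; }

        // Bind a member function into the dispatch table via a lambda.
        template <class F>
        void add_mem_op(const std::string& name, F f)
        {
            ops[name] = [=](std::vector<int> args) { return (this->*f)(std::move(args)); };
        }
    };

    int main()
    {
        parser p;
        p.add_mem_op("Relu", &parser::parse_relu);
        return p.ops.at("Relu")({7}); // dispatch by ONNX op name
    }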
@@ -230,8 +246,15 @@ struct onnx_parser
             auto s0       = arg0->get_shape().lens();
             auto s1       = arg1->get_shape().lens();
             auto out_lens = compute_broadcasted_lens(s0, s1);
-            auto l0 = prog.add_instruction(op::multibroadcast{out_lens}, arg0);
-            auto l1 = prog.add_instruction(op::multibroadcast{out_lens}, arg1);
+            auto l0 = arg0;
+            if(arg0->get_shape().lens() != out_lens)
+                l0 = prog.add_instruction(op::multibroadcast{out_lens}, arg0);
+
+            auto l1 = arg1;
+            if(arg1->get_shape().lens() != out_lens)
+                l1 = prog.add_instruction(op::multibroadcast{out_lens}, arg1);
+
             return prog.add_instruction(x, l0, l1);
         }
         else
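The new guard skips inserting a `multibroadcast` when an argument already has the broadcasted dimensions. Under standard NumPy-style rules (which `compute_broadcasted_lens` is assumed to follow), trailing dimensions are matched and size-1 dimensions stretch; a quick worked sketch:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // NumPy-style broadcast of two dimension lists (a sketch, not migraphx's exact helper).
    std::vector<std::size_t> broadcast_lens(std::vector<std::size_t> a, std::vector<std::size_t> b)
    {
        if(a.size() < b.size())
            std::swap(a, b);
        b.insert(b.begin(), a.size() - b.size(), 1); // left-pad the shorter shape with 1s
        std::vector<std::size_t> out(a.size());
        for(std::size_t i = 0; i < a.size(); i++)
        {
            assert(a[i] == b[i] or a[i] == 1 or b[i] == 1); // otherwise not broadcastable
            out[i] = std::max(a[i], b[i]);
        }
        return out;
    }

    // broadcast_lens({8, 3, 1, 5}, {3, 4, 5}) == {8, 3, 4, 5}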
@@ -261,6 +284,43 @@ struct onnx_parser
         });
     }

+    template <class T>
+    std::vector<int64_t> to_int64_vector(const std::vector<T>& input_vector)
+    {
+        std::vector<int64_t> output_vector(input_vector.begin(), input_vector.end());
+        return output_vector;
+    }
+
+    instruction_ref add_bias(const std::vector<instruction_ref>& args,
+                             instruction_ref curr_ins,
+                             uint64_t axis)
+    {
+        if(args.size() == 3)
+        {
+            auto bias_bcast =
+                prog.add_instruction(op::broadcast{axis, curr_ins->get_shape().lens()}, args[2]);
+            return prog.add_instruction(op::add{}, curr_ins, bias_bcast);
+        }
+        return curr_ins;
+    }
+
+    template <class Op>
+    void check_asym_padding(instruction_ref& ins,
+                            std::vector<int64_t>& padding,
+                            Op& op,
+                            float pad_val = 0)
+    {
+        if(padding[0] != padding[2] || padding[1] != padding[3])
+        {
+            padding = {0, 0, padding[0], padding[1], 0, 0, padding[2], padding[3]};
+            ins     = prog.add_instruction(op::pad{padding, pad_val}, ins);
+        }
+        else
+        {
+            op.padding[0] = padding[0];
+            op.padding[1] = padding[1];
+        }
+    }
+
     instruction_ref parse_clip(const std::string&,
                                const attribute_map& attributes,
                                std::vector<instruction_ref> args)
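ONNX `pads` arrive as `{top, left, bottom, right}`. When the two sides of an axis differ, the operator's symmetric `padding` field cannot express it, so `check_asym_padding` materializes an explicit 8-value `op::pad` over the 4-D NCHW input instead; an illustrative trace of the two branches:

    // pads = {1, 1, 2, 2}: top/bottom differ, so emit an explicit pad instruction.
    // op::pad takes one (before, after) entry per dimension of the NCHW tensor:
    //            N  C  H(top)  W(left)  N  C  H(bottom)  W(right)
    // padding = {0, 0, 1,      1,       0, 0, 2,         2};
    //
    // pads = {1, 1, 1, 1}: symmetric, so fold into the op itself: op.padding = {1, 1}.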
@@ -282,7 +342,7 @@ struct onnx_parser
                const attribute_map& attributes,
                std::vector<instruction_ref> args)
     {
-        int axis = 1;
+        int64_t axis = 1;
         if(contains(attributes, "axis"))
         {
             axis = parse_value(attributes.at("axis")).at<int>();
@@ -319,11 +379,72 @@ struct onnx_parser
         }
     }

+    template <class Op>
+    instruction_ref process_auto_pad_attribute(instruction_ref ins,
+                                               attribute_map& attributes,
+                                               Op& op,
+                                               const std::vector<std::size_t>& in_lens)
+    {
+        if(!contains(attributes, "auto_pad"))
+        {
+            return ins;
+        }
+
+        auto auto_pad = attributes["auto_pad"].s();
+        if(auto_pad.find("SAME") != std::string::npos)
+        {
+            // calculate the padding
+            std::array<std::size_t, 2> out_lens;
+            out_lens[0] = (in_lens[2] + op.stride[0] - 1) / op.stride[0];
+            out_lens[1] = (in_lens[3] + op.stride[1] - 1) / op.stride[1];
+
+            std::array<std::size_t, 2> explicit_pads;
+            explicit_pads[0] = (out_lens[0] - 1) * op.stride[0] + op.lengths[0] - in_lens[2];
+            explicit_pads[1] = (out_lens[1] - 1) * op.stride[1] + op.lengths[1] - in_lens[3];
+
+            op.padding[0] = explicit_pads[0] / 2;
+            op.padding[1] = explicit_pads[1] / 2;
+            explicit_pads[0] -= 2 * op.padding[0];
+            explicit_pads[1] -= 2 * op.padding[1];
+
+            std::vector<std::int64_t> pads(8, 0);
+            if(explicit_pads[0] != 0 or explicit_pads[1] != 0)
+            {
+                if(auto_pad == "SAME_UPPER")
+                {
+                    pads[6] = explicit_pads[0];
+                    pads[7] = explicit_pads[1];
+                }
+                else if(auto_pad == "SAME_LOWER")
+                {
+                    pads[2] = explicit_pads[0];
+                    pads[3] = explicit_pads[1];
+                }
+
+                // MaxPool
+                if(op.mode == "max")
+                {
+                    ins = prog.add_instruction(
+                        op::pad{pads, std::numeric_limits<float>::lowest()}, ins);
+                }
+                // AveragePool
+                else
+                {
+                    ins = prog.add_instruction(op::pad{pads}, ins);
+                }
+            }
+
+            op.padding_mode = op::padding_mode_t::same;
+        }
+
+        return ins;
+    }
+
+    template <class Op>
     instruction_ref
     parse_conv(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
-        op::convolution op;
-        auto l0 = args[0];
+        Op op;
+        auto l0      = args[0];
+        auto weights = args[1];
         if(contains(attributes, "pads"))
         {
             if(contains(attributes, "auto_pad"))
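For `auto_pad=SAME_*`, the output spatial size is `ceil(in / stride)` and the total padding per axis is whatever makes that size come out, i.e. `(out - 1) * stride + kernel - in`; the code above halves it per side and routes any odd leftover to the lower or upper side. A concrete check of the arithmetic:

    #include <cstddef>

    int main()
    {
        std::size_t in = 224, stride = 2, kernel = 7;
        std::size_t out   = (in + stride - 1) / stride;       // ceil(224/2) = 112
        std::size_t total = (out - 1) * stride + kernel - in; // 111*2 + 7 - 224 = 5
        std::size_t sym   = total / 2;                        // 2 on each side
        std::size_t extra = total - 2 * sym;                  // 1 leftover
        // SAME_UPPER puts the leftover after the data, SAME_LOWER before it.
        return static_cast<int>(sym + extra);
    }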
@@ -340,11 +461,76 @@ struct onnx_parser
             {
                 MIGRAPHX_THROW("padding should have 4 values");
             }
+            check_asym_padding(l0, padding, op);
         }
         if(contains(attributes, "strides"))
         {
             copy(attributes["strides"].ints(), op.stride.begin());
         }
         if(contains(attributes, "dilations"))
         {
             copy(attributes["dilations"].ints(), op.dilation.begin());
         }
         if(contains(attributes, "auto_pad"))
         {
             auto s = attributes["auto_pad"].s();
+            if(contains(attributes, "pads") and to_upper(s) != "NOTSET")
+            {
+                MIGRAPHX_THROW("auto_pad and padding cannot be specified simultaneously");
+            }
             if(s.find("SAME") != std::string::npos)
             {
                 op.padding_mode = op::padding_mode_t::same;
+                std::vector<size_t> weight_dims = weights->get_shape().lens();
+                size_t weight_h                 = weight_dims[2];
+                size_t weight_w                 = weight_dims[3];
+
+                auto input_dims = l0->get_shape().lens();
+                std::vector<int64_t> padding(input_dims.size());
+                calculate_padding(
+                    0, padding, input_dims[2], op.stride[0], op.dilation[0], weight_h);
+                calculate_padding(
+                    1, padding, input_dims[3], op.stride[1], op.dilation[1], weight_w);
+
+                check_asym_padding(l0, padding, op);
             }
         }
         if(contains(attributes, "group"))
         {
             op.group = parse_value(attributes.at("group")).at<int>();
         }

+        auto l1 = prog.add_instruction(op, l0, args[1]);
+        return add_bias(args, l1, 1);
+    }
+
+    instruction_ref parse_conv_transpose(const std::string&,
+                                         attribute_map attributes,
+                                         std::vector<instruction_ref> args)
+    {
+        op::deconvolution op;
+        auto l0 = args[0];
+        std::vector<std::int64_t> padding;
+        bool asymm_padding = false;
+        if(contains(attributes, "pads"))
+        {
+            if(contains(attributes, "auto_pad"))
+            {
+                auto s = attributes["auto_pad"].s();
+                if(contains(attributes, "pads") and to_upper(s) != "NOTSET")
+                {
+                    MIGRAPHX_THROW("auto_pad and padding cannot be specified simultaneously");
+                }
+            }
+            copy(attributes["pads"].ints(), std::back_inserter(padding));
+            if(padding.size() != 4)
+            {
+                MIGRAPHX_THROW("padding should have 4 values");
+            }
+            if(padding[0] != padding[2] || padding[1] != padding[3])
+            {
+                // insert zeros for pad op (args[0] has 4 dims)
+                padding = {0, 0, padding[0], padding[1], 0, 0, padding[2], padding[3]};
+                l0            = prog.add_instruction(op::pad{padding}, l0);
+                asymm_padding = true;
+            }
+            else
+            {
@@ -373,18 +559,55 @@ struct onnx_parser
                 op.padding_mode = op::padding_mode_t::same;
             }
         }
         if(contains(attributes, "group"))
         {
             op.group = parse_value(attributes.at("group")).at<int>();
         }
-        if(args.size() == 3)
-        {
-            uint64_t axis = 1;
-            auto l1       = prog.add_instruction(op, l0, args[1]);
-            auto l2 = prog.add_instruction(op::broadcast{axis, l1->get_shape().lens()}, args[2]);
-            return prog.add_instruction(op::add{}, l1, l2);
-        }
-        return prog.add_instruction(op, l0, args[1]);
+        auto l1 = prog.add_instruction(op, l0, args[1]);
+        std::vector<int64_t> dims = to_int64_vector(l1->get_shape().lens());
+        std::vector<int64_t> curr_shape{dims[2], dims[3]};
+        if(asymm_padding)
+        {
+            op::slice slice_op;
+            slice_op.axes   = {0, 1, 2, 3};
+            slice_op.starts = {0, 0, 0 + padding[0], 0 + padding[1]};
+            slice_op.ends   = {
+                dims[0], dims[1], curr_shape[0] - padding[2], curr_shape[1] - padding[3]};
+
+            l1 = prog.add_instruction(slice_op, l1);
+        }
+
+        if(contains(attributes, "output_padding"))
+        {
+            std::vector<int64_t> output_padding;
+            copy(attributes["output_padding"].ints(), std::back_inserter(output_padding));
+            output_padding = {0, 0, 0, 0, 0, 0, output_padding[0], output_padding[1]};
+            l1             = prog.add_instruction(op::pad{output_padding}, l1);
+        }
+
+        if(contains(attributes, "output_shape"))
+        {
+            std::vector<int64_t> output_shape;
+            copy(attributes["output_shape"].ints(), std::back_inserter(output_shape));
+            dims       = to_int64_vector(l1->get_shape().lens());
+            curr_shape = {dims[2], dims[3]};
+            if(curr_shape != output_shape)
+            {
+                std::vector<int64_t> target_padding = {
+                    0, 0, 0, 0, 0, 0,
+                    output_shape[0] - curr_shape[0],
+                    output_shape[1] - curr_shape[1]};
+                l1 = prog.add_instruction(op::pad{target_padding}, l1);
+            }
+        }
+
+        return add_bias(args, l1, 1);
     }

     instruction_ref parse_pooling(const std::string& name,
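ConvTranspose inverts Conv's spatial mapping, so per axis the expected output size follows the standard formula `out = (in - 1) * stride - pad_begin - pad_end + kernel + output_padding`; the `output_shape` branch above pads whatever is still missing after the asymmetric-padding slice. A quick check with illustrative numbers:

    #include <cstdint>

    int main()
    {
        std::int64_t in = 14, stride = 2, kernel = 3, pad = 1, output_padding = 1;
        // (14 - 1) * 2 - 2 * 1 + 3 + 1 = 28
        std::int64_t out = (in - 1) * stride - 2 * pad + kernel + output_padding;
        return static_cast<int>(out);
    }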
@@ -398,27 +621,31 @@ struct onnx_parser
             auto lens  = args.front()->get_shape().lens();
             op.lengths = {lens[2], lens[3]};
         }

         if(contains(attributes, "pads"))
         {
+            if(contains(attributes, "auto_pad"))
+            {
+                auto s = attributes["auto_pad"].s();
+                if(to_upper(s) != "NOTSET")
+                {
+                    MIGRAPHX_THROW(
+                        "PARSE_POOLING: auto_pad and padding cannot be specified simultaneously");
+                }
+            }
             std::vector<std::int64_t> padding;
             copy(attributes["pads"].ints(), std::back_inserter(padding));
             if(padding.size() != 4)
             {
-                MIGRAPHX_THROW("padding should have 4 values");
-            }
-            if(padding[0] != padding[2] || padding[1] != padding[3])
-            {
-                // insert zeros for pad op (args[0] has 4 dims)
-                padding = {0, 0, padding[0], padding[1], 0, 0, padding[2], padding[3]};
-                l0      = prog.add_instruction(
-                    op::pad{padding, std::numeric_limits<float>::lowest()}, l0);
-            }
-            else
-            {
-                op.padding[0] = padding[0];
-                op.padding[1] = padding[1];
+                MIGRAPHX_THROW("PARSE_POOLING: padding should have 4 values");
             }
+            float pad_val = 0;
+            if(op.mode == "max")
+                pad_val = std::numeric_limits<float>::lowest();
+            check_asym_padding(l0, padding, op, pad_val);
         }

         if(contains(attributes, "strides"))
         {
             copy(attributes["strides"].ints(), op.stride.begin());
@@ -427,14 +654,11 @@ struct onnx_parser
         {
             copy(attributes["kernel_shape"].ints(), op.lengths.begin());
         }

-        if(contains(attributes, "auto_pad"))
-        {
-            auto s = attributes["auto_pad"].s();
-            if(s.find("SAME_UPPER") == std::string::npos)
-            {
-                MIGRAPHX_THROW("auto_pad only supports SAME_UPPER for pooling");
-            }
-            op.padding_mode = op::padding_mode_t::same;
-        }
+        auto in_lens = args[0]->get_shape().lens();
+        l0           = process_auto_pad_attribute(l0, attributes, op, in_lens);

         return prog.add_instruction(op, l0);
@@ -462,7 +686,7 @@ struct onnx_parser
     instruction_ref
     parse_flatten(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
-        uint64_t axis = 1;
+        int64_t axis = 1;
         if(contains(attributes, "axis"))
         {
             axis = parse_value(attributes.at("axis")).at<int>();
@@ -616,6 +840,7 @@ struct onnx_parser
         return prog.add_instruction(op::dot{alpha, beta}, l1, l2);
     }

+    template <class Op>
     instruction_ref
     parse_matmul(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
     {
@@ -664,7 +889,7 @@ struct onnx_parser
             }
         }

-        auto dot_res     = prog.add_instruction(op::dot{1.0f, 0.0f}, bl0, bl1);
+        auto dot_res     = prog.add_instruction(Op{1, 0}, bl0, bl1);
         int64_t num_axis = static_cast<int64_t>(dot_res->get_shape().lens().size());
         if(is_a_prepended)
         {
@@ -703,6 +928,42 @@ struct onnx_parser
         return prog.add_instruction(op, std::move(args));
     }

+    instruction_ref parse_instancenorm(const std::string&,
+                                       attribute_map attributes,
+                                       std::vector<instruction_ref> args)
+    {
+        // y = scale * ( x - mean ) / sqrt ( variance + epsilon ) + bias
+        // mean = reduce_mean({H, W}, x)
+        // variance = reduce_mean({H, W}, (x - mean)^2)
+
+        float epsilon = 1e-5f;
+        if(contains(attributes, "epsilon"))
+        {
+            epsilon = parse_value(attributes.at("epsilon")).at<float>();
+        }
+        auto x     = args[0];
+        auto scale = args[1];
+        auto bias  = args[2];
+        auto dims  = x->get_shape().lens();
+
+        auto mean            = prog.add_instruction(op::reduce_mean{{2, 3}}, x);
+        auto mean_bcast      = prog.add_instruction(op::multibroadcast{dims}, mean);
+        auto l0              = prog.add_instruction(op::sqdiff{}, x, mean_bcast);
+        auto variance        = prog.add_instruction(op::reduce_mean{{2, 3}}, l0);
+        auto l1              = prog.add_instruction(op::sub{}, x, mean_bcast);
+        auto epsilon_literal = prog.add_literal(epsilon);
+        auto epsilon_bcast   = prog.add_instruction(op::multibroadcast{dims}, epsilon_literal);
+        auto variance_bcast  = prog.add_instruction(op::multibroadcast{dims}, variance);
+        auto l2              = prog.add_instruction(op::add{}, variance_bcast, epsilon_bcast);
+        auto l3              = prog.add_instruction(op::rsqrt{}, l2);
+        auto l4              = prog.add_instruction(op::mul{}, l1, l3);
+        auto scale_bcast     = prog.add_instruction(op::broadcast{1, dims}, scale);
+        auto bias_bcast      = prog.add_instruction(op::broadcast{1, dims}, bias);
+        auto l5              = prog.add_instruction(op::mul{}, l4, scale_bcast);
+        return prog.add_instruction(op::add{}, l5, bias_bcast);
+    }
+
     instruction_ref parse_leaky_relu(const std::string&,
                                      attribute_map attributes,
                                      std::vector<instruction_ref> args)
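The graph built above is textbook instance normalization, with the division expressed as a multiply by `rsqrt(variance + epsilon)`. A scalar reference for one (n, c) plane, useful for sanity-checking the lowering (a sketch, not part of the repository):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Normalize one H*W plane: y = scale * (x - mean) / sqrt(var + eps) + bias
    std::vector<float>
    instancenorm_plane(const std::vector<float>& x, float scale, float bias, float eps = 1e-5f)
    {
        float mean = 0;
        for(float v : x)
            mean += v;
        mean /= x.size();

        float var = 0;
        for(float v : x)
            var += (v - mean) * (v - mean);
        var /= x.size();

        float inv = 1.0f / std::sqrt(var + eps); // the graph's rsqrt(var + eps)
        std::vector<float> y(x.size());
        for(std::size_t i = 0; i < x.size(); i++)
            y[i] = scale * (x[i] - mean) * inv + bias;
        return y;
    }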
@@ -763,11 +1024,12 @@ struct onnx_parser
             auto&& bias_floats = attributes["bias"].floats();
             bias = std::vector<float>(bias_floats.begin(), bias_floats.end());
         }

-        auto input_lens = args.front()->get_shape().lens();
+        auto input_shape       = args.front()->get_shape();
+        auto const& input_lens = input_shape.lens();
+        auto input_type        = input_shape.type();

-        auto scale_val = prog.add_literal(scale);
-        auto bias_vals = prog.add_literal(
-            migraphx::literal{migraphx::shape{migraphx::shape::float_type, {bias.size()}}, bias});
+        auto scale_val = prog.add_literal(literal{shape{input_type}, {scale}});
+        auto bias_vals = prog.add_literal(literal{shape{input_type, {bias.size()}}, bias});

         auto scale_tensor = prog.add_instruction(migraphx::op::scalar{input_lens}, scale_val);
         auto img_scaled   = prog.add_instruction(migraphx::op::mul{}, args.front(), scale_tensor);
@@ -1396,6 +1658,47 @@ struct onnx_parser
         }
     }

+    instruction_ref parse_reduce_l1(const std::string&,
+                                    attribute_map attributes,
+                                    std::vector<instruction_ref> args)
+    {
+        auto abs_ins = prog.add_instruction(op::abs{}, args[0]);
+        return parse_reduce_oper<op::reduce_sum>({}, std::move(attributes), {abs_ins});
+    }
+
+    instruction_ref parse_reduce_l2(const std::string&,
+                                    attribute_map attributes,
+                                    std::vector<instruction_ref> args)
+    {
+        auto square_ins = prog.add_instruction(op::mul{}, args[0], args[0]);
+        auto sum_ins = parse_reduce_oper<op::reduce_sum>({}, std::move(attributes), {square_ins});
+        return prog.add_instruction(op::sqrt{}, sum_ins);
+    }
+
+    instruction_ref parse_reduce_log_sum(const std::string&,
+                                         attribute_map attributes,
+                                         std::vector<instruction_ref> args)
+    {
+        auto sum_ins =
+            parse_reduce_oper<op::reduce_sum>({}, std::move(attributes), std::move(args));
+        return prog.add_instruction(op::log{}, sum_ins);
+    }
+
+    instruction_ref parse_reduce_log_sum_exp(const std::string&,
+                                             attribute_map attributes,
+                                             std::vector<instruction_ref> args)
+    {
+        auto exp_ins = prog.add_instruction(op::exp{}, args[0]);
+        auto sum_ins = parse_reduce_oper<op::reduce_sum>({}, std::move(attributes), {exp_ins});
+        return prog.add_instruction(op::log{}, sum_ins);
+    }
+
+    instruction_ref parse_reduce_sum_square(const std::string&,
+                                            attribute_map attributes,
+                                            std::vector<instruction_ref> args)
+    {
+        auto square_ins = prog.add_instruction(op::mul{}, args[0], args[0]);
+        return parse_reduce_oper<op::reduce_sum>({}, std::move(attributes), {square_ins});
+    }
+
     instruction_ref
     parse_cast(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
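Each of the new reductions is lowered by composing elementwise ops around `reduce_sum`, following the standard identities:

    ReduceL1(x)        = ReduceSum(Abs(x))
    ReduceL2(x)        = Sqrt(ReduceSum(x * x))
    ReduceLogSum(x)    = Log(ReduceSum(x))
    ReduceLogSumExp(x) = Log(ReduceSum(Exp(x)))
    ReduceSumSquare(x) = ReduceSum(x * x)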
@@ -1425,6 +1728,22 @@ struct onnx_parser
         }
     }

+    void parse_from(const void* data, std::size_t size)
+    {
+        onnx::ModelProto model;
+        if(model.ParseFromArray(data, size))
+        {
+            if(model.has_graph())
+            {
+                this->parse_graph(model.graph());
+            }
+        }
+        else
+        {
+            MIGRAPHX_THROW("Failed reading onnx file.");
+        }
+    }
+
     void parse_graph(const onnx::GraphProto& graph)
     {
         nodes = get_nodes(graph);
@@ -1438,7 +1757,7 @@ struct onnx_parser
             if(!contains(instructions, name))
             {
                 // TODO: Get shape of input parameter
-                shape s            = parse_type(input.type());
+                shape s            = parse_type(input.type(), batch_size);
                 instructions[name] = prog.add_parameter(name, s);
             }
         }
@@ -1446,6 +1765,29 @@ struct onnx_parser
         {
             this->parse_node(output.name());
         }

+        // Find instructions corresponding to the output
+        auto prog_output = graph.output();
+        std::vector<std::string> all_output_names;
+        std::vector<std::string> prog_output_names;
+        std::transform(prog_output.begin(),
+                       prog_output.end(),
+                       std::back_inserter(all_output_names),
+                       [](auto& node) { return node.name(); });
+        std::copy_if(all_output_names.begin(),
+                     all_output_names.end(),
+                     std::back_inserter(prog_output_names),
+                     [&](const auto& name) {
+                         return !(name.empty() or instructions.count(name) == 0);
+                     });
+
+        std::vector<instruction_ref> output_ins;
+        std::transform(prog_output_names.begin(),
+                       prog_output_names.end(),
+                       std::back_inserter(output_ins),
+                       [&](const auto& name) { return instructions[name]; });
+
+        // add the return instruction
+        prog.add_return(output_ins);
     }

     void parse_undefined(const std::string& name)
@@ -1464,14 +1806,14 @@ struct onnx_parser
         std::vector<instruction_ref> args;
         for(auto&& input : node.input())
         {
-            if(nodes.count(input) > 0)
+            if(input.empty())
             {
-                assert(name != input);
-                this->parse_node(input);
+                this->parse_undefined(input);
             }
-            else if(input.empty())
+            else if(nodes.count(input) > 0)
             {
-                this->parse_undefined(input);
+                assert(name != input);
+                this->parse_node(input);
             }
             args.push_back(instructions.at(input));
         }
@@ -1491,12 +1833,12 @@ struct onnx_parser
             }
             else
             {
-                assert(node.output().size() >= result.size());
-                std::transform(result.begin(),
-                               result.end(),
-                               node.output().begin(),
+                auto output_num = std::min<std::size_t>(node.output().size(), result.size());
+                std::transform(node.output().begin(),
+                               node.output().begin() + output_num,
+                               result.begin(),
                                std::inserter(instructions, instructions.end()),
-                               [](auto&& x, auto&& y) { return std::make_pair(y, x); });
+                               [](auto&& x, auto&& y) { return std::make_pair(x, y); });
             }
         }
     }
@@ -1572,6 +1914,8 @@ struct onnx_parser
         case onnx::AttributeProto::STRING:
         case onnx::AttributeProto::STRINGS:
         case onnx::AttributeProto::TENSORS:
+        case onnx::AttributeProto::SPARSE_TENSOR:
+        case onnx::AttributeProto::SPARSE_TENSORS:
         case onnx::AttributeProto::GRAPHS: return {};
         }
         MIGRAPHX_THROW("Invalid attribute type");
@@ -1658,7 +2002,7 @@ struct onnx_parser
         return literal{{shape_type, dims}, data.begin(), data.end()};
     }

-    static shape parse_type(const onnx::TypeProto& t)
+    static shape parse_type(const onnx::TypeProto& t, const unsigned int batch_size)
     {
         shape::type_t shape_type{};
         switch(t.tensor_type().elem_type())
@@ -1686,14 +2030,18 @@ struct onnx_parser
         std::transform(tensor_dims.begin(),
                        tensor_dims.end(),
                        std::back_inserter(dims),
-                       [](auto&& d) -> std::size_t {
-                           if(not d.has_dim_value())
+                       [&](auto&& d) -> std::size_t {
+                           if(d.has_dim_value())
                            {
-                               long default_batch_size = 1; // FIXME
-                               return default_batch_size;
+                               if(static_cast<int>(d.dim_value()) <= 0)
+                                   return batch_size;
+                               return d.dim_value();
                            }
-                           return d.dim_value();
+                           return batch_size;
                        });
         if(dims.empty())
             return {shape_type};

         return {shape_type, dims};
     }
@@ -1728,15 +2076,16 @@ struct onnx_parser
     }
 };

-program parse_onnx(const std::string& name)
+template <class... Ts>
+program parse_onnx_from(onnx_options options, Ts&&... xs)
 {
-    std::fstream input(name.c_str(), std::ios::in | std::ios::binary);
     onnx_parser parser;
+    parser.batch_size = options.batch_size;

 #ifndef NDEBUG
     // Log the program when it can't be parsed
     try
     {
-        parser.parse_from(input);
+        parser.parse_from(std::forward<Ts>(xs)...);
     }
     catch(...)
     {
@@ -1744,10 +2093,26 @@ program parse_onnx(const std::string& name)
         throw;
     }
 #else
-    parser.parse_from(input);
+    parser.parse_from(std::forward<Ts>(xs)...);
 #endif
     return std::move(parser.prog);
 }

+program parse_onnx(const std::string& name, onnx_options options)
+{
+    std::fstream input(name.c_str(), std::ios::in | std::ios::binary);
+    return parse_onnx_from(options, input);
+}
+
+program parse_onnx_buffer(const std::string& buffer, onnx_options options)
+{
+    return parse_onnx_from(options, buffer.data(), buffer.size());
+}
+
+program parse_onnx_buffer(const void* data, std::size_t size, onnx_options options)
+{
+    return parse_onnx_from(options, data, size);
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
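Taken together, the new entry points let the same parser run from a file stream or an in-memory buffer, with `options.batch_size` substituted wherever the model leaves an input dimension dynamic or non-positive. A usage sketch (file name illustrative, and assuming `onnx_options` default-constructs to batch_size = 1):

    #include <fstream>
    #include <sstream>
    #include <string>
    #include <migraphx/onnx.hpp>

    int main()
    {
        migraphx::onnx_options options;
        options.batch_size = 8; // replaces dynamic/zero dims in model inputs

        // From a file path:
        auto p1 = migraphx::parse_onnx("model.onnx", options);

        // From bytes already in memory:
        std::ifstream f("model.onnx", std::ios::binary);
        std::stringstream ss;
        ss << f.rdbuf();
        std::string bytes = ss.str();
        auto p2 = migraphx::parse_onnx_buffer(bytes, options);
    }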
src/onnx/onnx.proto

@@ -3,24 +3,42 @@
 //
-// Copyright (c) Facebook Inc. and Microsoft Corporation.
+// Copyright (c) ONNX Project Contributors.
 // Licensed under the MIT license.

 syntax = "proto2";

 package onnx;

-// Note [Release]
-// We are still in the very early stage of defining ONNX. The current
-// version of ONNX is a starting point. While we are actively working
-// towards a complete spec, we would like to get the community involved
-// by sharing our working version of ONNX.
+// Overview
+//
+// ONNX is an open specification that is comprised of the following components:
+//
+// 1)  A definition of an extensible computation graph model.
+// 2)  Definitions of standard data types.
+// 3)  Definitions of built-in operators.
+//
+// This document describes the syntax of models and their computation graphs,
+// as well as the standard data types. Together, they are referred to as the ONNX
+// Intermediate Representation, or 'IR' for short.
+//
+// The normative semantic specification of the ONNX IR is found in docs/IR.md.
+// Definitions of the built-in neural network operators may be found in docs/Operators.md.

-// Note [Protobuf compatibility]
-// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-// Based on experience working with downstream vendors, we generally can't
-// assume recent versions of protobufs. This means that we do not use any
-// protobuf features that are only available in proto3.
+// Notes
+//
+// Release
+//
+// We are still in the very early stage of defining ONNX. The current
+// version of ONNX is a starting point. While we are actively working
+// towards a complete spec, we would like to get the community involved
+// by sharing our working version of ONNX.
+//
+// Protobuf compatibility
+//
+// To simplify framework compatibility, ONNX is defined using the subset of protobuf
+// that is compatible with both protobuf v2 and v3. This means that we do not use any
+// protobuf features that are only available in one of the two versions.
 //
 // Here are the most notable contortions we have to carry out to work around
 // these limitations:
@@ -29,30 +47,11 @@ package onnx;
 //   of key-value pairs, where order does not matter and duplicates
 //   are not allowed.

-// Note [Namespaces]
-// ~~~~~~~~~~~~~~~~~
-// ONNX gives explicit names to graphs, intermediate values and
-// serialized tensors. To make it easier to generate names, we organize
-// these into separate namespaces (so, e.g., a graph can have the same
-// name as a serialized tensor.) The namespaces are as follows:
-//
-// - Node: These names identify specific nodes in the graph (but not, necessarily
-//   any particular input or output of the node.
-// - Graph: These names identify graphs in the protobuf.
-// - Attribute: These names identify attribute names for extra attributes that
-//   are passed to operators.
-// - Operator: These names identify particular operators.
-// - Value: These names identify intermediate values (typically tensors) flowing through
-//   the computation of a graph.
-// - Shape: These names represent parameters for unknown shape dimensions.
-//
-// We specify the namespace of a name in ONNX as comments in the form
-// of "namespace {Node,Graph,Operator,Attribute,Value,Shape}". Framework is responsible
-// for supporting the namespaces.
-//
-// Naming things is hard. Every element with a name has an optional doc_string associated
-// with it, providing a human-readable description in text markdown.
+// Versioning
+//
+// ONNX versioning is specified in docs/IR.md and elaborated on in docs/Versioning.md
+//
+// To be compatible with both proto2 and proto3, we will use a version number
+// that is not defined by the default value but an explicit enum number.

 enum Version {
@@ -61,26 +60,53 @@ enum Version {
     _START_VERSION = 0;

     // The version field is always serialized and we will use it to store the
     // version that the graph is generated from. This helps us set up version
-    // control. We should use version as
-    // xx(major) - xx(minor) - xxxx(bugfix)
-    // and we are starting with 0x00000001 (0.0.1), which was the
-    // version we published on Oct 10, 2017.
-    IR_VERSION_2017_10_10 = 0x00000001;
+    // control.
+    // For the IR, we are using simple numbers starting with 0x00000001,
+    // which was the version we published on Oct 10, 2017.
+    IR_VERSION_2017_10_10 = 0x0000000000000001;

-    // IR_VERSION 0.0.2 published on Oct 30, 2017
+    // IR_VERSION 2 published on Oct 30, 2017
     // - Added type discriminator to AttributeProto to support proto3 users
-    IR_VERSION_2017_10_30 = 0x00000002;
+    IR_VERSION_2017_10_30 = 0x0000000000000002;

-    // IR VERSION 0.0.3 published on Nov 3, 2017
+    // IR VERSION 3 published on Nov 3, 2017
     // - For operator versioning:
     //    - Added new message OperatorSetIdProto
     //    - Added opset_import in ModelProto
     // - For vendor extensions, added domain in NodeProto
-    IR_VERSION = 0x00000003;
+    IR_VERSION_2017_11_3 = 0x0000000000000003;
+
+    // IR VERSION 4 published on Jan 22, 2019
+    // - Relax constraint that initializers should be a subset of graph inputs
+    // - Add type BFLOAT16
+    IR_VERSION_2019_1_22 = 0x0000000000000004;
+
+    // IR VERSION 5 published on March 18, 2019
+    // - Add message TensorAnnotation.
+    // - Add quantization annotation in GraphProto to map tensor with its scale and zero point quantization parameters.
+    IR_VERSION_2019_3_18 = 0x0000000000000005;
+
+    // IR VERSION 6 published on Sep 19, 2019
+    // - Add support for sparse tensor constants stored in model.
+    //   - Add message SparseTensorProto
+    //   - Add sparse initializers
+    IR_VERSION_2019_9_19 = 0x0000000000000006;
+
+    // IR VERSION 7 published on <TBD>
+    // - Add a list to promote inference graph's initializers to global and
+    //   mutable variables. Global variables are visible in all graphs of the
+    //   stored models.
+    // - Add message TrainingInfoProto to store initialization
+    //   method and training algorithm. The execution of TrainingInfoProto
+    //   can modify the values of mutable variables.
+    // - Make inference graph callable from TrainingInfoProto via GraphCall operator.
+    IR_VERSION = 0x0000000000000007;
 }

-// A named attribute containing either singular float, integer, string
-// and tensor values, or repeated float, integer, string and tensor values.
+// Attributes
+//
+// A named attribute containing either singular float, integer, string, graph,
+// and tensor values, or repeated float, integer, string, graph, and tensor values.
 // An AttributeProto MUST contain the name field, and *only one* of the
 // following content fields, effectively enforcing a C/C++ union equivalent.
 message AttributeProto {
@@ -94,26 +120,34 @@ message AttributeProto {
         STRING = 3;
         TENSOR = 4;
         GRAPH = 5;
+        SPARSE_TENSOR = 11;

         FLOATS = 6;
         INTS = 7;
         STRINGS = 8;
         TENSORS = 9;
         GRAPHS = 10;
+        SPARSE_TENSORS = 12;
     }

     // The name field MUST be present for this version of the IR.
     optional string name = 1;           // namespace Attribute

+    // if ref_attr_name is not empty, ref_attr_name is the attribute name in parent function.
+    // In this case, this AttributeProto does not contain data, and it's a reference of attribute
+    // in parent scope.
+    // NOTE: This should ONLY be used in function (sub-graph). It's invalid to be used in main graph.
+    optional string ref_attr_name = 21;
+
     // A human-readable documentation for this attribute. Markdown is allowed.
     optional string doc_string = 13;

     // The type field MUST be present for this version of the IR.
     // For 0.0.1 versions of the IR, this field was not defined, and
-    // implementations needed to use has_field hueristics to determine
+    // implementations needed to use has_field heuristics to determine
     // which value field was in use.  For IR_VERSION 0.0.2 or later, this
     // field MUST be set and match the f|i|s|t|... field in use.  This
-    // change was made to accomodate proto3 implementations.
+    // change was made to accommodate proto3 implementations.
     optional AttributeType type = 20;

     // Exactly ONE of the following fields must be present for this version of the IR
@@ -122,6 +156,7 @@ message AttributeProto {
     optional bytes s = 4;           // UTF-8 string
     optional TensorProto t = 5;     // tensor value
     optional GraphProto g = 6;      // graph
+    optional SparseTensorProto sparse_tensor = 22;  // sparse tensor value

     // Do not use field below, it's deprecated.
     // optional ValueProto v = 12;  // value - subsumes everything but graph
@@ -130,6 +165,7 @@ message AttributeProto {
     repeated bytes strings = 9;         // list of UTF-8 strings
     repeated TensorProto tensors = 10;  // list of tensors
     repeated GraphProto graphs = 11;    // list of graph
+    repeated SparseTensorProto sparse_tensors = 23; // list of sparse tensors
 }

 // Defines information on value, including the name, the type, and
@@ -137,16 +173,20 @@ message AttributeProto {
 message ValueInfoProto {
     // This field MUST be present in this version of the IR.
     optional string name = 1;     // namespace Value
-    // This field MUST be present in this version of the IR.
+    // This field MUST be present in this version of the IR for
+    // inputs and outputs of the top-level graph.
     optional TypeProto type = 2;
     // A human-readable documentation for this value. Markdown is allowed.
     optional string doc_string = 3;
 }

-// NodeProto stores a node that is similar to the notion of "layer"
-// or "operator" in many deep learning frameworks. For example, it can be a
-// node of type "Conv" that takes in an image, a filter tensor and a bias
-// tensor, and produces the convolved output.
+// Nodes
+//
+// Computation graphs are made up of a DAG of nodes, which represent what is
+// commonly called a "layer" or "pipeline stage" in machine learning frameworks.
+//
+// For example, it can be a node of type "Conv" that takes in an image, a filter
+// tensor and a bias tensor, and produces the convolved output.
 message NodeProto {
     repeated string input = 1;    // namespace Value
     repeated string output = 2;   // namespace Value
@@ -161,18 +201,125 @@ message NodeProto {
     optional string domain = 7;   // namespace Domain

     // Additional named attributes.
-    // NOTE: Simply using ValueProto.NameValuePairProto is the most general
-    // solution. I kept AttributeProto to minimize churn on CI results.
     repeated AttributeProto attribute = 5;

     // A human-readable documentation for this node. Markdown is allowed.
     optional string doc_string = 6;
 }

-// ModelProto is a top-level file/container format for bundling a ML model.
-// The semantics of the model are described by the GraphProto that represents
-// a parameterized computation graph against a set of named operators that are
-// defined independently from the graph.
+// Training information
+// TrainingInfoProto stores information for training a model.
+// In particular, this defines two functionalities: an initialization-step
+// and a training-algorithm-step. Initialization resets the model
+// back to its original state as if no training has been consumed.
+// Training algorithm improves the model based on input data.
+//
+// The semantics of the initialization-step is that the initializers
+// in ModelProto.graph and in TrainingInfoProto.algorithm are first
+// initialized as specified by the initializers in the graph, and then
+// updated by the "initialization_binding" in every instance in
+// ModelProto.training_info.
+//
+// The field "algorithm" defines a computation graph which represents a
+// training algorithm's step. After the execution of a
+// TrainingInfoProto.algorithm, the initializers specified by "update_binding"
+// may be immediately updated. If the targeted training algorithm contains
+// consecutive update stages (such as block coordinate descent methods),
+// the user needs to create a TrainingInfoProto for each stage.
+message TrainingInfoProto {
+    // This field describes a graph to compute the initial tensors
+    // upon starting the training process. Initialization graph has no input
+    // and can have multiple outputs. Usually, trainable tensors in neural
+    // networks are randomly initialized. To achieve that, for each tensor,
+    // the user can put a random number operator such as RandomNormal or
+    // RandomUniform in TrainingInfoProto.initialization.node and assign its
+    // random output to the specific tensor using "initialization_binding".
+    // This graph can also set the initializers in "algorithm" in the same
+    // TrainingInfoProto; a use case is resetting the number of training
+    // iteration to zero.
+    //
+    // By default, this field is an empty graph and its evaluation does not
+    // produce any output.
+    optional GraphProto initialization = 1;
+
+    // This field represents a training algorithm step. Given required inputs,
+    // it computes outputs to update initializers in its own or inference graph's
+    // initializer lists. In general, this graph contains loss node, gradient node,
+    // optimizer node, increment of iteration count, and some calls to the inference
+    // graph.
+    //
+    // The field algorithm.node is the only place the user can use GraphCall
+    // operator. The only callable graph is the one stored in ModelProto.graph.
+    //
+    // By default, this field is an empty graph and its evaluation does not
+    // produce any output.
+    optional GraphProto algorithm = 2;
+
+    // This field specifies the bindings from the outputs of "initialization" to
+    // some initializers in "ModelProto.graph.initializer" and
+    // the "algorithm.initializer" in the same TrainingInfoProto.
+    // See "update_binding" below for details.
+    //
+    // By default, this field is empty and no initializer would be changed
+    // by the execution of "initialization".
+    repeated StringStringEntryProto initialization_binding = 3;
+
+    // Gradient-based training is usually an iterative procedure. In one gradient
+    // descent iteration, we apply
+    //
+    // x = x - r * g
+    //
+    // where "x" is the optimized tensor, "r" stands for learning rate, and "g" is
+    // gradient of "x" with respect to a chosen loss. To avoid adding assignments
+    // into the training graph, we split the update equation into
+    //
+    // y = x - r * g
+    // x = y
+    //
+    // The user needs to save "y = x - r * g" into TrainingInfoProto.algorithm. To
+    // tell that "y" should be assigned to "x", the field "update_binding" may
+    // contain a key-value pair of strings, "x" (key of StringStringEntryProto)
+    // and "y" (value of StringStringEntryProto).
+    // For a neural network with multiple trainable (mutable) tensors, there can
+    // be multiple key-value pairs in "update_binding".
+    //
+    // The initializers appears as keys in "update_binding" are considered
+    // mutable and globally-visible variables. This implies some behaviors
+    // as described below.
+    //
+    // 1. We have only unique keys in all "update_binding"s so that two global
+    //    variables may not have the same name. This ensures that one
+    //    global variable is assigned up to once.
+    // 2. The keys must appear in names of "ModelProto.graph.initializer" or
+    //    "TrainingInfoProto.algorithm.initializer".
+    // 3. The values must be output names of "algorithm".
+    // 4. If an optional input of a graph is omitted when using GraphCall, the
+    //    global variable with the same name may be used.
+    // 5. When using GraphCall, the users always can pass values to optional
+    //    inputs of the called graph even if the associated initializers appears
+    //    as keys in "update_binding"s.
+    // 6. The graphs in TrainingInfoProto's can use global variables as
+    //    their operator inputs.
+    // 7. Mutable variables are initialized to the value specified by the
+    //    corresponding initializer, and then potentially updated by
+    //    "initializer_binding"s and "update_binding"s in "TrainingInfoProto"s.
+    //
+    // This field usually contains names of trainable tensors
+    // (in ModelProto.graph), optimizer states such as momentums in advanced
+    // stochastic gradient methods (in TrainingInfoProto.graph),
+    // and number of training iterations (in TrainingInfoProto.graph).
+    //
+    // By default, this field is empty and no initializer would be changed
+    // by the execution of "algorithm".
+    repeated StringStringEntryProto update_binding = 4;
+}
+
+// Models
+//
+// ModelProto is a top-level file/container format for bundling a ML model and
+// associating its computation graph with metadata.
+//
+// The semantics of the model are described by the associated GraphProto's.
 message ModelProto {
     // The version of the IR this model targets. See Version enum above.
     // This field MUST be present.
@@ -217,6 +364,17 @@ message ModelProto {
     // Named metadata values; keys should be distinct.
     repeated StringStringEntryProto metadata_props = 14;

+    // Training-specific information. Sequentially executing all stored
+    // `TrainingInfoProto.algorithm`s and assigning their outputs following
+    // the corresponding `TrainingInfoProto.update_binding`s is one training
+    // iteration. Similarly, to initialize the model
+    // (as if training hasn't happened), the user should sequentially execute
+    // all stored `TrainingInfoProto.initialization`s and assigns their outputs
+    // using `TrainingInfoProto.initialization_binding`s.
+    //
+    // If this field is empty, the training behavior of the model is undefined.
+    repeated TrainingInfoProto training_info = 20;
 };

 // StringStringEntryProto follows the pattern for cross-proto-version maps.
@@ -226,25 +384,38 @@ message StringStringEntryProto {
optional
string
value
=
2
;
};
// GraphProto defines a parameterized series of nodes to form a directed acyclic graph.
// This is the equivalent of the "network" and "graph" in many deep learning
message
TensorAnnotation
{
optional
string
tensor_name
=
1
;
// <key, value> pairs to annotate tensor specified by <tensor_name> above.
// The keys used in the mapping below must be pre-defined in ONNX spec.
// For example, for 8-bit linear quantization case, 'SCALE_TENSOR', 'ZERO_POINT_TENSOR' will be pre-defined as
// quantization parameter keys.
repeated
StringStringEntryProto
quant_parameter_tensor_names
=
2
;
}
// Graphs
//
// A graph defines the computational logic of a model and is comprised of a parameterized
// list of nodes that form a directed acyclic graph based on their inputs and outputs.
// This is the equivalent of the "network" or "graph" in many deep learning
// frameworks.
message GraphProto {
-  // The nodes in the graph.
+  // The nodes in the graph, sorted topologically.
  repeated NodeProto node = 1;

  // The name of the graph.
  optional string name = 2;  // namespace Graph
-  // A list of named tensor values (constants), used to specify default
-  // values for some of the inputs of the graph.
+  // A list of named tensor values, used to specify constant inputs of the graph.
  // Each TensorProto entry must have a distinct name (within the list) that
-  // also appears in the input list.
-  // In an evaluation, the default value specified here is used if and only if
-  // user specifies no value for the corresponding input parameter.
-  // May be used to pass serialized parameters for networks.
+  // MAY also appear in the input list.
  repeated TensorProto initializer = 5;

  // Initializers (see above) stored in sparse format.
  repeated SparseTensorProto sparse_initializer = 15;

  // A human-readable documentation for this graph. Markdown is allowed.
  optional string doc_string = 10;
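Under the new wording an initializer is a constant that only optionally shadows a graph input, so an importer has to separate true runtime inputs from constants. A sketch, assuming the protobuf-generated onnx headers:

#include <onnx/onnx_pb.h>
#include <set>
#include <string>
#include <vector>

// Inputs not backed by an initializer must be supplied at run time.
std::vector<std::string> runtime_inputs(const onnx::GraphProto& g)
{
    std::set<std::string> constants;
    for(const auto& t : g.initializer())
        constants.insert(t.name());

    std::vector<std::string> result;
    for(const auto& vi : g.input())
        if(constants.count(vi.name()) == 0)
            result.push_back(vi.name());
    return result;
}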
...
...
@@ -256,7 +427,13 @@ message GraphProto {
// must be distinct. It is optional for a value to appear in value_info list.
  repeated ValueInfoProto value_info = 13;

-  // DO NOT USE the following fields, they were deprecated before
+  // This field carries information to indicate the mapping among a tensor and its
+  // quantization parameter tensors. For example:
+  // For tensor 'a', it may have {'SCALE_TENSOR', 'a_scale'} and {'ZERO_POINT_TENSOR',
+  // 'a_zero_point'} annotated, which means that tensor 'a_scale' and tensor 'a_zero_point'
+  // are the scale and zero point of tensor 'a' in the model.
+  repeated TensorAnnotation quantization_annotation = 14;
+
+  // DO NOT USE the following fields, they were deprecated from earlier versions.
  // repeated string input = 3;
  // repeated string output = 4;
  // optional int64 ir_version = 6;
...
...
@@ -265,7 +442,9 @@ message GraphProto {
// optional string domain = 9;
}
-// A message defined to store a tensor in its serialized format.
+// Tensors
+//
+// A serialized tensor value.
message TensorProto {
  enum DataType {
    UNDEFINED = 0;
...
...
@@ -280,13 +459,21 @@ message TensorProto {
    STRING = 8;  // string
    BOOL = 9;    // bool

    // Advanced types

    // IEEE754 half-precision floating-point format (16 bits wide).
    // This format has 1 sign bit, 5 exponent bits, and 10 mantissa bits.
    FLOAT16 = 10;

    DOUBLE = 11;
    UINT32 = 12;
    UINT64 = 13;
    COMPLEX64 = 14;   // complex with float32 real and imaginary components
    COMPLEX128 = 15;  // complex with float64 real and imaginary components

    // Non-IEEE floating-point format based on IEEE754 single-precision
    // floating-point number truncated to 16 bits.
    // This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
    BFLOAT16 = 16;

    // Future extensions go here.
  }
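The two 16-bit layouts differ only in how the 15 non-sign bits are split. An illustrative sketch (not part of onnx.proto) that extracts the float16 fields and truncates a float32 to bfloat16 with round-to-nearest-even:

#include <cstdint>
#include <cstring>

// float16: 1 sign bit, 5 exponent bits, 10 mantissa bits.
void float16_fields(uint16_t h, int& sign, int& exponent, int& mantissa)
{
    sign     = (h >> 15) & 0x1;
    exponent = (h >> 10) & 0x1f;
    mantissa = h & 0x3ff;
}

// bfloat16: the top 16 bits of a float32 (1 sign, 8 exponent, 7 mantissa bits).
uint16_t float_to_bfloat16(float f)
{
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    bits += 0x7fff + ((bits >> 16) & 1);  // round to nearest even
    return static_cast<uint16_t>(bits >> 16);
}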
...
...
@@ -294,7 +481,8 @@ message TensorProto {
  repeated int64 dims = 1;

  // The data type of the tensor.
-  optional DataType data_type = 2;
+  // This field MUST have a valid TensorProto.DataType value
+  optional int32 data_type = 2;
// For very large tensors, we may want to store them in chunks, in which
// case the following fields will specify the segment that is stored in
...
...
@@ -305,7 +493,7 @@ message TensorProto {
  }
  optional Segment segment = 3;

-  // Tensor content must be in the row major order.
+  // Tensor content must be organized in row-major order.
  //
  // Depending on the data_type field, exactly one of the fields below with
  // name ending in _data is used to store the elements of the tensor.
...
...
@@ -313,7 +501,7 @@ message TensorProto {
  // For float and complex64 values
  // Complex64 tensors are encoded as a single array of floats,
  // with the real components appearing in odd numbered positions,
-  // and the corresponding imaginary component apparing in the
+  // and the corresponding imaginary component appearing in the
  // subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
  // is encoded as [1.0, 2.0, 3.0, 4.0])
  // When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
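A sketch of that interleaving (real components at 1-based odd positions, imaginary components immediately after) using std::complex:

#include <complex>
#include <vector>

std::vector<float> pack_complex64(const std::vector<std::complex<float>>& v)
{
    std::vector<float> data;
    data.reserve(v.size() * 2);
    for(const auto& c : v)
    {
        data.push_back(c.real());  // odd 1-based position
        data.push_back(c.imag());  // subsequent even position
    }
    return data;
}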
...
...
@@ -323,7 +511,7 @@ message TensorProto {
  // float16 values must be bit-wise converted to a uint16_t prior
  // to writing to the buffer.
  // When this field is present, the data_type field MUST be
-  // INT32, INT16, INT8, UINT16, INT8, BOOL, or FLOAT32
+  // INT32, INT16, INT8, UINT16, UINT8, BOOL, or FLOAT16
  repeated int32 int32_data = 5 [packed = true];
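A sketch of the required bit-wise float16 storage, assuming the protobuf-generated onnx headers; each element's 16-bit pattern occupies the low half of one int32 entry:

#include <onnx/onnx_pb.h>
#include <cstdint>

void append_float16(onnx::TensorProto& t, uint16_t half_bits)
{
    t.add_int32_data(static_cast<int32_t>(half_bits));
}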
// For strings.
...
...
@@ -360,10 +548,32 @@ message TensorProto {
  // When this field is present, the data_type field MUST NOT be STRING or UNDEFINED
  optional bytes raw_data = 9;

  // Data can be stored inside the protobuf file using type-specific fields or raw_data.
  // Alternatively, raw bytes data can be stored in an external file, using the external_data field.
  // external_data stores key-value pairs describing data location. Recognized keys are:
  // - "location" (required) - POSIX filesystem path relative to the directory where the ONNX
  //   protobuf model was stored
  // - "offset" (optional) - position of the byte at which stored data begins. Integer stored as string.
  //   Offset values SHOULD be multiples of 4096 (page size) to enable mmap support.
  // - "length" (optional) - number of bytes containing data. Integer stored as string.
  // - "checksum" (optional) - SHA1 digest of the file specified under the 'location' key.
  repeated StringStringEntryProto external_data = 13;

  // Location of the data for this tensor. MUST be one of:
  // - DEFAULT - data stored inside the protobuf message. Data is stored in raw_data (if set)
  //   otherwise in type-specified field.
  // - EXTERNAL - data stored in an external location as described by external_data field.
  enum DataLocation {
    DEFAULT  = 0;
    EXTERNAL = 1;
  }

  // If value not set, data is stored in raw_data (if set) otherwise in type-specified field.
  optional DataLocation data_location = 14;
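A sketch of reading those key-value pairs into a usable descriptor, assuming the protobuf-generated onnx headers (length 0 is used here to mean "to end of file"):

#include <onnx/onnx_pb.h>
#include <cstdint>
#include <string>

struct external_location
{
    std::string path;     // "location" (required)
    uint64_t offset = 0;  // "offset" (optional)
    uint64_t length = 0;  // "length" (optional)
};

external_location get_external_location(const onnx::TensorProto& t)
{
    external_location loc;
    for(const auto& kv : t.external_data())
    {
        if(kv.key() == "location")
            loc.path = kv.value();
        else if(kv.key() == "offset")
            loc.offset = std::stoull(kv.value());
        else if(kv.key() == "length")
            loc.length = std::stoull(kv.value());
    }
    return loc;
}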
  // For double
-  // Complex64 tensors are encoded as a single array of doubles,
+  // Complex128 tensors are encoded as a single array of doubles,
  // with the real components appearing in odd numbered positions,
-  // and the corresponding imaginary component apparing in the
+  // and the corresponding imaginary component appearing in the
  // subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
  // is encoded as [1.0, 2.0, 3.0, 4.0])
  // When this field is present, the data_type field MUST be DOUBLE or COMPLEX128
...
...
@@ -375,6 +585,28 @@ message TensorProto {
  repeated uint64 uint64_data = 11 [packed = true];
}
// A serialized sparse-tensor value
message SparseTensorProto {
  // The sequence of non-default values are encoded as a tensor of shape [NNZ].
  // The default-value is zero for numeric tensors, and empty-string for string tensors.
  optional TensorProto values = 1;

  // The indices of the non-default values, which may be stored in one of two formats.
  // (a) Indices can be a tensor of shape [NNZ, rank] with the [i,j]-th value
  //     corresponding to the j-th index of the i-th value (in the values tensor).
  // (b) Indices can be a tensor of shape [NNZ], in which case the i-th value
  //     must be the linearized-index of the i-th value (in the values tensor).
  //     The linearized-index can be converted into an index tuple (k_1,...,k_rank)
  //     using the shape provided below.
  // The indices must appear in ascending order without duplication.
  // In the first format, the ordering is lexicographic-ordering:
  // e.g., index-value [1,4] must appear before [2,1]
  optional TensorProto indices = 2;

  // The shape of the underlying dense-tensor: [dim_1, dim_2, ... dim_rank]
  repeated int64 dims = 3;
}
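A sketch of recovering an index tuple from a format-(b) linearized index, treating dims as row-major as elsewhere in TensorProto:

#include <cstdint>
#include <vector>

std::vector<int64_t> unlinearize(int64_t linear, const std::vector<int64_t>& dims)
{
    std::vector<int64_t> idx(dims.size());
    for(auto i = dims.size(); i > 0; --i)
    {
        idx[i - 1] = linear % dims[i - 1];
        linear /= dims[i - 1];
    }
    return idx;
}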
// Defines a tensor shape. A dimension can be either an integer value
// or a symbolic variable. A symbolic variable represents an unknown
// dimension.
...
...
@@ -384,28 +616,73 @@ message TensorShapeProto {
      int64 dim_value = 1;
      string dim_param = 2;
    };  // namespace Shape

    // Standard denotation can optionally be used to denote tensor
    // dimensions with standard semantic descriptions to ensure
    // that operations are applied to the correct axis of a tensor.
    // Refer to https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
    // for pre-defined dimension denotations.
    optional string denotation = 3;
  };

  repeated Dimension dim = 1;
}
-// Define the types.
+// Types
+//
+// The standard ONNX data types.
message TypeProto {

  message Tensor {
-    // This field MUST NOT have the value of UNDEFINED
+    // This field MUST have a valid TensorProto.DataType value
    // This field MUST be present for this version of the IR.
-    optional TensorProto.DataType elem_type = 1;
+    optional int32 elem_type = 1;
    optional TensorShapeProto shape = 2;
  }
  // repeated T
  message Sequence {
    // The type and optional shape of each element of the sequence.
    // This field MUST be present for this version of the IR.
    optional TypeProto elem_type = 1;
  };
  // map<K,V>
  message Map {
    // This field MUST have a valid TensorProto.DataType value
    // This field MUST be present for this version of the IR.
    // This field MUST refer to an integral type ([U]INT{8|16|32|64}) or STRING
    optional int32 key_type = 1;

    // This field MUST be present for this version of the IR.
    optional TypeProto value_type = 2;
  };
  oneof value {
    // The type of a tensor.
    Tensor tensor_type = 1;

    // NOTE: DNN-only implementations of ONNX MAY elect to not support non-tensor values
    // as input and output to graphs and nodes. These types are needed to naturally
    // support classical ML operators. DNN operators SHOULD restrict their input
    // and output types to tensors.

    // The type of a sequence.
    Sequence sequence_type = 4;

    // The type of a map.
    Map map_type = 5;
  }

  // An optional denotation can be used to denote the whole
  // type with a standard semantic description as to what is
  // stored inside. Refer to https://github.com/onnx/onnx/blob/master/docs/TypeDenotation.md#type-denotation-definition
  // for pre-defined type denotations.
  optional string denotation = 6;
}
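A sketch of filling the tensor_type branch of the oneof for a float tensor with one symbolic dimension, assuming the protobuf-generated onnx headers:

#include <onnx/onnx_pb.h>

onnx::TypeProto make_image_type()
{
    onnx::TypeProto t;
    auto* tensor = t.mutable_tensor_type();
    tensor->set_elem_type(onnx::TensorProto::FLOAT);
    auto* shape = tensor->mutable_shape();
    shape->add_dim()->set_dim_param("N");  // symbolic batch dimension
    shape->add_dim()->set_dim_value(3);
    shape->add_dim()->set_dim_value(224);
    shape->add_dim()->set_dim_value(224);
    return t;
}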
// Operator Sets
//
// OperatorSets are uniquely identified by a (domain, opset_version) pair.
message OperatorSetIdProto {
  // The domain of the operator set being identified.
...
...
@@ -418,3 +695,8 @@ message OperatorSetIdProto {
// This field MUST be present in this version of the IR.
  optional int64 version = 2;
}

// For using protobuf-lite
option optimize_for = LITE_RUNTIME;
src/program.cpp
View file @
3a848f0d
...
...
@@ -52,7 +52,9 @@ static void print_instruction(std::ostream& os,
        os << ")";
    }
-    os << " -> " << ins->get_shape();
+    // skip return instruction shape
+    if(ins->name() != "@return")
+        os << " -> " << ins->get_shape();
}
template <class F>
...
...
@@ -147,7 +149,14 @@ void program::assign(const program& p)
        std::transform(inputs.begin(), inputs.end(), copy_inputs.begin(), [&](auto i) {
            return ins_map[i];
        });
-        copy_ins = add_instruction(ins->get_operator(), copy_inputs);
+        if(ins->name() == "@return")
+        {
+            copy_ins = add_return(copy_inputs);
+        }
+        else
+        {
+            copy_ins = add_instruction(ins->get_operator(), copy_inputs);
+        }
    }
    ins_map[ins] = copy_ins;
...
...
@@ -270,6 +279,18 @@ instruction_ref program::add_parameter(std::string name, shape s)
    return impl->instructions.begin();
}
instruction_ref program::add_return(std::vector<instruction_ref> args)
{
    assert(std::all_of(args.begin(), args.end(), [&](instruction_ref x) {
        return has_instruction(x);
    }) && "Argument is not an existing instruction");
    impl->instructions.push_back({builtin::returns{}, {}, args});
    auto result = std::prev(impl->instructions.end());
    instruction::backreference(result);
    assert(result->valid(begin()));
    return result;
}
shape program::get_parameter_shape(std::string name) const
{
    auto ins = std::find_if(
...
@@ -334,7 +355,26 @@ std::size_t program::size() const { return impl->instructions.size(); }
instruction_ref
program
::
begin
()
const
{
return
impl
->
instructions
.
begin
();
}
instruction_ref
program
::
end
()
const
{
return
impl
->
instructions
.
end
();
}
shape
program
::
get_shape
()
const
{
return
impl
->
instructions
.
back
().
get_shape
();
}
std
::
vector
<
shape
>
program
::
get_output_shapes
()
const
{
auto
last_ins
=
impl
->
instructions
.
back
();
if
(
last_ins
.
name
()
==
"@return"
)
{
auto
&
output_ins
=
last_ins
.
inputs
();
std
::
vector
<
shape
>
output_shapes
;
std
::
transform
(
output_ins
.
begin
(),
output_ins
.
end
(),
std
::
back_inserter
(
output_shapes
),
[](
auto
&
ins
)
{
return
ins
->
get_shape
();
});
return
output_shapes
;
}
// The else branch is to provide backward compatibility
else
{
return
{
last_ins
.
get_shape
()};
}
}
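A sketch of the new multi-output API from the caller's side; a program without an explicit @return still reports a single shape, per the backward-compatibility branch above ("model.onnx" is a hypothetical file):

#include <iostream>
#include <migraphx/onnx.hpp>
#include <migraphx/program.hpp>

int main()
{
    migraphx::program p = migraphx::parse_onnx("model.onnx");
    for(const auto& s : p.get_output_shapes())
        std::cout << s << std::endl;
}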
context& program::get_context() const { return impl->ctx; }
...
...
@@ -372,10 +412,10 @@ void program::finalize()
}

template <class F>
-argument generic_eval(const program& p,
-                      context& ctx,
-                      std::unordered_map<std::string, argument> params,
-                      F trace)
+std::vector<argument> generic_eval(const program& p,
+                                   context& ctx,
+                                   std::unordered_map<std::string, argument> params,
+                                   F trace)
{
    assert(p.validate() == p.end());
    std::unordered_map<instruction_ref, argument> results;
...
...
@@ -407,6 +447,19 @@ argument generic_eval(const program& p,
        {
            results.emplace(
                ins, trace(ins, [&] { return argument{ins->get_shape(), nullptr}; }));
        }
+        else if(name == "@return")
+        {
+            std::vector<argument> prog_outputs;
+            std::transform(ins->inputs().begin(),
+                           ins->inputs().end(),
+                           std::back_inserter(prog_outputs),
+                           [&](instruction_ref i) {
+                               assert(results.find(i) != results.end());
+                               return results[i];
+                           });
+            return prog_outputs;
+        }
        else
        {
            values.resize(ins->inputs().size());
...
...
@@ -421,10 +474,11 @@ argument generic_eval(const program& p,
        }
        assert(results.find(ins) != results.end());
    }
-    return results.at(std::prev(p.end()));
+    return {results.at(std::prev(p.end()))};
}
-argument program::eval(std::unordered_map<std::string, argument> params) const
+std::vector<argument> program::eval(parameter_map params) const
{
    auto& ctx = this->impl->ctx;
#ifndef NDEBUG
...
...
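From the caller's side, eval now yields one argument per program output. A sketch, assuming the parameter_map alias exported from program.hpp; the parameter name "input" is hypothetical, and argument construction from a shape and pointer matches the usage in generic_eval above:

#include <migraphx/program.hpp>

std::vector<migraphx::argument> run(migraphx::program& p, void* input_data)
{
    migraphx::parameter_map params;
    params["input"] = migraphx::argument{p.get_parameter_shape("input"), input_data};
    return p.eval(std::move(params));
}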
@@ -531,6 +585,11 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
    print_program(*this, [&](auto ins, const auto& names) {
        print_instruction(std::cout, ins, names);
+        // skip return instruction
+        if(ins->name() == "@return")
+            return;
        double avg     = common_average(ins_vec[ins]);
        double percent = std::ceil(100.0 * avg / total_instruction_time);
        os << ": " << avg << "ms, " << percent << "%";
...
...
src/py/migraphx_py.cpp
View file @
3a848f0d
...
...
@@ -158,7 +158,7 @@ PYBIND11_MODULE(migraphx, m)
    py::class_<migraphx::program>(m, "program")
        .def("clone", [](migraphx::program& p) { return *(new migraphx::program(p)); })
        .def("get_parameter_shapes", &migraphx::program::get_parameter_shapes)
-        .def("get_shape", &migraphx::program::get_shape)
+        .def("get_output_shapes", &migraphx::program::get_output_shapes)
        .def("compile",
             [](migraphx::program& p, const migraphx::target& t, bool offload_copy) {
                 migraphx::compile_options options;
...
...
@@ -173,11 +173,20 @@ PYBIND11_MODULE(migraphx, m)
        .def("__repr__", [](const migraphx::program& p) { return migraphx::to_string(p); });

-    m.def("parse_tf", &migraphx::parse_tf,
+    m.def("parse_tf",
+          [](const std::string& filename, bool is_nhwc, unsigned int batch_size) {
+              return migraphx::parse_tf(filename, migraphx::tf_options{is_nhwc, batch_size});
+          },
          "Parse tf protobuf (default format is nhwc)",
          py::arg("filename"),
-          py::arg("is_nhwc") = true);
-    m.def("parse_onnx", &migraphx::parse_onnx);
+          py::arg("is_nhwc") = true,
+          py::arg("batch_size") = 1);
+    m.def("parse_onnx",
+          [](const std::string& filename, unsigned int batch_size) {
+              return migraphx::parse_onnx(filename, migraphx::onnx_options{batch_size});
+          },
+          "Parse onnx file",
+          py::arg("filename"),
+          py::arg("batch_size") = 1);
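The bindings now forward an explicit batch_size into the C++ option structs. A sketch of the equivalent direct C++ call (what the new parse_onnx lambda wraps):

#include <migraphx/onnx.hpp>

migraphx::program load(const std::string& file, unsigned int batch)
{
    return migraphx::parse_onnx(file, migraphx::onnx_options{batch});
}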
    m.def("get_target", [](const std::string& name) -> migraphx::target {
        if(name == "cpu")
...
...
src/quantization.cpp
View file @
3a848f0d
...
...
@@ -105,6 +105,9 @@ void quantize_fp16(program& prog, const std::vector<std::string>& ins_names)
    std::unordered_map<instruction_ref, instruction_ref> map_fp16;
    for(auto ins : iterator_for(prog))
    {
+        if(ins->name() == "@return")
+            break;
+
        // all indicates every instruction is converted
        if((not contains(ins_names, "all")) and (not contains(ins_names, ins->name())))
        {
...
...
@@ -335,6 +338,9 @@ void quantize_int8_impl(program& prog,
    std::unordered_map<instruction_ref, std::size_t> map_ins_index;
    for(auto ins : iterator_for(prog))
    {
+        if(ins->name() == "@return")
+            break;
+
        if(not contains(ins_names, ins->name()))
        {
            continue;
...
...
src/simplify_algebra.cpp
View file @
3a848f0d
...
...
@@ -27,6 +27,15 @@ auto conv_const_weights()
        match::args(match::any(), match::is_constant().bind("w")));
}
MIGRAPHX_PRED_MATCHER(args_has_same_ops, instruction_ref ins)
{
    if(ins->inputs().empty())
        return true;
    return std::all_of(ins->inputs().begin(), ins->inputs().end(), [&](auto j) {
        return j->get_operator() == ins->inputs().front()->get_operator();
    });
}
struct find_mul_conv
{
    auto matcher() const
...
...
@@ -167,6 +176,73 @@ struct find_inner_broadcast
}
};
struct find_concat_unary
{
    auto matcher() const
    {
        return match::name("concat")(args_has_same_ops(),
                                     match::arg(0)(match::nargs(1),
                                                   match::name("relu", "broadcast").bind("x"),
                                                   match::used_once()));
    }

    void apply(program& p, match::matcher_result r) const
    {
        auto ins  = r.result;
        auto x    = r.instructions["x"];
        auto op   = x->get_operator();
        auto axis = any_cast<op::concat>(ins->get_operator()).axis;
        // Adjust broadcast lens
        if(op.name() == "broadcast")
        {
            auto b = any_cast<op::broadcast>(op);
            if(b.axis != axis)
                return;
            b.broadcast_lens = ins->get_shape().lens();
            op               = b;
            axis             = 0;
        }
        auto inputs = ins->inputs();
        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto i) {
            return i->inputs().front();
        });
        auto concat = p.insert_instruction(ins, op::concat{axis}, inputs);
        p.replace_instruction(ins, op, concat);
    }
};
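This matcher relies on unary elementwise ops commuting with concat: concat(f(a), f(b)) equals f(concat(a, b)), so the op runs once on the fused buffer. A minimal stand-alone check with relu:

#include <algorithm>
#include <cassert>
#include <vector>

int main()
{
    auto relu = [](std::vector<float> v) {
        for(auto& x : v)
            x = std::max(x, 0.0f);
        return v;
    };
    auto concat = [](std::vector<float> a, const std::vector<float>& b) {
        a.insert(a.end(), b.begin(), b.end());
        return a;
    };
    std::vector<float> a = {-1, 2}, b = {3, -4};
    assert(concat(relu(a), relu(b)) == relu(concat(a, b)));
}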
struct find_concat_binary
{
    auto matcher() const
    {
        return match::name("concat")(args_has_same_ops(),
                                     match::arg(0)(match::nargs(2),
                                                   match::name("add", "multiply").bind("x"),
                                                   match::used_once()));
    }

    void apply(program& p, match::matcher_result r) const
    {
        auto ins       = r.result;
        auto x         = r.instructions["x"];
        auto op        = x->get_operator();
        auto concat_op = ins->get_operator();
        auto xinputs   = ins->inputs();
        std::transform(xinputs.begin(), xinputs.end(), xinputs.begin(), [&](auto i) {
            return i->inputs().front();
        });
        auto yinputs = ins->inputs();
        std::transform(yinputs.begin(), yinputs.end(), yinputs.begin(), [&](auto i) {
            return i->inputs().back();
        });
        auto xconcat = p.insert_instruction(ins, concat_op, xinputs);
        auto yconcat = p.insert_instruction(ins, concat_op, yinputs);
        p.replace_instruction(ins, op, xconcat, yconcat);
    }
};
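The binary case uses the same commutation applied per operand position: concat(a1 + b1, a2 + b2) equals concat(a1, a2) + concat(b1, b2), so a single fused add runs on the concatenated operands. A stand-alone check:

#include <cassert>
#include <cstddef>
#include <vector>

std::vector<float> concat(std::vector<float> a, const std::vector<float>& b)
{
    a.insert(a.end(), b.begin(), b.end());
    return a;
}

std::vector<float> add(const std::vector<float>& a, const std::vector<float>& b)
{
    std::vector<float> r(a.size());
    for(std::size_t i = 0; i < a.size(); ++i)
        r[i] = a[i] + b[i];
    return r;
}

int main()
{
    std::vector<float> a1 = {1, 2}, b1 = {3, 4}, a2 = {5, 6}, b2 = {7, 8};
    assert(concat(add(a1, b1), add(a2, b2)) == add(concat(a1, a2), concat(b1, b2)));
}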
bool axis_equal(const std::vector<std::size_t>& x,
                const std::vector<std::size_t>& y,
                std::size_t axis)
...
...
@@ -281,7 +357,9 @@ void simplify_algebra::apply(program& p) const
                        find_add_lit_broadcast{},
                        find_add_convs{},
                        find_mul_conv{},
-                        find_mul_add{});
+                        find_mul_add{},
+                        find_concat_unary{},
+                        find_concat_binary{});
    dead_code_elimination{}.apply(p);
}
}
...
...
src/targets/cpu/lowering.cpp
View file @
3a848f0d
...
...
@@ -4,6 +4,7 @@
#include <migraphx/dfor.hpp>
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
...
...
@@ -144,13 +145,14 @@ struct cpu_lrn
            int height = output_shape.lens()[2];
            int width  = output_shape.lens()[3];
            float alphaoverarea = op.alpha / float(op.size);
-            int radius = (op.size - 1) / 2;
+            int radius_lower = (op.size - 1) / 2;
+            int radius_upper = op.size / 2 + 1;

            par_dfor(n_batch, height, width)([&](int b, int h, int w) {
                float scale = 0;
                dfor(channels)([&](int c) {
-                    auto start = (c - radius) < 0 ? 0 : (c - radius);
-                    auto end   = (c + radius) > channels ? channels : (c + radius);
+                    auto start = (c - radius_lower) < 0 ? 0 : (c - radius_lower);
+                    auto end   = (c + radius_upper) > channels ? channels : (c + radius_upper);
                    for(auto k = start; k < end; ++k)
                    {
                        scale += std::pow(input(b, k, h, w), 2);
...
...
@@ -220,6 +222,67 @@ struct cpu_convolution
}
};
template <class Op>
struct cpu_deconvolution
{
    Op op;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::" + op.name(); }

    shape compute_shape(const std::vector<shape>& inputs) const
    {
        return op.compute_shape(inputs);
    }

    argument compute(context&, shape output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
            using type = typename decltype(output)::value_type;
            std::fill(output.begin(), output.end(), type{0});

            auto out_lens = output_shape.lens();
            auto out_h    = out_lens[2];
            auto out_w    = out_lens[3];

            auto in   = input.get_shape().lens();
            auto in_n = in[0];
            auto in_c = in[1];
            auto in_h = in[2];
            auto in_w = in[3];

            auto wei   = weights.get_shape().lens();
            auto wei_n = wei[0];
            auto wei_c = wei[1];
            auto wei_h = wei[2];
            auto wei_w = wei[3];

            par_dfor(in_n, wei_c)([&](std::size_t o, std::size_t k) {
                dfor(in_c, in_h, in_w, wei_h, wei_w)([&](std::size_t w,
                                                         std::size_t i,
                                                         std::size_t j,
                                                         std::size_t x,
                                                         std::size_t y) {
                    const int start_x = i * op.stride[0] - op.padding[0];
                    const int start_y = j * op.stride[1] - op.padding[1];
                    const int out_x   = start_x + x * op.dilation[0];
                    const int out_y   = start_y + y * op.dilation[1];

                    const auto group_id = w / (wei_n / op.group);
                    const auto in_ch    = group_id * wei_c + k;

                    if(out_x >= 0 && out_x < out_h && out_y >= 0 && out_y < out_w)
                    {
                        output(o, in_ch, out_x, out_y) +=
                            input(o, w, i, j) * weights(w, k, x, y);
                    }
                });
            });
        });
        return result;
    }
};
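The scatter above writes input(o, w, i, j) * weights(w, k, x, y) to output coordinate i*stride - padding + x*dilation, which implies the usual deconvolution output extent. A sketch of the per-dimension size, assuming symmetric padding and no output padding:

#include <cstddef>

std::size_t deconv_out_dim(
    std::size_t in, std::size_t kernel, std::size_t stride, std::size_t padding, std::size_t dilation)
{
    return (in - 1) * stride - 2 * padding + dilation * (kernel - 1) + 1;
}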
struct cpu_im2col
{
    op::im2col op;
...
...
@@ -598,9 +661,10 @@ struct cpu_softmax
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
-        auto batch_lens     = output_shape.lens();
-        std::size_t n_dims  = batch_lens[op.axis];
-        batch_lens[op.axis] = 1;
+        auto batch_lens    = output_shape.lens();
+        int64_t tuned_axis =
+            (op.axis < 0) ? op.axis + args[0].get_shape().lens().size() : op.axis;
+        std::size_t n_dims     = batch_lens[tuned_axis];
+        batch_lens[tuned_axis] = 1;
        shape batch_shape{shape::int32_type, batch_lens};

        visit_all(result, args[0])([&](auto output, auto input) {
...
...
@@ -612,26 +676,26 @@ struct cpu_softmax
            auto idx = batch_shape.multi(i);
            for(std::size_t j = 0; j < n_dims; ++j)
            {
-                idx[op.axis] = j;
+                idx[tuned_axis] = j;
                batch_max[i] =
                    std::max(batch_max[i], input(idx.begin(), idx.end()));
            }

            for(std::size_t j = 0; j < n_dims; ++j)
            {
-                idx[op.axis] = j;
+                idx[tuned_axis] = j;
                std::size_t index = output_shape.index(idx);
                output[index]     = std::exp(input[index] - batch_max[i]);
            }

            for(std::size_t j = 0; j < n_dims; ++j)
            {
-                idx[op.axis] = j;
+                idx[tuned_axis] = j;
                batch_sum[i] += output(idx.begin(), idx.end());
            }

            for(std::size_t j = 0; j < n_dims; ++j)
            {
-                idx[op.axis] = j;
+                idx[tuned_axis] = j;
                output(idx.begin(), idx.end()) =
                    op.output()(output(idx.begin(), idx.end()), batch_sum[i]);
            }
...
@@ -664,8 +728,10 @@ struct cpu_apply
apply_map
[
"batch_norm_inference"
]
=
extend_op
<
cpu_batch_norm_inference
,
op
::
batch_norm_inference
>
();
apply_map
[
"convolution"
]
=
extend_op
<
cpu_convolution
<
op
::
convolution
>
,
op
::
convolution
>
();
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"quant_dot"
]
=
extend_op
<
cpu_quant_gemm
,
op
::
quant_dot
>
();
apply_map
[
"deconvolution"
]
=
extend_op
<
cpu_deconvolution
<
op
::
deconvolution
>
,
op
::
deconvolution
>
();
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"quant_dot"
]
=
extend_op
<
cpu_quant_gemm
,
op
::
quant_dot
>
();
apply_map
[
"quant_convolution"
]
=
extend_op
<
cpu_convolution
<
op
::
quant_convolution
>
,
op
::
quant_convolution
>
();
apply_map
[
"elu"
]
=
extend_op
<
cpu_unary
<
elu_op
>
,
op
::
elu
>
();
...
...
src/targets/gpu/CMakeLists.txt
View file @
3a848f0d
...
...
@@ -12,6 +12,7 @@ endif()
add_library(migraphx_device
device/acos.cpp
device/acosh.cpp
device/add.cpp
device/add_clip.cpp
device/add_relu.cpp
...
...
@@ -20,7 +21,9 @@ add_library(migraphx_device
device/argmax.cpp
device/argmin.cpp
device/asin.cpp
device/asinh.cpp
device/atan.cpp
device/atanh.cpp
device/ceil.cpp
device/clip.cpp
device/concat.cpp
...
...
@@ -43,10 +46,12 @@ add_library(migraphx_device
device/mul_add_relu.cpp
device/pad.cpp
device/pow.cpp
device/prelu.cpp
device/reduce_max.cpp
device/reduce_mean.cpp
device/reduce_min.cpp
device/reduce_sum.cpp
device/reduce_prod.cpp
device/relu.cpp
device/round.cpp
device/rsqrt.cpp
...
...
@@ -79,6 +84,7 @@ add_library(migraphx_gpu
lowering.cpp
pooling.cpp
convolution.cpp
deconvolution.cpp
quant_convolution.cpp
softmax.cpp
logsoftmax.cpp
...
...
src/targets/gpu/argmax.cpp
View file @
3a848f0d
...
...
@@ -14,7 +14,9 @@ shape hip_argmax::compute_shape(const std::vector<shape>& inputs) const
argument hip_argmax::compute(context& ctx,
                             const shape&,
                             const std::vector<argument>& args) const
{
-    device::argmax(ctx.get_stream().get(), args.back(), args.front(), op.axis);
+    auto n_dim         = args.front().get_shape().lens().size();
+    int64_t tuned_axis = (op.axis < 0) ? op.axis + n_dim : op.axis;
+    device::argmax(ctx.get_stream().get(), args.back(), args.front(), tuned_axis);
    return args.back();
}
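The same normalization appears in argmin below and in cpu_softmax above: a negative ONNX axis counts from the last dimension. A stand-alone check:

#include <cassert>
#include <cstdint>

int64_t tune_axis(int64_t axis, int64_t n_dim) { return axis < 0 ? axis + n_dim : axis; }

int main()
{
    assert(tune_axis(-1, 4) == 3);  // last axis of a rank-4 tensor
    assert(tune_axis(2, 4) == 2);   // non-negative axes pass through
}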
...
...
src/targets/gpu/argmin.cpp
View file @
3a848f0d
...
...
@@ -14,7 +14,9 @@ shape hip_argmin::compute_shape(const std::vector<shape>& inputs) const
argument hip_argmin::compute(context& ctx,
                             const shape&,
                             const std::vector<argument>& args) const
{
-    device::argmin(ctx.get_stream().get(), args.back(), args.front(), op.axis);
+    auto n_dim         = args.front().get_shape().lens().size();
+    int64_t tuned_axis = (op.axis < 0) ? op.axis + n_dim : op.axis;
+    device::argmin(ctx.get_stream().get(), args.back(), args.front(), tuned_axis);
    return args.back();
}
...
...
src/targets/gpu/deconvolution.cpp
0 → 100644
View file @
3a848f0d
#include <migraphx/gpu/deconvolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape miopen_deconvolution::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(4).standard();
    return op.compute_shape({inputs.at(0), inputs.at(1)});
}

argument miopen_deconvolution::compute(context& ctx,
                                       const shape& output_shape,
                                       const std::vector<argument>& args) const
{
    auto x_desc = make_tensor(args[0].get_shape());
    auto w_desc = make_tensor(args[1].get_shape());
    auto y_desc = make_tensor(output_shape);

    float alpha = 1;
    float beta  = 0;
    auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
                                           &alpha,
                                           x_desc.get(),
                                           args[0].implicit(),
                                           w_desc.get(),
                                           args[1].implicit(),
                                           cd.get(),
                                           algo,
                                           &beta,
                                           y_desc.get(),
                                           args[3].implicit(),
                                           args[2].implicit(),
                                           args[2].get_shape().bytes());
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("Running deconvolution failed");
    return args[3];
}

shape miopen_deconvolution::compile(context& ctx,
                                    const shape& output_shape,
                                    std::vector<shape> inputs)
{
    shape workspace_shape{};

    auto x_desc = make_tensor(inputs[0]);
    auto w_desc = make_tensor(inputs[1]);
    auto y_desc = make_tensor(output_shape);

    std::size_t workspace_size = 0;
    miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                             w_desc.get(),
                                             x_desc.get(),
                                             cd.get(),
                                             y_desc.get(),
                                             &workspace_size);
    workspace_shape = shape{shape::int8_type, {workspace_size}};

    auto x         = to_gpu(generate_argument(inputs[0]));
    auto w         = to_gpu(generate_argument(inputs[1]));
    auto y         = allocate_gpu(output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf;
    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                        x_desc.get(),
                                                        x.implicit(),
                                                        w_desc.get(),
                                                        w.implicit(),
                                                        cd.get(),
                                                        y_desc.get(),
                                                        y.implicit(),
                                                        1,
                                                        &algo_count,
                                                        &perf,
                                                        workspace.implicit(),
                                                        workspace_size,
                                                        false);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("Find deconvolution failed");
    handle = ctx.get_stream().get_miopen();
    algo   = perf.fwd_algo;
    return shape{shape::int8_type, {perf.memory}};
}

void miopen_deconvolution::finalize(context& ctx,
                                    const shape& output_shape,
                                    std::vector<shape> inputs)
{
    if(handle == ctx.get_stream().get_miopen())
        return;
    // Check that workspace hasn't changed
    auto size = inputs.at(2).bytes();
    auto ws   = compile(ctx, output_shape, std::move(inputs));
    if(ws.bytes() > size)
        MIGRAPHX_THROW("Workspace has changed during finalization.");
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/acos.cpp
View file @
3a848f0d
...
...
@@ -9,7 +9,7 @@ namespace device {
void acos(hipStream_t stream, const argument& result, const argument& arg)
{
-    nary(stream, result, arg)([](auto x) { return ::acos(to_hip_type(x)); });
+    nary(stream, result, arg)([](auto x) __device__ { return ::acos(to_hip_type(x)); });
}

} // namespace device
...
...
src/targets/gpu/device/acosh.cpp
0 → 100644
View file @
3a848f0d
#include <migraphx/gpu/device/acosh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void acosh(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) { return ::acosh(to_hip_type(x)); });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx