gaoqiong / MIGraphX — Commit 33a41ba0

Authored Jun 14, 2019 by Paul

Merge branch 'develop' into batch-concat

Parents: b092d017, a7bd5ded

Showing 20 changed files with 833 additions and 551 deletions (+833 −551)
Files changed:

- src/include/migraphx/streamutils.hpp (+2 −0)
- src/include/migraphx/stringutils.hpp (+3 −2)
- src/include/migraphx/tensor_view.hpp (+10 −2)
- src/instruction.cpp (+30 −8)
- src/onnx/cifar10.cpp (+1 −1)
- src/onnx/onnx.cpp (+171 −73)
- src/opt/memory_coloring_impl.cpp (+9 −17)
- src/opt/memory_coloring_impl.hpp (+17 −18)
- src/pass_manager.cpp (+42 −0, new file)
- src/program.cpp (+123 −35)
- src/propagate_constant.cpp (+54 −0, new file)
- src/py/CMakeLists.txt (+1 −6)
- src/py/migraphx_py.cpp (+5 −6)
- src/quantization.cpp (+107 −0, new file)
- src/rewrite_rnn.cpp (+31 −19)
- src/schedule.cpp (+3 −1)
- src/simplify_algebra.cpp (+1 −1)
- src/simplify_reshapes.cpp (+7 −9)
- src/targets/cpu/gemm.cpp (+9 −3)
- src/targets/cpu/lowering.cpp (+207 −350)
src/include/migraphx/streamutils.hpp

```diff
@@ -42,7 +42,9 @@ template <class Range>
 auto stream_write_value_impl(rank<1>, std::ostream& os, const Range& r)
     -> decltype(r.begin(), r.end(), void())
 {
+    os << "{";
     os << stream_range(r);
+    os << "}";
 }

 template <class T>
```
src/include/migraphx/stringutils.hpp

```diff
@@ -38,8 +38,9 @@ inline std::string join_strings(Strings strings, const std::string& delim)
         return "";
     auto nit = std::next(it);
-    return std::accumulate(nit, strings.end(), *it, [&](std::string x, std::string y) {
-        return x + delim + y;
-    });
+    return std::accumulate(nit, strings.end(), *it, [&](std::string x, std::string y) {
+        return std::move(x) + delim + std::move(y);
+    });
 }

 template <class F>
```
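Passing the accumulator by value and moving it avoids re-copying the growing result string on every step of the fold. A standalone sketch of the same pattern (the inputs and delimiter here are made up for illustration):

```cpp
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> strings = {"a", "b", "c"};
    auto it  = strings.begin();
    auto nit = std::next(it);
    std::string joined =
        std::accumulate(nit, strings.end(), *it, [](std::string x, std::string y) {
            // Moving x reuses its buffer instead of copying it each iteration.
            return std::move(x) + ", " + std::move(y);
        });
    std::cout << joined << "\n"; // prints: a, b, c
    return 0;
}
```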
src/include/migraphx/tensor_view.hpp

```diff
@@ -12,6 +12,14 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+template <class T>
+T as_number(T x)
+{
+    return x;
+}
+inline int32_t as_number(int8_t x) { return static_cast<int32_t>(x); }
+inline uint32_t as_number(uint8_t x) { return static_cast<uint32_t>(x); }
+
 template <class T>
 struct tensor_view
 {
@@ -130,10 +138,10 @@ struct tensor_view
     {
         if(!x.empty())
         {
-            os << x.front();
+            os << as_number(x.front());
             for(std::size_t i = 1; i < x.m_shape.elements(); i++)
             {
-                os << ", " << x.m_data[x.m_shape.index(i)];
+                os << ", " << as_number(x.m_data[x.m_shape.index(i)]);
             }
         }
         return os;
```
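The `as_number` overloads exist because streaming an `int8_t`/`uint8_t` writes a character rather than a digit string, so int8 tensors printed as garbage. A standalone illustration of the problem and the fix:

```cpp
#include <cstdint>
#include <iostream>

template <class T>
T as_number(T x) { return x; }
inline int32_t as_number(int8_t x) { return static_cast<int32_t>(x); }
inline uint32_t as_number(uint8_t x) { return static_cast<uint32_t>(x); }

int main()
{
    int8_t v = 65;
    std::cout << v << "\n";            // prints the character 'A'
    std::cout << as_number(v) << "\n"; // prints 65
    return 0;
}
```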
src/instruction.cpp

```diff
@@ -28,6 +28,12 @@ void instruction::replace(const shape& r)
     }
 }

+void instruction::replace(operation o)
+{
+    op = std::move(o);
+    recompute_shape();
+}
+
+void instruction::recompute_shape() { replace(compute_shape(op, arguments)); }
+
 void instruction::clear_arguments()
@@ -162,7 +168,24 @@ void instruction::replace_argument(instruction_ref old, instruction_ref new_ins)
     old->remove_output(*this);
 }

-argument instruction::eval() const
+bool instruction::can_eval() const
+{
+    if(op.name() == "@literal")
+    {
+        return true;
+    }
+    else if(is_context_free(op))
+    {
+        return std::all_of(
+            this->inputs().begin(), this->inputs().end(), [](auto arg) { return arg->can_eval(); });
+    }
+    else
+    {
+        return false;
+    }
+}
+
+argument instruction::eval(bool check_eval) const
 {
     if(op.name() == "@literal")
     {
@@ -170,14 +193,13 @@ argument instruction::eval() const
     }
     if(is_context_free(op))
     {
-        std::vector<argument> args;
-        for(auto&& arg : this->inputs())
-        {
-            argument a = arg->eval();
-            if(a.empty())
-                return {};
-            args.push_back(a);
-        }
+        if(check_eval and not this->can_eval())
+            return {};
+        std::vector<argument> args;
+        std::transform(this->inputs().begin(),
+                       this->inputs().end(),
+                       std::back_inserter(args),
+                       [](auto arg) { return arg->eval(false); });
         return op.compute(result, args);
     }
     return {};
```
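Splitting evaluation into `can_eval` plus `eval(false)` checks the whole input chain once up front, so the recursive evaluation no longer needs a per-node emptiness test. A toy, self-contained model of the same two-step pattern (the `node` type is invented for illustration):

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

struct node
{
    bool is_literal = false;
    int value       = 0;
    std::vector<const node*> inputs;

    bool can_eval() const
    {
        if(is_literal)
            return true;
        // A non-literal is evaluable only if every input is evaluable.
        return !inputs.empty() && std::all_of(inputs.begin(), inputs.end(), [](const node* n) {
                   return n->can_eval();
               });
    }

    int eval() const
    {
        if(is_literal)
            return value;
        int sum = 0;
        for(const node* n : inputs)
            sum += n->eval(); // safe: the caller already verified can_eval()
        return sum;
    }
};

int main()
{
    node a{true, 2, {}};
    node b{true, 3, {}};
    node add{false, 0, {&a, &b}};
    if(add.can_eval())
        std::cout << add.eval() << "\n"; // prints 5
    return 0;
}
```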
src/onnx/cifar10.cpp

```diff
@@ -32,7 +32,7 @@ auto read_cifar10_images(const std::string& full_path)
         labels[i] = *pimage++;
         for(size_t j = 0; j < nbytes_per_image; j++)
         {
-            float v = *(pimage + j) / 255.0f;
+            float v = float(*(pimage + j)) / 255.0f;
             data[i * nbytes_per_image + j] = v;
         }
     }
```
src/onnx/onnx.cpp

```diff
@@ -36,7 +36,6 @@ struct onnx_parser
     onnx_parser()
     {
-        add_generic_op("MatMul", op::dot{});
         add_generic_op("Relu", op::relu{});
         add_generic_op("Sigmoid", op::sigmoid{});
         add_generic_op("Abs", op::abs{});
@@ -64,6 +63,7 @@ struct onnx_parser
         add_variadic_op("Max", op::max{});
         add_variadic_op("Min", op::min{});
+        add_mem_op("Clip", &onnx_parser::parse_clip);
         add_mem_op("LRN", &onnx_parser::parse_lrn);
         add_mem_op("ImageScaler", &onnx_parser::parse_imagescaler);
         add_mem_op("LeakyRelu", &onnx_parser::parse_leaky_relu);
@@ -77,6 +77,7 @@ struct onnx_parser
         add_mem_op("Reshape", &onnx_parser::parse_reshape);
         add_mem_op("Flatten", &onnx_parser::parse_flatten);
         add_mem_op("Gemm", &onnx_parser::parse_gemm);
+        add_mem_op("MatMul", &onnx_parser::parse_matmul);
         add_mem_op("BatchNormalization", &onnx_parser::parse_batchnorm);
         add_mem_op("Softmax", &onnx_parser::parse_softmax);
         add_mem_op("LogSoftmax", &onnx_parser::parse_logsoftmax);
@@ -141,8 +142,8 @@ struct onnx_parser
         if(broadcasted != 0)
         {
             uint64_t axis = parse_value(attributes.at("axis")).at<uint64_t>();
-            auto l = prog.add_instruction(op::broadcast{axis, args[0]->get_shape()}, args[1]);
+            auto l =
+                prog.add_instruction(op::broadcast{axis, args[0]->get_shape().lens()}, args[1]);
             return prog.add_instruction(x, args[0], l);
         }
         return prog.add_instruction(x, args);
```
```diff
@@ -154,10 +155,8 @@ struct onnx_parser
         });
     }

-    template <class T>
-    instruction_ref add_broadcastable_binary_op(instruction_ref arg0, instruction_ref arg1, T x)
-    {
-        if(arg0->get_shape().lens() != arg1->get_shape().lens())
+    std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
+                                                      std::vector<std::size_t> s1)
+    {
         // Example:
         // s0 = (3,2,4,5) and s1 = (2,1,1)
@@ -171,25 +170,33 @@ struct onnx_parser
         // In this case we need to broadcast the (:,:,1:,:) axis
         // of s0 plus the 1st dimension of s1 giving
         // output_lens = (3,2,7,5)
-        //
-        // Get lengths for both arguments
-        const std::vector<std::size_t>* s0 = &arg0->get_shape().lens();
-        const std::vector<std::size_t>* s1 = &arg1->get_shape().lens();
-        // Make sure s0 is the smaller size
-        if(s0->size() > s1->size())
-            std::swap(s0, s1);
-        std::vector<std::size_t> output_lens(*s1);
-        auto offset = s1->size() - s0->size();
-        std::transform(s0->begin(),
-                       s0->end(),
-                       s1->begin() + offset,
-                       output_lens.begin() + offset,
+        if(s0.size() > s1.size())
+        {
+            s0.swap(s1);
+        }
+        std::vector<std::size_t> out_lens(s1);
+        auto offset = s1.size() - s0.size();
+        std::transform(s0.begin(),
+                       s0.end(),
+                       s1.begin() + offset,
+                       out_lens.begin() + offset,
                        [](auto a, auto b) { return std::max(a, b); });
-        auto l0 = prog.add_instruction(op::multibroadcast{output_lens}, arg0);
-        auto l1 = prog.add_instruction(op::multibroadcast{output_lens}, arg1);
+        return out_lens;
+    }
+
+    template <class T>
+    instruction_ref add_broadcastable_binary_op(instruction_ref arg0, instruction_ref arg1, T x)
+    {
+        if(arg0->get_shape().lens() != arg1->get_shape().lens())
+        {
+            // Get lengths for both arguments
+            auto s0       = arg0->get_shape().lens();
+            auto s1       = arg1->get_shape().lens();
+            auto out_lens = compute_broadcasted_lens(s0, s1);
+            auto l0       = prog.add_instruction(op::multibroadcast{out_lens}, arg0);
+            auto l1       = prog.add_instruction(op::multibroadcast{out_lens}, arg1);
             return prog.add_instruction(x, l0, l1);
         }
         else
```
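Factoring the shape computation out of `add_broadcastable_binary_op` lets `parse_matmul` below reuse it. A standalone sketch mirroring the function above (note this simplified rule takes the element-wise max after right-aligning the shapes, without validating that mismatched dimensions are 1):

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
                                                  std::vector<std::size_t> s1)
{
    // Make sure s0 is the shorter shape.
    if(s0.size() > s1.size())
        s0.swap(s1);
    std::vector<std::size_t> out_lens(s1);
    auto offset = s1.size() - s0.size();
    // Align trailing dimensions and take the max of each pair.
    std::transform(s0.begin(), s0.end(), s1.begin() + offset, out_lens.begin() + offset,
                   [](auto a, auto b) { return std::max(a, b); });
    return out_lens;
}

int main()
{
    auto out = compute_broadcasted_lens({3, 2, 4, 5}, {2, 1, 1});
    for(auto d : out)
        std::cout << d << " "; // prints: 3 2 4 5
    std::cout << "\n";
    return 0;
}
```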
```diff
@@ -201,7 +208,7 @@ struct onnx_parser
     template <class T>
     void add_generic_op(std::string name, T x)
     {
-        add_op(name, [this, x](attribute_map, std::vector<instruction_ref> args) {
+        add_op(name, [this, x](const attribute_map&, std::vector<instruction_ref> args) {
             return prog.add_instruction(x, args);
         });
     }
@@ -209,7 +216,7 @@ struct onnx_parser
     template <class T>
     void add_variadic_op(std::string name, T x)
     {
-        add_op(name, [this, x](attribute_map, std::vector<instruction_ref> args) {
+        add_op(name, [this, x](const attribute_map&, std::vector<instruction_ref> args) {
             return std::accumulate(std::next(args.begin()),
                                    args.end(),
                                    args.front(),
```
```diff
@@ -219,6 +226,22 @@ struct onnx_parser
         });
     }

+    instruction_ref parse_clip(const std::string&,
+                               const attribute_map& attributes,
+                               std::vector<instruction_ref> args)
+    {
+        op::clip op;
+        if(contains(attributes, "max"))
+        {
+            op.max_val = parse_value(attributes.at("max")).at<float>();
+        }
+        if(contains(attributes, "min"))
+        {
+            op.min_val = parse_value(attributes.at("min")).at<float>();
+        }
+        return prog.add_instruction(op, std::move(args));
+    }
+
     instruction_ref
     parse_softmax(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
     {
```
```diff
@@ -300,7 +323,7 @@ struct onnx_parser
         {
             uint64_t axis = 1;
             auto l1 = prog.add_instruction(op, args[0], args[1]);
-            auto l2 = prog.add_instruction(op::broadcast{axis, l1->get_shape()}, args[2]);
+            auto l2 =
+                prog.add_instruction(op::broadcast{axis, l1->get_shape().lens()}, args[2]);
             return prog.add_instruction(op::add{}, l1, l2);
         }
         return prog.add_instruction(op, l0, args[1]);
```
```diff
@@ -495,25 +518,86 @@ struct onnx_parser
         auto l2 =
             (transb) ? prog.add_instruction(op::transpose{perm}, args[1]) : args[1];
         if(args.size() == 3)
         {
-            if(beta != 0.f)
+            if(beta != 0.f && args[2]->get_shape().elements() > 0)
             {
-                auto l3 = prog.add_instruction(op::dot{alpha}, l1, l2);
-                auto l4 = args[2];
-                if(l4->get_shape().scalar())
-                {
-                    // ignore args[2] (no C value added to alpha*A*B)
-                    return l3;
-                }
-                if(beta != 1.f)
-                {
-                    auto beta_val = prog.add_literal(beta);
-                    auto l5 = prog.add_instruction(op::scalar{args[2]->get_shape()}, beta_val);
-                    l4 = prog.add_instruction(op::mul{}, args[2], l5);
-                }
-                return add_broadcastable_binary_op(l3, l4, op::add{});
+                auto out_lens   = l1->get_shape().lens();
+                out_lens.back() = l2->get_shape().lens().back();
+                auto l3         = args[2];
+                auto l3_lens    = l3->get_shape().lens();
+                if(!std::equal(out_lens.begin(), out_lens.end(), l3_lens.begin(), l3_lens.end()))
+                {
+                    l3 = prog.add_instruction(op::multibroadcast{out_lens}, args[2]);
+                }
+                return prog.add_instruction(op::dot{alpha, beta}, l1, l2, l3);
             }
         }

         return prog.add_instruction(op::dot{alpha, beta}, l1, l2);
     }

+    instruction_ref
+    parse_matmul(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
+    {
+        auto l0      = args[0];
+        auto l1      = args[1];
+        auto l0_lens = l0->get_shape().lens();
+        auto l1_lens = l1->get_shape().lens();
+
+        // args[0] is a vector, prepend 1 to the shape
+        bool is_a_prepended = false;
+        if(l0_lens.size() == 1)
+        {
+            is_a_prepended = true;
+            l0_lens.insert(l0_lens.begin(), 1);
+            l0 = prog.add_instruction(op::unsqueeze{{0}}, args[0]);
+        }
+
+        bool is_b_appended = false;
+        if(l1_lens.size() == 1)
+        {
+            is_b_appended = true;
+            l1_lens.push_back(1);
+            l1 = prog.add_instruction(op::unsqueeze{{1}}, args[1]);
+        }
+
+        instruction_ref bl0 = l0;
+        instruction_ref bl1 = l1;
+        if(!std::equal(
+               l0_lens.rbegin() + 2, l0_lens.rend(), l1_lens.rbegin() + 2, l1_lens.rend()))
+        {
+            auto l0_it = l0_lens.begin() + l0_lens.size() - 2;
+            std::vector<std::size_t> l0_broadcasted_lens(l0_lens.begin(), l0_it);
+            auto l1_it = l1_lens.begin() + l1_lens.size() - 2;
+            std::vector<std::size_t> l1_broadcasted_lens(l1_lens.begin(), l1_it);
+            auto output_lens = compute_broadcasted_lens(l0_broadcasted_lens, l1_broadcasted_lens);
+            l0_broadcasted_lens = output_lens;
+            l0_broadcasted_lens.insert(l0_broadcasted_lens.end(), l0_it, l0_lens.end());
+            l1_broadcasted_lens = output_lens;
+            l1_broadcasted_lens.insert(l1_broadcasted_lens.end(), l1_it, l1_lens.end());
+            if(l0_lens != l0_broadcasted_lens)
+            {
+                bl0 = prog.add_instruction(op::multibroadcast{l0_broadcasted_lens}, l0);
+            }
+            if(l1_lens != l1_broadcasted_lens)
+            {
+                bl1 = prog.add_instruction(op::multibroadcast{l1_broadcasted_lens}, l1);
+            }
+        }
+
+        auto dot_res     = prog.add_instruction(op::dot{1.0f, 0.0f}, bl0, bl1);
+        int64_t num_axis = static_cast<int64_t>(dot_res->get_shape().lens().size());
+        if(is_a_prepended)
+        {
+            dot_res = prog.add_instruction(op::squeeze{{num_axis - 2}}, dot_res);
+            --num_axis;
+        }
+        if(is_b_appended)
+        {
+            dot_res = prog.add_instruction(op::squeeze{{num_axis - 1}}, dot_res);
+        }
+
+        return dot_res;
+    }
+
     instruction_ref
     parse_batchnorm(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
```
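The 1-D handling in `parse_matmul` follows the ONNX MatMul (NumPy) convention: a vector on the left is unsqueezed to a row, a vector on the right to a column, and the inserted axis is squeezed out of the result. A standalone shape walk-through of one case:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // A: (4)    -> unsqueeze at the front -> (1, 4)
    // B: (4, 3)
    // dot: (1, 4) x (4, 3) -> (1, 3) -> squeeze axis 0 -> (3)
    std::vector<std::size_t> a_lens = {4};
    std::vector<std::size_t> b_lens = {4, 3};
    bool is_a_prepended = (a_lens.size() == 1);
    if(is_a_prepended)
        a_lens.insert(a_lens.begin(), 1);          // (1, 4)
    std::vector<std::size_t> out = {a_lens[0], b_lens[1]}; // (1, 3)
    if(is_a_prepended)
        out.erase(out.begin());                     // squeeze -> (3)
    std::cout << out.size() << "-D result, dim0 = " << out[0] << "\n"; // 1-D result, dim0 = 3
    return 0;
}
```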
```diff
@@ -604,15 +688,15 @@ struct onnx_parser
             auto&& bias_floats = attributes["bias"].floats();
             bias = std::vector<float>(bias_floats.begin(), bias_floats.end());
         }
-        auto input_shape = args.front()->get_shape();
+        auto input_lens = args.front()->get_shape().lens();

         auto scale_val = prog.add_literal(scale);
         auto bias_vals = prog.add_literal(
             migraphx::literal{migraphx::shape{migraphx::shape::float_type, {bias.size()}}, bias});

-        auto scale_tensor = prog.add_instruction(migraphx::op::scalar{input_shape}, scale_val);
+        auto scale_tensor = prog.add_instruction(migraphx::op::scalar{input_lens}, scale_val);
         auto img_scaled =
             prog.add_instruction(migraphx::op::mul{}, args.front(), scale_tensor);
-        auto bias_bcast = prog.add_instruction(migraphx::op::broadcast{1, input_shape}, bias_vals);
+        auto bias_bcast = prog.add_instruction(migraphx::op::broadcast{1, input_lens}, bias_vals);
         return prog.add_instruction(migraphx::op::add{}, img_scaled, bias_bcast);
     }
```
```diff
@@ -1294,28 +1378,26 @@ struct onnx_parser
     static literal parse_tensor(const onnx::TensorProto& t)
     {
         std::vector<std::size_t> dims(t.dims().begin(), t.dims().end());
-        // in case of scalar constants in onnx file, use dims=1 to fill initializer data
-        if(dims.empty())
-        {
-            dims = {1};
-        }
         if(t.has_raw_data())
         {
             const std::string& s = t.raw_data();
             switch(t.data_type())
             {
             case onnx::TensorProto::UNDEFINED: throw std::runtime_error("");
-            case onnx::TensorProto::FLOAT: return literal{{shape::float_type, dims}, s.data()};
+            case onnx::TensorProto::FLOAT:
+                return create_literal(shape::float_type, dims, s.data());
             case onnx::TensorProto::UINT8: throw std::runtime_error("");
-            case onnx::TensorProto::INT8: return literal{{shape::int32_type, dims}, s.data()};
-            case onnx::TensorProto::UINT16: return literal{{shape::int32_type, dims}, s.data()};
-            case onnx::TensorProto::INT16: return literal{{shape::int32_type, dims}, s.data()};
-            case onnx::TensorProto::INT32: return literal{{shape::int32_type, dims}, s.data()};
-            case onnx::TensorProto::INT64: return literal{{shape::int64_type, dims}, s.data()};
+            case onnx::TensorProto::INT8: return create_literal(shape::int32_type, dims, s.data());
+            case onnx::TensorProto::UINT16:
+                return create_literal(shape::int32_type, dims, s.data());
+            case onnx::TensorProto::INT16: return create_literal(shape::int32_type, dims, s.data());
+            case onnx::TensorProto::INT32: return create_literal(shape::int32_type, dims, s.data());
+            case onnx::TensorProto::INT64: return create_literal(shape::int64_type, dims, s.data());
             case onnx::TensorProto::STRING: throw std::runtime_error("");
-            case onnx::TensorProto::BOOL: return literal{{shape::int32_type, dims}, s.data()};
-            case onnx::TensorProto::FLOAT16: return literal{{shape::half_type, dims}, s.data()};
-            case onnx::TensorProto::DOUBLE: return literal{{shape::double_type, dims}, s.data()};
+            case onnx::TensorProto::BOOL: return create_literal(shape::int32_type, dims, s.data());
+            case onnx::TensorProto::FLOAT16:
+                return create_literal(shape::half_type, dims, s.data());
+            case onnx::TensorProto::DOUBLE:
+                return create_literal(shape::double_type, dims, s.data());
             case onnx::TensorProto::UINT32: throw std::runtime_error("");
             case onnx::TensorProto::UINT64: throw std::runtime_error("");
             case onnx::TensorProto::COMPLEX64: throw std::runtime_error("");
@@ -1327,21 +1409,21 @@ struct onnx_parser
         switch(t.data_type())
         {
         case onnx::TensorProto::UNDEFINED: throw std::runtime_error("");
         case onnx::TensorProto::FLOAT:
-            return literal{{shape::float_type, dims}, t.float_data().begin(), t.float_data().end()};
+            return create_literal(shape::float_type, dims, t.float_data());
         case onnx::TensorProto::UINT8: throw std::runtime_error("");
         case onnx::TensorProto::INT8:
-            return literal{{shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
+            return create_literal(shape::int32_type, dims, t.int32_data());
         case onnx::TensorProto::UINT16:
-            return literal{{shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
+            return create_literal(shape::int32_type, dims, t.int32_data());
         case onnx::TensorProto::INT16:
-            return literal{{shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
+            return create_literal(shape::int32_type, dims, t.int32_data());
         case onnx::TensorProto::INT32:
-            return literal{{shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
+            return create_literal(shape::int32_type, dims, t.int32_data());
         case onnx::TensorProto::INT64:
-            return literal{{shape::int64_type, dims}, t.int64_data().begin(), t.int64_data().end()};
+            return create_literal(shape::int64_type, dims, t.int64_data());
         case onnx::TensorProto::STRING: throw std::runtime_error("");
         case onnx::TensorProto::BOOL:
-            return literal{{shape::int32_type, dims}, t.int32_data().begin(), t.int32_data().end()};
+            return create_literal(shape::int32_type, dims, t.int32_data());
         case onnx::TensorProto::FLOAT16:
         {
             std::vector<uint16_t> data_uint16(t.int32_data().begin(), t.int32_data().end());
@@ -1350,11 +1432,10 @@ struct onnx_parser
             std::transform(data_uint16.begin(),
                            data_uint16.end(),
                            std::back_inserter(data_half),
                            [](uint16_t raw_val) { return *reinterpret_cast<half*>(&raw_val); });
-            return literal{{shape::half_type, dims}, data_half.begin(), data_half.end()};
+            return create_literal(shape::half_type, dims, data_half);
         }
         case onnx::TensorProto::DOUBLE:
-            return literal{
-                {shape::double_type, dims}, t.double_data().begin(), t.double_data().end()};
+            return create_literal(shape::double_type, dims, t.double_data());
         case onnx::TensorProto::UINT32: throw std::runtime_error("");
         case onnx::TensorProto::UINT64: throw std::runtime_error("");
         case onnx::TensorProto::COMPLEX64: throw std::runtime_error("");
@@ -1363,6 +1444,23 @@ struct onnx_parser
         MIGRAPHX_THROW("Invalid tensor type");
     }

+    static literal
+    create_literal(shape::type_t shape_type, const std::vector<size_t>& dims, const char* data)
+    {
+        // in case of scalar constants in onnx file, use dims=1 to fill initializer data
+        if(dims.empty())
+            return literal{{shape_type}, data};
+        return literal{{shape_type, dims}, data};
+    }
+
+    template <class T, MIGRAPHX_REQUIRES(not std::is_pointer<T>{})>
+    static literal create_literal(shape::type_t shape_type, const std::vector<size_t>& dims, T data)
+    {
+        if(dims.empty())
+            return literal{{shape_type}, data.begin(), data.end()};
+        return literal{{shape_type, dims}, data.begin(), data.end()};
+    }
+
     static shape parse_type(const onnx::TypeProto& t)
     {
         shape::type_t shape_type{};
```
src/opt/memory_coloring_impl.cpp

```diff
+#include <migraphx/op/load.hpp>
 #include "memory_coloring_impl.hpp"

 namespace migraphx {
@@ -62,11 +63,11 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
         }
     }
-    long long offset = 0;
+    std::size_t offset = 0;
     while(!conflict_queue.empty())
     {
         live_range* range = conflict_queue.top();
-        long long iter_offset = range->offset;
+        std::size_t iter_offset = range->offset;
         if(offset > iter_offset)
         {
             offset = std::max(offset, iter_offset + range->size);
@@ -96,7 +97,7 @@ void memory_coloring_impl::build()
     if(num_of_instrs == 0)
         return;

-    int cur_points = num_of_instrs * 2;
+    auto cur_points = num_of_instrs * 2;
     instruction_ref iter = p_program->end();
     instruction_ref begin = p_program->begin();
     std::vector<instruction_ref> dead_instrs;
@@ -176,7 +177,7 @@ void memory_coloring_impl::build()
 void memory_coloring_impl::rewrite()
 {
     std::vector<std::size_t> dims;
-    dims.push_back(required_bytes / sizeof(float));
+    dims.push_back((required_bytes + sizeof(float) - 1) / sizeof(float));
     shape s = {shape::float_type, dims};
     instruction_ref scratch_param = p_program->add_parameter("scratch", s);
     for(auto ins : iterator_for(*p_program))
@@ -192,13 +193,13 @@ void memory_coloring_impl::rewrite()
             continue;
         std::size_t offset = 0;
-        if(interval->get_offset() == invalid_offset)
+        if(interval->get_offset() != invalid_offset)
         {
-            assert(interval->result.bytes() == 0);
+            offset = interval->get_offset();
         }
         else
         {
-            offset = interval->get_offset();
+            assert(interval->result.bytes() == 0);
         }
         if(is_allocate(ins))
@@ -206,15 +207,6 @@ void memory_coloring_impl::rewrite()
             p_program->replace_instruction(
                 ins, op::load{ins->get_shape(), offset}, scratch_param);
         }
         else if(is_literal(ins))
         {
-#if 0
-            auto pre = p_program->add_literal(ins->lit);
-            bool pre_copy = (interval->get_begin() < earliest_end_point);
-            p_program->replace_instruction(
-                ins, write_literal{offset, pre_copy}, scratch_param, pre);
-#endif
         }
     }
     MIGRAPHX_DEBUG(dump("---After rewrite---"));
```
src/opt/memory_coloring_impl.hpp

```diff
@@ -3,7 +3,6 @@
 #include <migraphx/program.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/instruction.hpp>
-#include <migraphx/operators.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/pass_config.hpp>
 #include <migraphx/config.hpp>
@@ -22,15 +21,15 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-static const int invalid_offset = -1;
+static const std::size_t invalid_offset = std::numeric_limits<std::size_t>::max();

 struct live_range
 {
-    int begin;        // begin point in the instruction stream.
-    int end;          // end point in the instruction stream.
-    long long offset; // offset to base pointer of allocated memory trunk.
-    int vn;           // value number that identifies this live_range.
-    long long size;   // size of required memory in bytes
+    std::size_t begin;  // begin point in the instruction stream.
+    std::size_t end;    // end point in the instruction stream.
+    std::size_t offset; // offset to base pointer of allocated memory trunk.
+    std::size_t vn;     // value number that identifies this live_range.
+    std::size_t size;   // size of required memory in bytes
 #ifdef MIGRAPHX_DEBUG_OPT
     void dump();
 #endif
@@ -46,9 +45,9 @@ struct live_interval
         is_live_on_entry = false;
     }

-    void add_use(int use) { use_points.push_front(use); }
-    int get_begin() const { return segment.begin; }
-    int get_end() const { return segment.end; }
+    void add_use(std::size_t use) { use_points.push_front(use); }
+    std::size_t get_begin() const { return segment.begin; }
+    std::size_t get_end() const { return segment.end; }
     long long get_offset() const { return segment.offset; }

 #ifdef MIGRAPHX_DEBUG_OPT
@@ -56,9 +55,9 @@ struct live_interval
 #endif
     live_range segment;
-    int id;
-    std::list<int> use_points;
-    int def_point;
+    std::size_t id;
+    std::list<std::size_t> use_points;
+    std::size_t def_point;
     shape result;
     bool is_literal;
     bool is_live_on_entry;
@@ -112,8 +111,8 @@ struct memory_coloring_impl
     {
         if((range1.size == 0) || (range2.size == 0))
             return false;
-        long long end1 = range1.offset + range1.size - 1;
-        long long end2 = range2.offset + range2.size - 1;
+        auto end1 = range1.offset + range1.size - 1;
+        auto end2 = range2.offset + range2.size - 1;
         return ((end1 < range2.offset) || (end2 < range1.offset));
     }
     void verify();
@@ -126,8 +125,8 @@ struct memory_coloring_impl
     {
         bool operator()(const interval_ptr i1, const interval_ptr i2) const
         {
-            int len1 = i1->get_end() - i1->get_begin();
-            int len2 = i2->get_end() - i2->get_begin();
+            auto len1 = i1->get_end() - i1->get_begin();
+            auto len2 = i2->get_end() - i2->get_begin();
             if(len1 != len2)
             {
                 return (len1 < len2);
@@ -159,7 +158,7 @@ struct memory_coloring_impl
     int num_of_lives;
     int max_value_number;
-    long long required_bytes;
+    std::size_t required_bytes;
     // The earliest program point where an live interval ends.
     int earliest_end_point;
     // The latest program point where an live interval ends.
```
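With offsets now unsigned, the old `-1` sentinel would wrap around, so the largest `std::size_t` value takes over the "no offset assigned" role. A standalone sketch of the sentinel pattern (names are illustrative):

```cpp
#include <cstddef>
#include <iostream>
#include <limits>

int main()
{
    // The maximum value is unreachable as a real offset, so it marks "unset".
    const std::size_t invalid_offset = std::numeric_limits<std::size_t>::max();
    std::size_t offset = invalid_offset;
    if(offset == invalid_offset)
        std::cout << "offset not assigned yet\n";
    return 0;
}
```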
src/pass_manager.cpp — new file (0 → 100644)

```cpp
#include <migraphx/program.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/target.hpp>
#include <migraphx/env.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/time.hpp>
#include <migraphx/iterator_for.hpp>
#include <iostream>
#include <sstream>
#include <algorithm>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

void run_passes(program& prog, const std::vector<pass>& passes, tracer trace)
{
    for(auto& p : passes)
    {
        trace("Pass: ", p.name());
        p.apply(prog);
        trace(prog);
#ifndef NDEBUG
        trace("Validate ...");
        auto invalid = prog.validate();
        if(invalid != prog.end())
        {
            auto index = std::distance(prog.begin(), invalid);
            MIGRAPHX_THROW(p.name() + " pass produces invalid program at instruction " +
                           std::to_string(index) + ": " + invalid->name());
        }
        trace();
#endif
    }
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
```
src/program.cpp

```diff
 #include <migraphx/program.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/instruction.hpp>
-#include <migraphx/operators.hpp>
+#include <migraphx/op/identity.hpp>
 #include <migraphx/target.hpp>
 #include <migraphx/env.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/time.hpp>
 #include <migraphx/iterator_for.hpp>
+#include <migraphx/pass_manager.hpp>
 #include <iostream>
 #include <sstream>
 #include <algorithm>
```
```diff
@@ -55,18 +56,23 @@ static void print_instruction(std::ostream& os,
 }

 template <class F>
-static void print_program(std::ostream& os, const program& p, F annonate)
+static void print_program(const program& p, F print_func)
 {
     std::unordered_map<instruction_ref, std::string> names;
     int count = 0;

     for(auto ins : iterator_for(p))
     {
-        std::string var_name = "@" + std::to_string(count);
+        std::string var_name;
         if(ins->name() == "@param")
         {
             var_name = any_cast<builtin::param>(ins->get_operator()).parameter;
         }
+        else
+        {
+            var_name = "@" + std::to_string(count);
+            count++;
+        }
         names.emplace(ins, var_name);

         // TODO: Use all_of
```
```diff
@@ -76,22 +82,78 @@ static void print_program(std::ostream& os, const program& p, F annonate)
             (void)arg;
         }

-        print_instruction(os, ins, names);
-        annonate(ins, names);
-        os << std::endl;
-        count++;
+        print_func(ins, names);
     }
 }

 program::program() : impl(std::make_unique<program_impl>()) {}

 program::program(program&&) noexcept = default;
 program& program::operator=(program&&) noexcept = default;
 program::~program() noexcept = default;

+// copy constructor
+program::program(const program& p) { assign(p); }
+
+// copy assignment operator
+program& program::operator=(program p)
+{
+    std::swap(p.impl, this->impl);
+    return *this;
+}
+
+void program::assign(const program& p)
+{
+    // clean the current program
+    if(!impl)
+    {
+        impl = std::make_unique<program_impl>();
+    }
+    else if(!impl->instructions.empty())
+    {
+        impl->instructions.clear();
+    }
+    impl->ctx = p.impl->ctx;
+
+    std::unordered_map<instruction_ref, instruction_ref> ins_map;
+    for(auto ins : iterator_for(p))
+    {
+        instruction_ref copy_ins{};
+        if(ins->name() == "@literal")
+        {
+            auto l   = ins->get_literal();
+            copy_ins = impl->instructions.insert(impl->instructions.end(), instruction{l});
+        }
+        else if(ins->name() == "@param")
+        {
+            auto&& name = any_cast<builtin::param>(ins->get_operator()).parameter;
+            auto s      = ins->get_shape();
+            copy_ins    = impl->instructions.insert(impl->instructions.end(),
+                                                 {builtin::param{name}, std::move(s), {}});
+        }
+        else if(ins->name() == "@outline")
+        {
+            auto s   = ins->get_shape();
+            copy_ins = impl->instructions.insert(impl->instructions.end(),
+                                                 {builtin::outline{s}, s, {}});
+        }
+        else
+        {
+            // retrieve its mapped input
+            auto inputs = ins->inputs();
+            // ensure all inputs have its corresponding copy instructions
+            assert(std::all_of(
+                inputs.begin(), inputs.end(), [&](auto i) { return ins_map.count(i) > 0; }));
+            std::vector<instruction_ref> copy_inputs(inputs.size());
+            std::transform(inputs.begin(), inputs.end(), copy_inputs.begin(), [&](auto i) {
+                return ins_map[i];
+            });
+            copy_ins = add_instruction(ins->get_operator(), copy_inputs);
+        }
+        ins_map[ins] = copy_ins;
+    }
+}
+
 instruction_ref program::add_instruction(const operation& op, std::vector<instruction_ref> args)
 {
     return insert_instruction(impl->instructions.end(), op, std::move(args));
```
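A minimal sketch of what the new copy support enables, assuming the MIGraphX headers and the `add_literal`/`add_instruction` calls used elsewhere in this commit; it is an illustration, not code from the commit:

```cpp
#include <migraphx/program.hpp>
#include <migraphx/operators.hpp>

int main()
{
    migraphx::program p;
    auto one = p.add_literal(1.0f);
    auto two = p.add_literal(2.0f);
    p.add_instruction(migraphx::op::add{}, one, two);

    // Deep copy via the new copy constructor (program::assign): q can be
    // compiled or mutated independently of p.
    migraphx::program q = p;
    return 0;
}
```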
```diff
@@ -291,23 +353,7 @@ void program::compile(const target& t, tracer trace)
         trace = tracer{std::cout};

     trace(*this);
     trace();

-    for(auto&& p : t.get_passes(this->impl->ctx))
-    {
-        trace("Pass: ", p.name());
-        p.apply(*this);
-        trace(*this);
-#ifndef NDEBUG
-        trace("Validate ...");
-        auto invalid = this->validate();
-        if(invalid != impl->instructions.end())
-        {
-            auto index = std::distance(impl->instructions.begin(), invalid);
-            MIGRAPHX_THROW(p.name() + " pass produces invalid program at instruction " +
-                           std::to_string(index) + ": " + invalid->name());
-        }
-        trace();
-#endif
-    }
+    run_passes(*this, t.get_passes(this->impl->ctx), trace);

     auto invalid = this->validate();
     if(invalid != impl->instructions.end())
     {
```
```diff
@@ -391,13 +437,20 @@ argument program::eval(std::unordered_map<std::string, argument> params) const
 #else
     auto check_context = [](auto f) { return f(); };
 #endif

-    if(enabled(MIGRAPHX_TRACE_EVAL{}))
+    auto trace_level = value_of(MIGRAPHX_TRACE_EVAL{});
+
+    if(trace_level > 0)
     {
         return generic_eval(*this, ctx, std::move(params), [&](auto& ins, auto f) {
             ctx.finish();
             std::cout << "Run instruction: ";
             this->debug_print(ins);
-            return check_context(f);
+            auto result = check_context(f);
+            ctx.finish();
+            if(trace_level > 1 and ins->name().front() != '@' and ins->name() != "load")
+                std::cout << "Ouput: " << result << std::endl;
+            return result;
         });
     }
     else
```
```diff
@@ -475,10 +528,12 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
     double calculate_overhead_time = total_time - total_instruction_time;
     double calculate_overhead_percent = calculate_overhead_time * 100.0 / total_time;

-    print_program(os, *this, [&](auto ins, auto&&) {
+    print_program(*this, [&](auto ins, const auto& names) {
+        print_instruction(std::cout, ins, names);
         double avg = common_average(ins_vec[ins]);
         double percent = std::ceil(100.0 * avg / total_instruction_time);
         os << ": " << avg << "ms, " << percent << "%";
+        os << std::endl;
     });
     os << std::endl;
```
```diff
@@ -516,7 +571,7 @@ void program::debug_print(instruction_ref ins) const
         return;
     }
     std::stringstream ss;
-    print_program(ss, *this, [&](auto x, auto&& names) {
+    print_program(*this, [&](auto x, const auto& names) {
         if(x == ins)
         {
             print_instruction(std::cout, x, names);
```
```diff
@@ -531,6 +586,32 @@ void program::debug_print(const std::vector<instruction_ref>& inss) const
     std::cout << std::endl;
 }

+static std::string enclose_name(const std::string& name)
+{
+    return '"' + replace_string(name, "\"", "\\\"") + '"';
+}
+
+void program::print_graph(std::ostream& os) const
+{
+    os << "digraph {" << std::endl;
+    os << "\trankdir=LR;" << std::endl;
+    print_program(*this, [&](auto ins, const auto& names) {
+        os << "\t" << enclose_name(names.at(ins))
+           << "[label=" << enclose_name(to_string(ins->get_operator())) << "];";
+        os << std::endl;
+        if(!ins->inputs().empty())
+        {
+            for(auto&& arg : ins->inputs())
+            {
+                os << "\t" << enclose_name(names.at(arg)) << " -> "
+                   << enclose_name(names.at(ins));
+                os << "[label=" << enclose_name(to_string(ins->get_shape())) << "];";
+                os << std::endl;
+            }
+        }
+    });
+    os << "}" << std::endl;
+}
+
 void program::dry_run(std::unordered_map<std::string, argument> params) const
 {
     auto& ctx = this->impl->ctx;
@@ -539,14 +620,21 @@ void program::dry_run(std::unordered_map<std::string, argument> params) const
 void program::annotate(std::ostream& os, std::function<void(instruction_ref)> a) const
 {
-    print_program(os, *this, [&](auto ins, auto&&) { a(ins); });
+    print_program(*this, [&](auto ins, const auto& names) {
+        print_instruction(os, ins, names);
+        a(ins);
+        os << std::endl;
+    });
 }

 bool operator==(const program& x, const program& y) { return to_string(x) == to_string(y); }

 std::ostream& operator<<(std::ostream& os, const program& p)
 {
-    print_program(os, p, [](auto&&...) {});
+    print_program(p, [&](auto ins, const auto& names) {
+        print_instruction(os, ins, names);
+        os << std::endl;
+    });
     return os;
 }
```
src/propagate_constant.cpp — new file (0 → 100644)

```cpp
#include <migraphx/propagate_constant.hpp>
#include <migraphx/program.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/functional.hpp>
#include <unordered_set>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

bool skip_propogate(instruction_ref ins)
{
    if(ins->name() == "@literal")
        return true;
    auto&& s = ins->get_shape();
    if(s.broadcasted() and not s.scalar())
        return true;
    if(s.scalar() and s.elements() != 1)
        return true;
    return false;
}

void propagate_constant::apply(program& p) const
{
    for(auto i : iterator_for(p))
    {
        if(i->name() != "@literal")
            continue;
        if(i->outputs().empty())
            continue;
        fix([&](auto self, auto ins) {
            std::unordered_set<instruction_ref> children(ins->outputs().begin(),
                                                         ins->outputs().end());
            for(auto child : children)
            {
                if(skip_propogate(child))
                {
                    self(child);
                    continue;
                }
                auto r = child->eval();
                if(not r.empty())
                {
                    assert(r.get_shape() == child->get_shape());
                    auto l = p.add_literal(r.get_shape(), r.data());
                    self(p.replace_instruction(child, l));
                }
            }
        })(i);
    }
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
```
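A minimal sketch of the pass's intended effect, assuming the MIGraphX headers above: it builds an add of two literals and folds it into a single literal (illustration only, not code from the commit):

```cpp
#include <migraphx/program.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/operators.hpp>
#include <iostream>

int main()
{
    migraphx::program p;
    auto one = p.add_literal(1.0f);
    auto two = p.add_literal(2.0f);
    p.add_instruction(migraphx::op::add{}, one, two);

    // The add of two literals is context-free and evaluable, so the pass
    // replaces it with a precomputed literal.
    migraphx::propagate_constant{}.apply(p);
    std::cout << p << std::endl;
    return 0;
}
```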
src/py/CMakeLists.txt

```diff
@@ -12,12 +12,7 @@ if(MIGRAPHX_ENABLE_PYTHON)
     C_VISIBILITY_PRESET hidden
     CXX_VISIBILITY_PRESET hidden
 )
-if(MIGRAPHX_ENABLE_TF)
-target_link_libraries(migraphx_py PRIVATE migraphx migraphx_tf migraphx_cpu)
-target_compile_definitions(migraphx_py PRIVATE -DENABLE_TF)
-else()
-target_link_libraries(migraphx_py PRIVATE migraphx migraphx_onnx migraphx_cpu)
-endif()
+target_link_libraries(migraphx_py PRIVATE migraphx migraphx_tf migraphx_onnx migraphx_cpu)
 if(MIGRAPHX_ENABLE_GPU)
 target_link_libraries(migraphx_py PRIVATE migraphx_gpu)
 target_compile_definitions(migraphx_py PRIVATE -DHAVE_GPU)
```
src/py/migraphx_py.cpp

```diff
@@ -2,14 +2,12 @@
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 #include <migraphx/program.hpp>
+#include <migraphx/quantization.hpp>
 #include <migraphx/generate.hpp>
 #include <migraphx/cpu/target.hpp>
 #include <migraphx/stringutils.hpp>
-#ifdef ENABLE_TF
 #include <migraphx/tf.hpp>
-#else
 #include <migraphx/onnx.hpp>
-#endif
 #ifdef HAVE_GPU
 #include <migraphx/gpu/target.hpp>
@@ -160,16 +158,13 @@ PYBIND11_MODULE(migraphx, m)
         .def("__ne__", std::not_equal_to<migraphx::program>{})
         .def("__repr__", [](const migraphx::program& p) { return migraphx::to_string(p); });

-#ifdef ENABLE_TF
     m.def("parse_tf",
           &migraphx::parse_tf,
           "Parse tf protobuf (default format is nhwc)",
           py::arg("filename"),
           py::arg("is_nhwc") = true);
-#else
     m.def("parse_onnx", &migraphx::parse_onnx);
-#endif

     m.def("get_target", [](const std::string& name) -> migraphx::target {
         if(name == "cpu")
             return migraphx::cpu::target{};
@@ -181,6 +176,10 @@ PYBIND11_MODULE(migraphx, m)
     });
     m.def("generate_argument", &migraphx::generate_argument, py::arg("s"), py::arg("seed") = 0);
+    m.def("quantize", [](migraphx::program& p, std::vector<std::string>& ins_names) {
+        migraphx::quantize(p, ins_names);
+    });
+    m.def("quantize", [](migraphx::program& p) { migraphx::quantize(p, {"all"}); });
 #ifdef HAVE_GPU
     m.def("allocate_gpu", &migraphx::gpu::allocate_gpu, py::arg("s"), py::arg("host") = false);
```
src/quantization.cpp — new file (0 → 100644)

```cpp
#include <migraphx/quantization.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/ranges.hpp>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

instruction_ref insert_fp16(program& prog,
                            instruction_ref& ins,
                            shape::type_t type,
                            std::unordered_map<instruction_ref, instruction_ref>& map_fp16)
{
    if(map_fp16.count(ins) > 0)
    {
        return map_fp16[ins];
    }

    assert(ins->get_shape().type() == shape::float_type ||
           ins->get_shape().type() == shape::double_type);
    instruction_ref ins_fp16{};
    ins_fp16      = prog.insert_instruction(std::next(ins), op::convert{type}, ins);
    map_fp16[ins] = ins_fp16;

    return ins_fp16;
}

void quantize(program& prog, const std::vector<std::string>& ins_names)
{
    std::unordered_map<instruction_ref, instruction_ref> map_fp16;
    for(auto ins : iterator_for(prog))
    {
        // all indicates every instruction is converted
        if((not contains(ins_names, "all")) and (not contains(ins_names, ins->name())))
        {
            continue;
        }

        shape::type_t orig_type = ins->get_shape().type();

        // process all inputs, if input is a fp32 or fp64, convert it
        // to a fp16 by adding a convert operator.
        auto inputs = ins->inputs();
        std::vector<instruction_ref> converted_inputs;
        for(auto input : inputs)
        {
            auto s = input->get_shape();
            if(s.type() == shape::float_type || s.type() == shape::double_type)
            {
                // if the input is a convert operator, uses its input
                // as its current input
                instruction_ref input_fp16{};
                if(input->name() == "convert")
                {
                    input_fp16 = input->inputs().front();
                }
                else
                {
                    input_fp16 = insert_fp16(prog, input, shape::half_type, map_fp16);
                }
                converted_inputs.push_back(input_fp16);
            }
            else
            {
                converted_inputs.push_back(input);
            }
        }

        // no change for the input, go to the next instruction
        if(inputs == converted_inputs)
        {
            continue;
        }

        auto op        = ins->get_operator();
        auto ins_shape = compute_shape(op, converted_inputs);
        if(ins_shape.type() != orig_type)
        {
            // insert another convert instruction to convert it back
            if(ins == std::prev(prog.end()))
            {
                prog.add_instruction(op::convert{orig_type}, ins);
            }
            else
            {
                // check the dead code case to avoid assert
                bool output_empty = ins->outputs().empty();
                auto ins_orig_type =
                    prog.insert_instruction(std::next(ins), op::convert{orig_type}, ins);
                if(!output_empty)
                {
                    prog.replace_instruction(ins, ins_orig_type);
                }
            }
        }

        prog.replace_instruction(ins, op, converted_inputs);
    }
}

void quantize(program& prog) { quantize(prog, {"all"}); }

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
```
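A minimal usage sketch of the new entry points, assuming the headers above; `quantize(p)` is equivalent to `quantize(p, {"all"})` (illustration only, not code from the commit):

```cpp
#include <migraphx/program.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/operators.hpp>

int main()
{
    migraphx::program p;
    auto x = p.add_literal(2.0f);
    auto y = p.add_literal(3.0f);
    p.add_instruction(migraphx::op::mul{}, x, y);

    migraphx::quantize(p);            // convert every eligible instruction to fp16
    // migraphx::quantize(p, {"dot"}); // or restrict conversion to named operators
    return 0;
}
```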
src/rewrite_rnn.cpp

```diff
@@ -4,6 +4,7 @@
 #include <migraphx/operators.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/dfor.hpp>
+#include <migraphx/op/common.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -213,7 +214,7 @@ std::vector<instruction_ref> rewrite_rnn::vanilla_rnn_cell(bool is_forward,
         auto wb = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sbias);
         auto rb = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sbias);
         auto b  = prog.insert_instruction(ins, op::add{}, wb, rb);
-        bias    = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape()}, b);
+        bias    = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape().lens()}, b);
     }

     instruction_ref hidden_out = prog.end();
@@ -520,25 +521,26 @@ std::vector<instruction_ref> rewrite_rnn::gru_cell(bool is_forward,
     instruction_ref brcst_bh{};
     if(bias != prog.end())
     {
+        auto broadcast_lens = sih->get_shape().lens();
         auto sbias = prog.insert_instruction(ins, op::squeeze{{0}}, bias);
         auto wbz   = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sbias);
         auto wbr   = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sbias);
         auto wbh   = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, sbias);
-        brcst_wbh  = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape()}, wbh);
+        brcst_wbh  = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, wbh);

         auto rbz  = prog.insert_instruction(ins, op::slice{{0}, {3 * hs}, {4 * hs}}, sbias);
         auto rbr  = prog.insert_instruction(ins, op::slice{{0}, {4 * hs}, {5 * hs}}, sbias);
         auto rbh  = prog.insert_instruction(ins, op::slice{{0}, {5 * hs}, {6 * hs}}, sbias);
-        brcst_rbh = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape()}, rbh);
+        brcst_rbh = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, rbh);

         auto bz  = prog.insert_instruction(ins, op::add{}, wbz, rbz);
-        brcst_bz = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape()}, bz);
+        brcst_bz = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, bz);

         auto br  = prog.insert_instruction(ins, op::add{}, wbr, rbr);
-        brcst_br = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape()}, br);
+        brcst_br = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, br);

         auto bh  = prog.insert_instruction(ins, op::add{}, wbh, rbh);
-        brcst_bh = prog.insert_instruction(ins, op::broadcast{1, sih->get_shape()}, bh);
+        brcst_bh = prog.insert_instruction(ins, op::broadcast{1, broadcast_lens}, bh);
     }

     for(long i = 0; i < seq_len; i++)
@@ -946,7 +948,7 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
     // initial cell state
     auto sic = prog.insert_instruction(ins, op::squeeze{{0}}, ic);
-    auto ic_shape = sic->get_shape();
+    auto ic_lens = sic->get_shape().lens();

     // bias
     instruction_ref bi_brcst{};
@@ -955,26 +957,27 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
     instruction_ref bc_brcst{};
     if(bias != prog.end())
     {
         auto sbias = prog.insert_instruction(ins, op::squeeze{{0}}, bias);

         auto bxi = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, sbias);
         auto bhi = prog.insert_instruction(ins, op::slice{{0}, {4 * hs}, {5 * hs}}, sbias);
         auto bi  = prog.insert_instruction(ins, op::add{}, bxi, bhi);
-        bi_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, bi);
+        bi_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, bi);

         auto bxo = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, sbias);
         auto bho = prog.insert_instruction(ins, op::slice{{0}, {5 * hs}, {6 * hs}}, sbias);
         auto bo  = prog.insert_instruction(ins, op::add{}, bxo, bho);
-        bo_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, bo);
+        bo_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, bo);

         auto bxf = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, sbias);
         auto bhf = prog.insert_instruction(ins, op::slice{{0}, {6 * hs}, {7 * hs}}, sbias);
         auto bf  = prog.insert_instruction(ins, op::add{}, bxf, bhf);
-        bf_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, bf);
+        bf_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, bf);

         auto bxc = prog.insert_instruction(ins, op::slice{{0}, {3 * hs}, {4 * hs}}, sbias);
         auto bhc = prog.insert_instruction(ins, op::slice{{0}, {7 * hs}, {8 * hs}}, sbias);
         auto bc  = prog.insert_instruction(ins, op::add{}, bxc, bhc);
-        bc_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, bc);
+        bc_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, bc);
     }

     // peep hole
@@ -986,13 +989,13 @@ std::vector<instruction_ref> rewrite_rnn::lstm_cell(bool is_forward,
     {
         auto spph = prog.insert_instruction(ins, op::squeeze{{0}}, pph);
         auto pphi = prog.insert_instruction(ins, op::slice{{0}, {0}, {hs}}, spph);
-        pphi_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, pphi);
+        pphi_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, pphi);
         auto ppho  = prog.insert_instruction(ins, op::slice{{0}, {hs}, {2 * hs}}, spph);
-        ppho_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, ppho);
+        ppho_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, ppho);
         auto pphf  = prog.insert_instruction(ins, op::slice{{0}, {2 * hs}, {3 * hs}}, spph);
-        pphf_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_shape}, pphf);
+        pphf_brcst = prog.insert_instruction(ins, op::broadcast{1, ic_lens}, pphf);
     }

     for(long i = 0; i < seq_len; ++i)
@@ -1166,5 +1169,14 @@ std::vector<operation> rewrite_rnn::lstm_actv_funcs(instruction_ref ins) const
     }
 }

+namespace op {
+std::ostream& operator<<(std::ostream& os, rnn_direction v)
+{
+    std::vector<std::string> rnn_direction_str = {"forward", "reverse", "bidirectional"};
+    os << rnn_direction_str[static_cast<std::underlying_type<rnn_direction>::type>(v)];
+    return os;
+}
+} // namespace op
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
```
src/schedule.cpp

```diff
 #include <migraphx/schedule.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/instruction.hpp>
-#include <migraphx/operators.hpp>
+#include <migraphx/op/identity.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/dfor.hpp>
 #include <migraphx/functional.hpp>
@@ -341,6 +341,8 @@ struct stream_info
 void schedule::apply(program& p) const
 {
+    if(not enable)
+        return;
     stream_info si;
     auto last = std::prev(p.end());
     si.accumulate_weights(last, model);
```
src/simplify_algebra.cpp

```diff
 #include <migraphx/simplify_algebra.hpp>
 #include <migraphx/program.hpp>
-#include <migraphx/operators.hpp>
+#include <migraphx/op/add.hpp>
 #include <migraphx/matcher.hpp>
 #include <migraphx/literal.hpp>
```
src/simplify_reshapes.cpp

```diff
 #include <migraphx/simplify_reshapes.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/instruction.hpp>
-#include <migraphx/operators.hpp>
+#include <migraphx/op/as_shape.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/ranges.hpp>
 #include <unordered_set>
@@ -14,7 +14,9 @@ bool is_reshaper(instruction_ref ins)
     // clang-format off
     static const std::unordered_set<std::string> names = {
         "reshape",
-        "contiguous"
+        "contiguous",
+        "squeeze",
+        "unsqueeze"
     };
     // clang-format on
     return contains(names, ins->name());
@@ -45,6 +47,9 @@ void simplify_reshapes::apply(program& p) const
     auto end = std::prev(p.end());
     for(auto ins : iterator_for(p))
     {
         if(ins == end and ins->name() == "contiguous")
             continue;
+        // Skip possible dead instructions
+        if(ins->outputs().empty() and ins != end)
+            continue;
         if(is_reshaper(ins))
@@ -94,13 +99,6 @@ void simplify_reshapes::apply(program& p) const
             p.replace_instruction(ins, t->inputs().front());
         }
     }
-
-    // Replace all reshapes with as_shape
-    for(auto ins : iterator_for(p))
-    {
-        if(ins->name() != "reshape")
-            continue;
-        p.replace_instruction(ins, op::as_shape{ins->get_shape()}, ins->inputs());
-    }
 }
 } // namespace MIGRAPHX_INLINE_NS
```
src/targets/cpu/gemm.cpp

```diff
@@ -55,7 +55,13 @@ void migemm_impl(tensor_view<T> cmat,
     visit_mat(amat, [&](const auto& a) {
         visit_mat(bmat, [&](const auto& b) {
             auto c = make_mat(cmat);
-            c = (a * b) * alpha + beta * c;
+            c = beta * c;
+            // This is a simple optimization to avoid
+            // compute A * B if alpha is 0.0
+            if(alpha != 0.0)
+            {
+                c = c + alpha * a * b;
+            }
         });
     });
 }
@@ -95,8 +101,8 @@ void migemm_impl(
     auto lens = amat.get_shape().lens();
     bool batch_mul =
-        std::accumulate(
-            lens.begin(), lens.end(), std::size_t{1}, std::multiplies<std::size_t>()) ==
-        (*lens.rbegin()) * (*(lens.rbegin() + 1));
+        std::accumulate(
+            lens.rbegin() + 2, lens.rend(), std::size_t{1}, std::multiplies<std::size_t>()) == 1;
     if(batch_mul)
     {
         migemm_impl(cmat, amat, bmat, alpha, beta, is_fast_gemm_type<T>{});
```
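Computing `c = beta * c` first makes the alpha term purely additive, so the kernel can skip the matrix product entirely when alpha is zero. A deliberately tiny standalone sketch of the reordering (a 1×1 "gemm", for illustration only):

```cpp
#include <iostream>
#include <vector>

void gemm_1x1(std::vector<double>& c, const std::vector<double>& a,
              const std::vector<double>& b, double alpha, double beta)
{
    for(auto& v : c)
        v *= beta;       // c = beta * c
    if(alpha != 0.0)     // skip the product when it cannot contribute
        c[0] += alpha * a[0] * b[0];
}

int main()
{
    std::vector<double> a{2.0}, b{3.0}, c{10.0};
    gemm_1x1(c, a, b, 0.0, 1.0);
    std::cout << c[0] << "\n"; // prints 10 without ever computing a*b
    return 0;
}
```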
src/targets/cpu/lowering.cpp

```diff
@@ -48,6 +48,12 @@ struct cpu_batch_norm_inference
 {
     op::batch_norm_inference op;

+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "cpu::batch_norm_inference"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
@@ -75,10 +81,10 @@ struct cpu_batch_norm_inference
             par_dfor(num_batch, num_channels, image_height, image_width)(
                 [&](std::size_t n, std::size_t c, std::size_t h, std::size_t w) {
-                    assert((variance(c) + epsilon) > 0);
-                    result(n, c, h, w) = gamma(c) * (buffer(n, c, h, w) - mean(c)) /
-                                             std::sqrt(variance(c) + epsilon) +
-                                         bias(c);
+                    assert((variance[c] + epsilon) > 0);
+                    result(n, c, h, w) = gamma[c] * (buffer(n, c, h, w) - mean[c]) /
+                                             std::sqrt(variance[c] + epsilon) +
+                                         bias[c];
                 });
         });
     }
@@ -107,6 +113,12 @@ struct cpu_lrn
 {
     op::lrn op;

+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "cpu::lrn"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     argument compute(context&, shape output_shape, std::vector<argument> args) const
@@ -117,7 +129,7 @@ struct cpu_lrn
         int channels = output_shape.lens()[1];
         int height = output_shape.lens()[2];
         int width = output_shape.lens()[3];
-        float alphaoverarea = op.alpha / op.size;
+        float alphaoverarea = op.alpha / float(op.size);
         int radius = (op.size - 1) / 2;

         par_dfor(n_batch, height, width)([&](int b, int h, int w) {
@@ -144,6 +156,12 @@ struct cpu_convolution
 {
     op::convolution op;

+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "cpu::convolution"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
@@ -165,15 +183,15 @@ struct cpu_convolution
                 output_shape.lens()[2],
                 output_shape.lens()[3])(
                 [&](std::size_t o, std::size_t w, std::size_t i, std::size_t j) {
-                    const int start_x = i * op.stride[0] - op.padding[0];
-                    const int start_y = j * op.stride[1] - op.padding[1];
-                    const int group_id = w / (wei_n / op.group);
+                    const auto start_x = i * op.stride[0] - op.padding[0];
+                    const auto start_y = j * op.stride[1] - op.padding[1];
+                    const auto group_id = w / (wei_n / op.group);

                     double acc = 0;
                     dfor(wei_c, wei_h, wei_w)([&](std::size_t k, std::size_t x, std::size_t y) {
-                        const int in_x = start_x + x;
-                        const int in_y = start_y + y;
-                        const int in_ch = group_id * wei_c + k;
+                        const auto in_x = start_x + x;
+                        const auto in_y = start_y + y;
+                        const auto in_ch = group_id * wei_c + k;
                         if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                         {
                             acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
@@ -190,6 +208,12 @@ struct cpu_im2col
 {
     op::im2col op;

+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     static std::string name() { return "cpu::im2col"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
@@ -209,10 +233,8 @@ struct cpu_im2col
         const std::size_t& stride_h = op.stride[0];
         const std::size_t& stride_w = op.stride[1];

-        int kdiv2_h;
-        int kdiv2_w;
-        kdiv2_h = kernel_h / 2;
-        kdiv2_w = kernel_w / 2;
+        auto kdiv2_h = kernel_h / 2;
+        auto kdiv2_w = kernel_w / 2;
         // calculate output sizes
         const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
         const std::size_t col_width = (width - kernel_w + 2 * pad_w) / stride_w + 1;
@@ -230,8 +252,8 @@ struct cpu_im2col
                 dfor(channels,
                      kernel_h,
                      kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
-                    int idx = iinput + koffset - kdiv2_h;
-                    int jdx = jinput + loffset - kdiv2_w;
+                    auto idx = iinput + koffset - kdiv2_h;
+                    auto jdx = jinput + loffset - kdiv2_w;
                     col(ldx, p) = ((idx >= 0) && (idx < height) && (jdx >= 0) && (jdx < width))
                                       ? input(0, c, idx, jdx)
                                       : 0;
@@ -273,6 +295,12 @@ struct cpu_pooling
 {
     op::pooling op;

+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
     std::string name() const { return "cpu::pooling_" + Op::name(); }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
```
@@ -317,20 +345,35 @@ struct cpu_pooling
     }
 };

-struct cpu_contiguous
+struct cpu_op
 {
-    op::contiguous op;
-    std::string name() const { return "cpu::contiguous"; }
+    operation op;
+    std::string name() const { return "cpu::" + op.name(); }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
     {
-        return op.compute(output_shape, std::move(args));
+        return op.compute(output_shape, args);
     }
+    friend bool operator==(const cpu_op& x, const cpu_op& y) { return x.op == y.op; }
+    friend bool operator==(const cpu_op& x, const operation& y)
+    {
+        if(x.name() != y.name())
+            return false;
+        return x == any_cast<cpu_op>(y);
+    }
+    friend bool operator==(const operation& x, const cpu_op& y) { return y == x; }
 };
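`cpu_op` generalizes the old one-off `cpu_contiguous` wrapper to any type-erased `operation`, and the mixed `operator==` overloads compare through the erased handle by first matching names and then recovering the concrete wrapper with `any_cast`. A minimal usage sketch, assuming `operation` can erase the wrapper implicitly as elsewhere in MIGraphX:

    operation erased = cpu_op{op::contiguous{}};
    cpu_op direct{op::contiguous{}};
    assert(erased == direct); // names match ("cpu::contiguous"), then op == op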
 struct cpu_pad
 {
     op::pad op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
     std::string name() const { return "cpu::contiguous"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
...
@@ -354,184 +397,54 @@ struct cpu_pad
     }
 };

-struct cpu_concat
-{
-    op::concat op;
-    std::string name() const { return "cpu::concat"; }
-    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        return op.compute(output_shape, std::move(args));
-    }
-};
-
 struct cpu_gemm
 {
     op::dot op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
     std::string name() const { return "cpu::dot"; }
-    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        if(inputs.size() == 3)
+        {
+            auto c_shape = inputs.at(2);
+            check_shapes{{c_shape}}.not_broadcasted();
+        }
+        return op.compute_shape(inputs);
+    }
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
-        migemm(result, args[0], args[1], op.alpha, op.beta);
+        // 3 inputs, it is alpha * A * B + beta * C, then
+        // A and B are matrices, and C is broadcastable to A * B
+        if(args.size() == 3)
+        {
+            // no need to consider the value of args[2]
+            if(op.beta == 0.0f)
+            {
+                result.visit([&](auto output) { std::fill(output.begin(), output.end(), 0); });
+            }
+            else
+            {
+                visit_all(result, args[2])([&](auto output, auto input) {
+                    std::copy(input.begin(), input.end(), output.begin());
+                });
+            }
+            migemm(result, args[0], args[1], op.alpha, op.beta);
+            return result;
+        }
+        // 2 input arguments
+        migemm(result, args[0], args[1], op.alpha, 0.0f);
         return result;
     }
 };

-struct cpu_gather
-{
-    op::gather op;
-    std::string name() const { return "cpu::gather"; }
-    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        return op.compute(output_shape, std::move(args));
-    }
-};
-
-struct identity_op
-{
-    std::string name() const { return "cpu::identity"; }
-    auto fcn() const
-    {
-        return [](auto x) { return x; };
-    }
-};
-
-struct abs_op
-{
-    std::string name() const { return "cpu::abs"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::abs(make_signed(x)); };
-    }
-};
-
-struct exp_op
-{
-    std::string name() const { return "cpu::exp"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::exp(x); };
-    }
-};
-
-struct log_op
-{
-    std::string name() const { return "cpu::log"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::log(x); };
-    }
-};
-
-struct sin_op
-{
-    std::string name() const { return "cpu::sin"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::sin(x); };
-    }
-};
-
-struct cos_op
-{
-    std::string name() const { return "cpu::cos"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::cos(x); };
-    }
-};
-
-struct tan_op
-{
-    std::string name() const { return "cpu::tan"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::tan(x); };
-    }
-};
-
-struct asin_op
-{
-    std::string name() const { return "cpu::asin"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::asin(x); };
-    }
-};
-
-struct acos_op
-{
-    std::string name() const { return "cpu::acos"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::acos(x); };
-    }
-};
-
-struct atan_op
-{
-    std::string name() const { return "cpu::atan"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::atan(x); };
-    }
-};
-
-struct sinh_op
-{
-    std::string name() const { return "cpu::sinh"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::sinh(x); };
-    }
-};
-
-struct cosh_op
-{
-    std::string name() const { return "cpu::cosh"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::cosh(x); };
-    }
-};
-
-struct tanh_op
-{
-    std::string name() const { return "cpu::tanh"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::tanh(x); };
-    }
-};
-
-struct sigmoid_op
-{
-    std::string name() const { return "cpu::sigmoid"; }
-    auto fcn() const
-    {
-        return [](auto x) { return 1.f / (1.f + std::exp(-x)); };
-    }
-};
-
-struct neg_op
-{
-    std::string name() const { return "cpu::neg"; }
-    auto fcn() const
-    {
-        return [](auto x) { return -x; };
-    }
-};
-
-struct relu_op
-{
-    std::string name() const { return "cpu::relu"; }
-    auto fcn() const
-    {
-        return [](auto x) { return std::max(decltype(x){0}, x); };
-    }
-};
...
...
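The three-input path implements the usual GEMM convention result = alpha * A * B + beta * C: C is copied into the output (or the output is zero-filled when beta is 0) before `migemm` accumulates into it. A worked instance with hypothetical 2x2 values:

    // A = B = identity, alpha = 1, beta = 2, C = {{1, 1}, {1, 1}}
    // result starts as a copy of C, then migemm computes
    // result = 1 * (A * B) + 2 * result = I + 2 * C = {{3, 2}, {2, 3}}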
@@ -561,92 +474,76 @@ template <typename Op>
 struct cpu_unary
 {
     Op op;
-    std::string name() const { return op.name(); }
-    shape compute_shape(const std::vector<shape>& inputs) const { return inputs.front(); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
     {
-        argument result{output_shape};
-        result.visit([&](auto output) {
-            args[0].visit([&](auto input) {
-                std::transform(input.begin(), input.end(), output.begin(), op.fcn());
-            });
-        });
-        return result;
+        return migraphx::reflect(self.op.op, f);
     }
-};
-
-struct softmax2d
-{
-    std::string name() const { return "cpu::softmax2d"; }
-    shape compute_shape(const std::vector<shape>& inputs) const { return inputs.front(); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    std::string name() const { return op.name(); }
+    shape compute_shape(const std::vector<shape>& inputs) const
     {
-        argument result{output_shape};
-        visit_all(result, args[0])([&](auto output, auto input) {
-            using value_type = typename decltype(input)::value_type;
-            auto nb          = input.get_shape().lens()[0];
-            auto nc          = input.get_shape().lens()[1];
-            auto nh          = input.get_shape().lens()[2];
-            auto nw          = input.get_shape().lens()[3];
-            dfor(nb, nh, nw)([&](std::size_t b, std::size_t i, std::size_t j) {
-                value_type cmax = std::numeric_limits<value_type>::lowest();
-                for(int c = 0; c < nc; c++)
-                {
-                    cmax = std::max(cmax, input(b, c, i, j));
-                }
-                for(int c = 0; c < nc; c++)
-                {
-                    output(b, c, i, j) = std::exp(input(b, c, i, j) - cmax);
-                }
-                value_type sum = value_type(0);
-                for(int c = 0; c < nc; c++)
-                {
-                    sum += output(b, c, i, j);
-                }
-                for(int c = 0; c < nc; c++)
-                {
-                    output(b, c, i, j) = output(b, c, i, j) / sum;
-                }
-            });
-        });
-        return result;
+        check_shapes{inputs}.has(1);
+        auto s = inputs.at(0);
+        if(s.packed())
+        {
+            return s;
+        }
+        else
+        {
+            return {s.type(), s.lens()};
+        }
+    }
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    {
+        argument result{output_shape};
+        result.visit([&](auto output) {
+            args[0].visit([&](auto input) {
+                if(input.get_shape().standard())
+                {
+                    std::transform(input.begin(), input.end(), output.begin(), op.fcn());
+                }
+                else
+                {
+                    shape_for_each(output.get_shape(), [&](const auto& idx) {
+                        output(idx.begin(), idx.end()) = op.fcn()(input(idx.begin(), idx.end()));
+                    });
+                }
+            });
+        });
+        return result;
     }
 };
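The rewritten `cpu_unary` keeps the fast `std::transform` path only for standard (densely strided, row-major) inputs and falls back to index-wise traversal otherwise. A sketch of the distinction, assuming MIGraphX's `shape{type, lens, strides}` constructor:

    shape s1{shape::float_type, {2, 3}};         // standard strides {3, 1}: fast path
    shape s2{shape::float_type, {2, 3}, {1, 2}}; // transposed view, not standard:
                                                 // shape_for_each maps each index instead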
-struct cpu_logsoftmax
+struct cpu_softmax
 {
-    op::logsoftmax op;
-    std::string name() const { return "cpu::logsoftmax"; }
+    op::softmax op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+    std::string name() const { return "cpu::softmax"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
     template <typename T>
-    std::size_t compute_batch_index(const T& idx, shape& batch_shape, int axis) const
+    std::size_t compute_batch_index(T idx, shape& batch_shape, int axis) const
     {
-        if(axis == 0)
-        {
-            return 0;
-        }
-        else
-        {
-            std::vector<std::size_t> batch_idx(idx.begin(), idx.begin() + axis);
-            return batch_shape.index(batch_idx.begin(), batch_idx.end());
-        }
+        idx[axis] = 0;
+        return batch_shape.index(idx);
     }
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
-        auto lens = output_shape.lens();
-        std::vector<std::size_t> batch_lens{};
-        if(op.axis == 0)
-        {
-            batch_lens.push_back(1);
-        }
-        else
-        {
-            batch_lens.insert(batch_lens.begin(), lens.begin(), lens.begin() + op.axis);
-        }
-        shape batch_shape{migraphx::shape::uint32_type, batch_lens};
+        auto batch_lens     = output_shape.lens();
+        batch_lens[op.axis] = 1;
+        shape batch_shape{shape::int32_type, batch_lens};
         visit_all(result, args[0])([&](auto output, auto input) {
             using value_type = typename decltype(input)::value_type;
             std::vector<value_type> batch_max(batch_shape.elements(),
...
...
@@ -658,23 +555,19 @@ struct cpu_logsoftmax
             shape_for_each(output_shape, [&](auto idx) {
                 auto index = this->compute_batch_index(idx, batch_shape, op.axis);
-                output(idx.begin(), idx.end()) = input(idx.begin(), idx.end()) - batch_max[index];
+                output(idx.begin(), idx.end()) =
+                    std::exp(input(idx.begin(), idx.end()) - batch_max[index]);
             });
             std::vector<value_type> batch_sum(batch_shape.elements(), value_type(0));
             shape_for_each(output_shape, [&](auto idx) {
                 auto index = this->compute_batch_index(idx, batch_shape, op.axis);
-                batch_sum[index] += std::exp(output(idx.begin(), idx.end()));
+                batch_sum[index] += output(idx.begin(), idx.end());
             });
-            for(std::size_t i = 0; i < batch_sum.size(); ++i)
-            {
-                batch_sum[i] = std::log(batch_sum[i]);
-            }
             shape_for_each(output_shape, [&](auto idx) {
                 auto index = this->compute_batch_index(idx, batch_shape, op.axis);
-                output(idx.begin(), idx.end()) -= batch_sum[index];
+                output(idx.begin(), idx.end()) /= batch_sum[index];
             });
         });
...
...
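This is the numerically stable softmax: the first pass stores exp(x - m) per element, the second accumulates the per-batch sums directly (the exponentiation already happened), and the final pass divides. In formula form, y_i = exp(x_i - m) / sum_j exp(x_j - m) with m = max_j x_j; subtracting m first keeps exp() from overflowing for large activations, e.g.:

    // x = {1000, 1001}: exp(1000) overflows float and double,
    // but exp(x - m) = {exp(-1), exp(0)} is perfectly representable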
@@ -682,83 +575,64 @@ struct cpu_logsoftmax
         return result;
     }
 };

-struct add_op
-{
-    std::string name() const { return "add"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return x + y; };
-    }
-};
-
-struct sub_op
-{
-    std::string name() const { return "sub"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return x - y; };
-    }
-};
-
-struct mul_op
-{
-    std::string name() const { return "mul"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return x * y; };
-    }
-};
-
-struct div_op
-{
-    std::string name() const { return "div"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return x / y; };
-    }
-};
-
-struct max_op
-{
-    std::string name() const { return "max"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return std::max(x, y); };
-    }
-};
-
-struct min_op
-{
-    std::string name() const { return "min"; }
-    auto fcn() const
-    {
-        return [](auto x, auto y) { return std::min(x, y); };
-    }
-};
-
-template <typename Op>
-struct cpu_binary
-{
-    Op op;
-    std::string name() const { return op.name(); }
-    shape compute_shape(const std::vector<shape>& inputs) const { return inputs.front(); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        argument result{output_shape};
-        visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
-            if(input1.get_shape().packed() and input2.get_shape().packed())
-            {
-                std::transform(
-                    input1.begin(), input1.end(), input2.begin(), output.begin(), op.fcn());
-            }
-            else
-            {
-                shape_for_each(output.get_shape(), [&](const auto& idx) {
-                    output(idx.begin(), idx.end()) =
-                        op.fcn()(input1(idx.begin(), idx.end()), input2(idx.begin(), idx.end()));
-                });
-            }
-        });
-        return result;
-    }
-};
+struct cpu_logsoftmax
+{
+    op::logsoftmax op;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+    std::string name() const { return "cpu::logsoftmax"; }
+    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
+    template <typename T>
+    std::size_t compute_batch_index(T idx, const shape& batch_shape, int axis) const
+    {
+        idx[axis] = 0;
+        return batch_shape.index(idx);
+    }
+    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    {
+        argument result{output_shape};
+        auto batch_lens     = output_shape.lens();
+        batch_lens[op.axis] = 1;
+        shape batch_shape{shape::int32_type, batch_lens};
+        visit_all(result, args[0])([&](auto output, auto input) {
+            using value_type = typename decltype(input)::value_type;
+            std::vector<value_type> batch_max(batch_shape.elements(),
+                                              std::numeric_limits<value_type>::lowest());
+            shape_for_each(output_shape, [&](auto idx) {
+                auto index       = this->compute_batch_index(idx, batch_shape, op.axis);
+                batch_max[index] = std::max(batch_max[index], input(idx.begin(), idx.end()));
+            });
+            shape_for_each(output_shape, [&](auto idx) {
+                auto index = this->compute_batch_index(idx, batch_shape, op.axis);
+                output(idx.begin(), idx.end()) = input(idx.begin(), idx.end()) - batch_max[index];
+            });
+            std::vector<value_type> batch_sum(batch_shape.elements(), value_type(0));
+            shape_for_each(output_shape, [&](auto idx) {
+                auto index = this->compute_batch_index(idx, batch_shape, op.axis);
+                batch_sum[index] += std::exp(output(idx.begin(), idx.end()));
+            });
+            for(std::size_t i = 0; i < batch_sum.size(); ++i)
+            {
+                batch_sum[i] = std::log(batch_sum[i]);
+            }
+            shape_for_each(output_shape, [&](auto idx) {
+                auto index = this->compute_batch_index(idx, batch_shape, op.axis);
+                output(idx.begin(), idx.end()) -= batch_sum[index];
+            });
+        });
+        return result;
+    }
+};
...
...
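`cpu_logsoftmax` uses the matching stable identity log(softmax(x))_i = (x_i - m) - log(sum_j exp(x_j - m)): pass one records the per-batch max, pass two stores x - m, pass three accumulates the sum of exponentials, and the log of each sum is subtracted at the end, e.g.:

    // x = {0, ln 3}, m = ln 3: shifted = {-ln 3, 0}, sum = 1/3 + 1 = 4/3
    // logsoftmax = {-ln 3 - ln(4/3), -ln(4/3)} = {-ln 4, ln(3/4)}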
@@ -782,43 +656,17 @@ struct cpu_apply
     void init()
     {
-        apply_map["im2col"]      = extend_op<cpu_im2col, op::im2col>();
-        apply_map["convolution"] = extend_op<cpu_convolution, op::convolution>();
-        apply_map["dot"]         = extend_op<cpu_gemm, op::dot>();
         apply_map["batch_norm_inference"] =
             extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
+        apply_map["convolution"] = extend_op<cpu_convolution, op::convolution>();
+        apply_map["dot"]         = extend_op<cpu_gemm, op::dot>();
+        apply_map["elu"]         = extend_op<cpu_unary<elu_op>, op::elu>();
+        apply_map["im2col"]      = extend_op<cpu_im2col, op::im2col>();
+        apply_map["leaky_relu"]  = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
+        apply_map["logsoftmax"]  = extend_op<cpu_logsoftmax, op::logsoftmax>();
+        apply_map["lrn"]         = extend_op<cpu_lrn, op::lrn>();
-        apply_map["contiguous"]  = extend_op<cpu_contiguous, op::contiguous>();
         apply_map["pad"]         = extend_op<cpu_pad, op::pad>();
-        apply_map["concat"]      = extend_op<cpu_concat, op::concat>();
-        apply_map["gather"]      = extend_op<cpu_gather, op::gather>();
-        apply_map["logsoftmax"]  = extend_op<cpu_logsoftmax, op::logsoftmax>();
-        apply_map["leaky_relu"]  = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
-        apply_map["elu"]         = extend_op<cpu_unary<elu_op>, op::elu>();
-        apply_map["identity"]    = simple_op<cpu_unary<identity_op>>();
-        apply_map["abs"]         = simple_op<cpu_unary<abs_op>>();
-        apply_map["sinh"]        = simple_op<cpu_unary<sinh_op>>();
-        apply_map["cosh"]        = simple_op<cpu_unary<cosh_op>>();
-        apply_map["tanh"]        = simple_op<cpu_unary<tanh_op>>();
-        apply_map["sigmoid"]     = simple_op<cpu_unary<sigmoid_op>>();
-        apply_map["exp"]         = simple_op<cpu_unary<exp_op>>();
-        apply_map["log"]         = simple_op<cpu_unary<log_op>>();
-        apply_map["neg"]         = simple_op<cpu_unary<neg_op>>();
-        apply_map["sin"]         = simple_op<cpu_unary<sin_op>>();
-        apply_map["cos"]         = simple_op<cpu_unary<cos_op>>();
-        apply_map["tan"]         = simple_op<cpu_unary<tan_op>>();
-        apply_map["asin"]        = simple_op<cpu_unary<asin_op>>();
-        apply_map["acos"]        = simple_op<cpu_unary<acos_op>>();
-        apply_map["atan"]        = simple_op<cpu_unary<atan_op>>();
-        apply_map["relu"]        = simple_op<cpu_unary<relu_op>>();
-        apply_map["add"]         = simple_op<cpu_binary<add_op>>();
-        apply_map["sub"]         = simple_op<cpu_binary<sub_op>>();
-        apply_map["mul"]         = simple_op<cpu_binary<mul_op>>();
-        apply_map["div"]         = simple_op<cpu_binary<div_op>>();
-        apply_map["max"]         = simple_op<cpu_binary<max_op>>();
-        apply_map["min"]         = simple_op<cpu_binary<min_op>>();
-        apply_map["softmax"]     = simple_op<softmax2d>();
+        apply_map["softmax"]     = extend_op<cpu_softmax, op::softmax>();
     }
     void apply()
...
...
@@ -834,7 +682,16 @@ struct cpu_apply
             {
                 apply_map.at(it->name())(it);
             }
+            else if(is_context_free(it->get_operator()))
+            {
+                apply_cpu_op(it);
+            }
         }
     }

+    void apply_cpu_op(instruction_ref ins)
+    {
+        prog->replace_instruction(ins, cpu_op{ins->get_operator()}, ins->inputs());
+    }

     template <class T>
...
...
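The new fallback broadens CPU coverage: any operation without a dedicated kernel in `apply_map` that is still context-free gets wrapped in the generic `cpu_op`. A sketch of the dispatch order for a hypothetical instruction `it`:

    // 1. registered kernel: apply_map.at(it->name())(it)
    // 2. otherwise, if is_context_free(it->get_operator()) holds,
    //    replace it with cpu_op{it->get_operator()} via apply_cpu_op(it)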