gaoqiong / MIGraphX · Commit 3a848f0d

Merge branch 'develop' into doc2
Authored Mar 19, 2020 by Paul
Parents: 64e8e30a, d1e945da

Changes: 208 files in total; this page shows 20 changed files with 1259 additions and 232 deletions (+1259 −232).
src/include/migraphx/program.hpp          +4   −2
src/include/migraphx/schedule_model.hpp   +13  −7
src/include/migraphx/target.hpp           +13  −7
src/include/migraphx/tf.hpp               +8   −1
src/instruction.cpp                       +8   −0
src/onnx/cifar10.cpp                      +4   −3
src/onnx/mnist.cpp                        +3   −2
src/onnx/onnx.cpp                         +471 −106
src/onnx/onnx.proto                       +356 −74
src/program.cpp                           +68  −9
src/py/migraphx_py.cpp                    +13  −4
src/quantization.cpp                      +6   −0
src/simplify_algebra.cpp                  +79  −1
src/targets/cpu/lowering.cpp              +79  −13
src/targets/gpu/CMakeLists.txt            +6   −0
src/targets/gpu/argmax.cpp                +3   −1
src/targets/gpu/argmin.cpp                +3   −1
src/targets/gpu/deconvolution.cpp         +103 −0
src/targets/gpu/device/acos.cpp           +1   −1
src/targets/gpu/device/acosh.cpp          +18  −0
src/include/migraphx/program.hpp

@@ -87,13 +87,15 @@ struct program
     instruction_ref add_parameter(std::string name, shape s);
+    instruction_ref add_return(std::vector<instruction_ref> args);
+
     shape get_parameter_shape(std::string name) const;
     instruction_ref get_parameter(std::string name) const;
     std::unordered_map<std::string, shape> get_parameter_shapes() const;
-    argument eval(parameter_map params) const;
+    std::vector<argument> eval(parameter_map params) const;
     bool has_instruction(instruction_ref ins) const;

@@ -101,7 +103,7 @@ struct program
     instruction_ref begin() const;
     instruction_ref end() const;

-    shape get_shape() const;
+    std::vector<shape> get_output_shapes() const;

     context& get_context() const;
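Together, add_return and the vector-returning eval give a program multiple outputs. A minimal usage sketch against the API as declared above; the header paths, op names, CPU target, and generate_argument helper are assumed from the wider MIGraphX tree of this period, not from this commit:

    #include <migraphx/program.hpp>
    #include <migraphx/operators.hpp>
    #include <migraphx/generate.hpp>
    #include <migraphx/cpu/target.hpp>

    int main()
    {
        migraphx::program p;
        migraphx::shape s{migraphx::shape::float_type, {2, 3}};
        auto x    = p.add_parameter("x", s);
        auto y    = p.add_parameter("y", s);
        auto sum  = p.add_instruction(migraphx::op::add{}, x, y);
        auto prod = p.add_instruction(migraphx::op::mul{}, x, y);
        p.add_return({sum, prod}); // the program now has two outputs

        p.compile(migraphx::cpu::target{});

        migraphx::program::parameter_map m;
        for(auto&& param : p.get_parameter_shapes())
            m[param.first] = migraphx::generate_argument(param.second);

        auto results = p.eval(m); // one argument per value passed to add_return
        return results.size() == 2 ? 0 : 1;
    }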
src/include/migraphx/schedule_model.hpp

@@ -69,11 +69,17 @@ struct schedule_model
     template <typename PrivateDetailTypeErasedT>
     schedule_model& operator=(PrivateDetailTypeErasedT value)
     {
-        if(private_detail_te_handle_mem_var.unique())
-            *private_detail_te_handle_mem_var = std::forward<PrivateDetailTypeErasedT>(value);
-        else if(!private_detail_te_handle_mem_var)
-            private_detail_te_handle_mem_var = std::make_shared<PrivateDetailTypeErasedT>(
-                std::forward<PrivateDetailTypeErasedT>(value));
+        using std::swap;
+        auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
+        if(derived and private_detail_te_handle_mem_var.unique())
+        {
+            *derived = std::forward<PrivateDetailTypeErasedT>(value);
+        }
+        else
+        {
+            schedule_model rhs(value);
+            swap(private_detail_te_handle_mem_var, rhs.private_detail_te_handle_mem_var);
+        }
         return *this;
     }

@@ -81,7 +87,7 @@ struct schedule_model
     template <typename PrivateDetailTypeErasedT>
     PrivateDetailTypeErasedT* any_cast()
     {
-        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
+        return this->type_id() == typeid(PrivateDetailTypeErasedT)
                    ? std::addressof(static_cast<private_detail_te_handle_type<
                          typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                          private_detail_te_get_handle()))

@@ -92,7 +98,7 @@ struct schedule_model
     template <typename PrivateDetailTypeErasedT>
     const typename std::remove_cv<PrivateDetailTypeErasedT>::type* any_cast() const
     {
-        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
+        return this->type_id() == typeid(PrivateDetailTypeErasedT)
                    ? std::addressof(static_cast<const private_detail_te_handle_type<
                          typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                          private_detail_te_get_handle()))
src/include/migraphx/target.hpp

@@ -115,11 +115,17 @@ struct target
     template <typename PrivateDetailTypeErasedT>
     target& operator=(PrivateDetailTypeErasedT value)
    {
-        if(private_detail_te_handle_mem_var.unique())
-            *private_detail_te_handle_mem_var = std::forward<PrivateDetailTypeErasedT>(value);
-        else if(!private_detail_te_handle_mem_var)
-            private_detail_te_handle_mem_var = std::make_shared<PrivateDetailTypeErasedT>(
-                std::forward<PrivateDetailTypeErasedT>(value));
+        using std::swap;
+        auto* derived = this->any_cast<PrivateDetailTypeErasedT>();
+        if(derived and private_detail_te_handle_mem_var.unique())
+        {
+            *derived = std::forward<PrivateDetailTypeErasedT>(value);
+        }
+        else
+        {
+            target rhs(value);
+            swap(private_detail_te_handle_mem_var, rhs.private_detail_te_handle_mem_var);
+        }
         return *this;
     }

@@ -127,7 +133,7 @@ struct target
     template <typename PrivateDetailTypeErasedT>
     PrivateDetailTypeErasedT* any_cast()
     {
-        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
+        return this->type_id() == typeid(PrivateDetailTypeErasedT)
                    ? std::addressof(static_cast<private_detail_te_handle_type<
                          typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                          private_detail_te_get_handle()))

@@ -138,7 +144,7 @@ struct target
     template <typename PrivateDetailTypeErasedT>
     const typename std::remove_cv<PrivateDetailTypeErasedT>::type* any_cast() const
     {
-        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
+        return this->type_id() == typeid(PrivateDetailTypeErasedT)
                    ? std::addressof(static_cast<const private_detail_te_handle_type<
                          typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                          private_detail_te_get_handle()))
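schedule_model and target share the same generated type-erasure boilerplate, so the two headers change identically: assignment now reuses the stored object in place when it is uniquely owned and of the same concrete type, and otherwise builds a fresh wrapper and swaps handles. A standalone sketch of that copy-on-write pattern; the erased wrapper below is illustrative only, not MIGraphX code, and omits the copy-constructor guards the generated headers have:

    #include <memory>
    #include <typeinfo>
    #include <utility>

    struct erased
    {
        struct concept_t
        {
            virtual ~concept_t()                       = default;
            virtual const std::type_info& type() const = 0;
        };
        template <class T>
        struct model_t : concept_t
        {
            T value;
            explicit model_t(T v) : value(std::move(v)) {}
            const std::type_info& type() const override { return typeid(T); }
        };

        std::shared_ptr<concept_t> handle;

        template <class T>
        erased(T value) : handle(std::make_shared<model_t<T>>(std::move(value)))
        {
        }

        template <class T>
        T* any_cast()
        {
            return handle and handle->type() == typeid(T)
                       ? &static_cast<model_t<T>&>(*handle).value
                       : nullptr;
        }

        template <class T>
        erased& operator=(T value)
        {
            using std::swap;
            auto* derived = this->any_cast<T>();
            if(derived and handle.use_count() == 1) // same type, sole owner: assign in place
            {
                *derived = std::move(value);
            }
            else // different type or shared handle: build a new wrapper and swap
            {
                erased rhs(std::move(value));
                swap(handle, rhs.handle);
            }
            return *this;
        }
    };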
src/include/migraphx/tf.hpp

@@ -7,8 +7,15 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+/// struct to pass in tf options to parser
+struct tf_options
+{
+    bool is_nhwc            = false;
+    unsigned int batch_size = 1;
+};
+
 /// Create a program from a tf pb file (default is nhwc format)
-program parse_tf(const std::string& name, bool is_nhwc);
+program parse_tf(const std::string& name, tf_options = tf_options{});

 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
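Callers now set the layout and batch size by name instead of passing a bare bool. A usage sketch; the model path is a placeholder:

    #include <migraphx/tf.hpp>

    int main()
    {
        migraphx::tf_options options;
        options.is_nhwc    = true; // TensorFlow's default data layout
        options.batch_size = 4;

        // "model.pb" is a placeholder path for this sketch
        migraphx::program p = migraphx::parse_tf("model.pb", options);
        return 0;
    }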
src/instruction.cpp

@@ -22,6 +22,9 @@ void instruction::replace(const shape& r)
     result = r;
     for(auto&& ins : output)
     {
+        if(ins->name() == "@return")
+            continue;
+
         assert(ins->name().front() != '@');
         ins->recompute_shape();
     }

@@ -70,6 +73,10 @@ bool instruction::valid() const
     {
         computed = result;
     }
+    else if(op.name() == "@return")
+    {
+        computed = {};
+    }
     else
     {
         try

@@ -81,6 +88,7 @@ bool instruction::valid() const
             return false;
         }
     }
+
     return result == computed &&
            std::all_of(output.begin(), output.end(), [&](instruction_ref i) {
                return std::find(i->inputs().begin(), i->inputs().end(), *this) !=
                       i->inputs().end();
            });
src/onnx/cifar10.cpp

@@ -73,8 +73,9 @@ int main(int argc, char const* argv[])
     for(int i = 0; i < 10; i++)
     {
         std::cout << "label: " << static_cast<uint32_t>(labels[i]) << " ----> ";
-        m["0"] = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[3072 * i]});
-        auto result = migraphx::gpu::from_gpu(prog.eval(m));
+        m["0"]          = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[3072 * i]});
+        auto gpu_result = prog.eval(m).back();
+        auto result     = migraphx::gpu::from_gpu(gpu_result);
         std::vector<float> logits;
         result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
         std::vector<float> probs = softmax<float>(logits);

@@ -95,7 +96,7 @@ int main(int argc, char const* argv[])
         std::cout << "label: " << static_cast<uint32_t>(labels[i]) << " ----> ";
         auto input3 = migraphx::argument{s, &ptr[3072 * i]};
-        auto result = prog.eval({{"0", input3}});
+        auto result = prog.eval({{"0", input3}}).back();
         std::vector<float> logits;
         result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
         std::vector<float> probs = softmax<float>(logits);
src/onnx/mnist.cpp

@@ -130,8 +130,9 @@ int main(int argc, char const* argv[])
     for(int i = 0; i < 20; i++)
     {
         std::cout << "label: " << labels[i] << " ----> ";
-        m["0"] = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[784 * i]});
-        auto result = migraphx::gpu::from_gpu(prog.eval(m));
+        m["0"]       = migraphx::gpu::to_gpu(migraphx::argument{s, &ptr[784 * i]});
+        auto results = prog.eval(m).back();
+        auto result  = migraphx::gpu::from_gpu(results);
         std::vector<float> logits;
         result.visit([&](auto output) { logits.assign(output.begin(), output.end()); });
         std::vector<float> probs = softmax(logits);
src/onnx/onnx.cpp (+471 −106) — diff collapsed, not shown here.

src/onnx/onnx.proto (+356 −74) — diff collapsed, not shown here.
src/program.cpp

@@ -52,7 +52,9 @@ static void print_instruction(std::ostream& os,
         os << ")";
     }

-    os << " -> " << ins->get_shape();
+    // skip return instruction shape
+    if(ins->name() != "@return")
+        os << " -> " << ins->get_shape();
 }

 template <class F>

@@ -147,7 +149,14 @@ void program::assign(const program& p)
             std::transform(inputs.begin(), inputs.end(), copy_inputs.begin(), [&](auto i) {
                 return ins_map[i];
             });
-            copy_ins = add_instruction(ins->get_operator(), copy_inputs);
+            if(ins->name() == "@return")
+            {
+                copy_ins = add_return(copy_inputs);
+            }
+            else
+            {
+                copy_ins = add_instruction(ins->get_operator(), copy_inputs);
+            }
         }
         ins_map[ins] = copy_ins;

@@ -270,6 +279,18 @@ instruction_ref program::add_parameter(std::string name, shape s)
     return impl->instructions.begin();
 }

+instruction_ref program::add_return(std::vector<instruction_ref> args)
+{
+    assert(std::all_of(
+               args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
+           "Argument is not an exisiting instruction");
+    impl->instructions.push_back({builtin::returns{}, {}, args});
+    auto result = std::prev(impl->instructions.end());
+    instruction::backreference(result);
+    assert(result->valid(begin()));
+    return result;
+}
+
 shape program::get_parameter_shape(std::string name) const
 {
     auto ins = std::find_if(

@@ -334,7 +355,26 @@ std::size_t program::size() const { return impl->instructions.size(); }
 instruction_ref program::begin() const { return impl->instructions.begin(); }
 instruction_ref program::end() const { return impl->instructions.end(); }

-shape program::get_shape() const { return impl->instructions.back().get_shape(); }
+std::vector<shape> program::get_output_shapes() const
+{
+    auto last_ins = impl->instructions.back();
+    if(last_ins.name() == "@return")
+    {
+        auto& output_ins = last_ins.inputs();
+        std::vector<shape> output_shapes;
+        std::transform(output_ins.begin(),
+                       output_ins.end(),
+                       std::back_inserter(output_shapes),
+                       [](auto& ins) { return ins->get_shape(); });
+        return output_shapes;
+    }
+    // The else branch is to provide backward compatibility
+    else
+    {
+        return {last_ins.get_shape()};
+    }
+}

 context& program::get_context() const { return impl->ctx; }

@@ -372,10 +412,10 @@ void program::finalize()
 }

 template <class F>
-argument generic_eval(const program& p,
-                      context& ctx,
-                      std::unordered_map<std::string, argument> params,
-                      F trace)
+std::vector<argument> generic_eval(const program& p,
+                                   context& ctx,
+                                   std::unordered_map<std::string, argument> params,
+                                   F trace)
 {
     assert(p.validate() == p.end());
     std::unordered_map<instruction_ref, argument> results;

@@ -407,6 +447,19 @@ argument generic_eval(const program& p,
         {
             results.emplace(ins,
                             trace(ins, [&] { return argument{ins->get_shape(), nullptr}; }));
         }
+        else if(name == "@return")
+        {
+            std::vector<argument> prog_outputs;
+            std::transform(ins->inputs().begin(),
+                           ins->inputs().end(),
+                           std::back_inserter(prog_outputs),
+                           [&](instruction_ref i) {
+                               assert(results.find(i) != results.end());
+                               return results[i];
+                           });
+            return prog_outputs;
+        }
         else
         {
             values.resize(ins->inputs().size());

@@ -421,10 +474,11 @@ argument generic_eval(const program& p,
         }
         assert(results.find(ins) != results.end());
     }
-    return results.at(std::prev(p.end()));
+    return {results.at(std::prev(p.end()))};
 }

-argument program::eval(std::unordered_map<std::string, argument> params) const
+std::vector<argument> program::eval(parameter_map params) const
 {
     auto& ctx = this->impl->ctx;
 #ifndef NDEBUG

@@ -531,6 +585,11 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
     print_program(*this, [&](auto ins, const auto& names) {
         print_instruction(std::cout, ins, names);

+        // skip return instruction
+        if(ins->name() == "@return")
+            return;
+
         double avg     = common_average(ins_vec[ins]);
         double percent = std::ceil(100.0 * avg / total_instruction_time);
         os << ": " << avg << "ms, " << percent << "%";
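With a trailing @return, the program's outputs are exactly the return's inputs: generic_eval collects one argument per returned value, and get_output_shapes reports one shape per returned value, while programs without @return keep the old single-output behaviour via the backward-compatibility branch. A small sketch of the difference; op names are assumed from elsewhere in the tree:

    #include <migraphx/program.hpp>
    #include <migraphx/operators.hpp>

    int main()
    {
        migraphx::shape s{migraphx::shape::float_type, {4}};

        migraphx::program single;
        auto x = single.add_parameter("x", s);
        single.add_instruction(migraphx::op::abs{}, x);
        // no @return: falls back to the last instruction -> one shape

        migraphx::program multi;
        auto y = multi.add_parameter("y", s);
        auto a = multi.add_instruction(migraphx::op::abs{}, y);
        auto n = multi.add_instruction(migraphx::op::neg{}, y);
        multi.add_return({a, n}); // @return: one shape per returned value

        return (single.get_output_shapes().size() == 1 &&
                multi.get_output_shapes().size() == 2)
                   ? 0
                   : 1;
    }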
src/py/migraphx_py.cpp

@@ -158,7 +158,7 @@ PYBIND11_MODULE(migraphx, m)
     py::class_<migraphx::program>(m, "program")
         .def("clone", [](migraphx::program& p) { return *(new migraphx::program(p)); })
         .def("get_parameter_shapes", &migraphx::program::get_parameter_shapes)
-        .def("get_shape", &migraphx::program::get_shape)
+        .def("get_output_shapes", &migraphx::program::get_output_shapes)
         .def("compile",
              [](migraphx::program& p, const migraphx::target& t, bool offload_copy) {
                  migraphx::compile_options options;

@@ -173,11 +173,20 @@ PYBIND11_MODULE(migraphx, m)
         .def("__repr__", [](const migraphx::program& p) { return migraphx::to_string(p); });

     m.def("parse_tf",
-          &migraphx::parse_tf,
+          [](const std::string& filename, bool is_nhwc, unsigned int batch_size) {
+              return migraphx::parse_tf(filename, migraphx::tf_options{is_nhwc, batch_size});
+          },
           "Parse tf protobuf (default format is nhwc)",
           py::arg("filename"),
-          py::arg("is_nhwc") = true);
-    m.def("parse_onnx", &migraphx::parse_onnx);
+          py::arg("is_nhwc")    = true,
+          py::arg("batch_size") = 1);
+    m.def("parse_onnx",
+          [](const std::string& filename, unsigned int batch_size) {
+              return migraphx::parse_onnx(filename, migraphx::onnx_options{batch_size});
+          },
+          "Parse onnx file",
+          py::arg("filename"),
+          py::arg("batch_size") = 1);

     m.def("get_target", [](const std::string& name) -> migraphx::target {
         if(name == "cpu")
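The bindings switch from binding parse_tf/parse_onnx directly to wrapping them in lambdas, so Python keyword defaults can populate the new options structs. A self-contained sketch of the same pybind11 pattern; the module, struct, and function names below are illustrative, not MIGraphX's:

    #include <pybind11/pybind11.h>
    #include <string>

    namespace py = pybind11;

    // Hypothetical options struct and parser, mirroring the binding pattern above.
    struct options
    {
        bool is_nhwc            = false;
        unsigned int batch_size = 1;
    };

    int parse(const std::string& filename, options opts) { return opts.is_nhwc ? 1 : 0; }

    PYBIND11_MODULE(example, m)
    {
        // The lambda adapts Python keyword arguments to the C++ options struct,
        // so callers can write example.parse("f.pb", is_nhwc=True, batch_size=4).
        m.def("parse",
              [](const std::string& filename, bool is_nhwc, unsigned int batch_size) {
                  return parse(filename, options{is_nhwc, batch_size});
              },
              py::arg("filename"),
              py::arg("is_nhwc")    = false,
              py::arg("batch_size") = 1);
    }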
src/quantization.cpp

@@ -105,6 +105,9 @@ void quantize_fp16(program& prog, const std::vector<std::string>& ins_names)
     std::unordered_map<instruction_ref, instruction_ref> map_fp16;
     for(auto ins : iterator_for(prog))
     {
+        if(ins->name() == "@return")
+            break;
+
         // all indicates every instruction is converted
         if((not contains(ins_names, "all")) and (not contains(ins_names, ins->name())))
         {

@@ -335,6 +338,9 @@ void quantize_int8_impl(program& prog,
     std::unordered_map<instruction_ref, std::size_t> map_ins_index;
     for(auto ins : iterator_for(prog))
     {
+        if(ins->name() == "@return")
+            break;
+
         if(not contains(ins_names, ins->name()))
         {
             continue;
src/simplify_algebra.cpp

@@ -27,6 +27,15 @@ auto conv_const_weights()
                        match::args(match::any(), match::is_constant().bind("w")));
 }

+MIGRAPHX_PRED_MATCHER(args_has_same_ops, instruction_ref ins)
+{
+    if(ins->inputs().empty())
+        return true;
+    return std::all_of(ins->inputs().begin(), ins->inputs().end(), [&](auto j) {
+        return j->get_operator() == ins->inputs().front()->get_operator();
+    });
+}
+
 struct find_mul_conv
 {
     auto matcher() const

@@ -167,6 +176,73 @@ struct find_inner_broadcast
     }
 };

+struct find_concat_unary
+{
+    auto matcher() const
+    {
+        return match::name("concat")(args_has_same_ops(),
+                                     match::arg(0)(match::nargs(1),
+                                                   match::name("relu", "broadcast").bind("x"),
+                                                   match::used_once()));
+    }
+
+    void apply(program& p, match::matcher_result r) const
+    {
+        auto ins  = r.result;
+        auto x    = r.instructions["x"];
+        auto op   = x->get_operator();
+        auto axis = any_cast<op::concat>(ins->get_operator()).axis;
+        // Adjust broadcast lens
+        if(op.name() == "broadcast")
+        {
+            auto b = any_cast<op::broadcast>(op);
+            if(b.axis != axis)
+                return;
+            b.broadcast_lens = ins->get_shape().lens();
+            op               = b;
+            axis             = 0;
+        }
+        auto inputs = ins->inputs();
+        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto i) {
+            return i->inputs().front();
+        });
+        auto concat = p.insert_instruction(ins, op::concat{axis}, inputs);
+        p.replace_instruction(ins, op, concat);
+    }
+};
+
+struct find_concat_binary
+{
+    auto matcher() const
+    {
+        return match::name("concat")(args_has_same_ops(),
+                                     match::arg(0)(match::nargs(2),
+                                                   match::name("add", "multiply").bind("x"),
+                                                   match::used_once()));
+    }
+
+    void apply(program& p, match::matcher_result r) const
+    {
+        auto ins       = r.result;
+        auto x         = r.instructions["x"];
+        auto op        = x->get_operator();
+        auto concat_op = ins->get_operator();
+        auto xinputs   = ins->inputs();
+        std::transform(xinputs.begin(), xinputs.end(), xinputs.begin(), [&](auto i) {
+            return i->inputs().front();
+        });
+        auto yinputs = ins->inputs();
+        std::transform(yinputs.begin(), yinputs.end(), yinputs.begin(), [&](auto i) {
+            return i->inputs().back();
+        });
+        auto xconcat = p.insert_instruction(ins, concat_op, xinputs);
+        auto yconcat = p.insert_instruction(ins, concat_op, yinputs);
+        p.replace_instruction(ins, op, xconcat, yconcat);
+    }
+};
+
 bool axis_equal(const std::vector<std::size_t>& x,
                 const std::vector<std::size_t>& y,
                 std::size_t axis)

@@ -281,7 +357,9 @@ void simplify_algebra::apply(program& p) const
                             find_add_lit_broadcast{},
                             find_add_convs{},
                             find_mul_conv{},
-                            find_mul_add{});
+                            find_mul_add{},
+                            find_concat_unary{},
+                            find_concat_binary{});
         dead_code_elimination{}.apply(p);
     }
 }
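Both new matchers rely on elementwise operations commuting with concatenation: concat(relu(a), relu(b)) equals relu(concat(a, b)), and likewise concat(add(a1, b1), add(a2, b2)) equals add(concat(a1, a2), concat(b1, b2)), so N copies of an op collapse into one after a single concat. A standalone check of the unary identity (plain C++, not the MIGraphX matcher API):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Verify the identity behind find_concat_unary: applying relu before or
    // after concatenation gives the same result.
    int main()
    {
        auto relu = [](std::vector<float> v) {
            std::transform(
                v.begin(), v.end(), v.begin(), [](float x) { return std::max(x, 0.0f); });
            return v;
        };
        auto concat = [](std::vector<float> a, const std::vector<float>& b) {
            a.insert(a.end(), b.begin(), b.end());
            return a;
        };

        std::vector<float> a = {-1.0f, 2.0f, -3.0f};
        std::vector<float> b = {4.0f, -5.0f, 6.0f};

        assert(concat(relu(a), relu(b)) == relu(concat(a, b)));
        return 0;
    }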
src/targets/cpu/lowering.cpp

@@ -4,6 +4,7 @@
 #include <migraphx/dfor.hpp>
 #include <migraphx/op/batch_norm.hpp>
 #include <migraphx/op/convolution.hpp>
+#include <migraphx/op/deconvolution.hpp>
 #include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/dot.hpp>
 #include <migraphx/op/quant_dot.hpp>

@@ -144,13 +145,14 @@ struct cpu_lrn
         int height = output_shape.lens()[2];
         int width  = output_shape.lens()[3];
         float alphaoverarea = op.alpha / float(op.size);
-        int radius          = (op.size - 1) / 2;
+        int radius_lower    = (op.size - 1) / 2;
+        int radius_upper    = op.size / 2 + 1;

         par_dfor(n_batch, height, width)([&](int b, int h, int w) {
             float scale = 0;
             dfor(channels)([&](int c) {
-                auto start = (c - radius) < 0 ? 0 : (c - radius);
-                auto end   = (c + radius) > channels ? channels : (c + radius);
+                auto start = (c - radius_lower) < 0 ? 0 : (c - radius_lower);
+                auto end   = (c + radius_upper) > channels ? channels : (c + radius_upper);

                 for(auto k = start; k < end; ++k)
                 {
                     scale += std::pow(input(b, k, h, w), 2);
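The cpu_lrn change fixes an off-by-one in the normalization window: with a single symmetric radius and a half-open upper bound, a window of nominal size n only ever covered n − 1 channels for odd n (n − 2 for even n). Splitting the radius into lower and upper halves makes the window cover exactly op.size channels. A standalone check of the window arithmetic, with clamping at the channel boundaries ignored:

    #include <cstdio>

    // Window-size check for the cpu_lrn fix: an LRN of size n should sum
    // over n channels when the center channel is far from either boundary.
    int main()
    {
        for(int size = 1; size <= 6; ++size)
        {
            int c            = 100;            // a channel well inside the range
            int radius       = (size - 1) / 2; // old code
            int radius_lower = (size - 1) / 2; // new code
            int radius_upper = size / 2 + 1;
            int old_window = (c + radius) - (c - radius);             // k in [c-radius, c+radius)
            int new_window = (c + radius_upper) - (c - radius_lower); // k in [c-lower, c+upper)
            // old_window is size-1 for odd sizes (size-2 for even); new_window == size
            std::printf("size=%d old=%d new=%d\n", size, old_window, new_window);
        }
        return 0;
    }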
@@ -220,6 +222,67 @@ struct cpu_convolution
     }
 };

+template <class Op>
+struct cpu_deconvolution
+{
+    Op op;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return migraphx::reflect(self.op, f);
+    }
+
+    std::string name() const { return "cpu::" + op.name(); }
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        return op.compute_shape(inputs);
+    }
+
+    argument compute(context&, shape output_shape, std::vector<argument> args) const
+    {
+        argument result{output_shape};
+        visit_all(result, args[0], args[1])([&](auto output, auto input, auto weights) {
+            using type = typename decltype(output)::value_type;
+            std::fill(output.begin(), output.end(), type{0});
+
+            auto out_lens = output_shape.lens();
+            auto out_h    = out_lens[2];
+            auto out_w    = out_lens[3];
+
+            auto in   = input.get_shape().lens();
+            auto in_n = in[0];
+            auto in_c = in[1];
+            auto in_h = in[2];
+            auto in_w = in[3];
+
+            auto wei   = weights.get_shape().lens();
+            auto wei_n = wei[0];
+            auto wei_c = wei[1];
+            auto wei_h = wei[2];
+            auto wei_w = wei[3];
+
+            par_dfor(in_n, wei_c)([&](std::size_t o, std::size_t k) {
+                dfor(in_c, in_h, in_w, wei_h, wei_w)([&](std::size_t w,
+                                                         std::size_t i,
+                                                         std::size_t j,
+                                                         std::size_t x,
+                                                         std::size_t y) {
+                    const int start_x   = i * op.stride[0] - op.padding[0];
+                    const int start_y   = j * op.stride[1] - op.padding[1];
+                    const int out_x     = start_x + x * op.dilation[0];
+                    const int out_y     = start_y + y * op.dilation[1];
+                    const auto group_id = w / (wei_n / op.group);
+                    const auto in_ch    = group_id * wei_c + k;
+                    if(out_x >= 0 && out_x < out_h && out_y >= 0 && out_y < out_w)
+                    {
+                        output(o, in_ch, out_x, out_y) += input(o, w, i, j) * weights(w, k, x, y);
+                    }
+                });
+            });
+        });
+        return result;
+    }
+};
+
 struct cpu_im2col
 {
     op::im2col op;

@@ -598,9 +661,10 @@ struct cpu_softmax
     argument compute(context&, const shape& output_shape, std::vector<argument> args) const
     {
         argument result{output_shape};
-        auto batch_lens     = output_shape.lens();
-        std::size_t n_dims  = batch_lens[op.axis];
-        batch_lens[op.axis] = 1;
+        auto batch_lens        = output_shape.lens();
+        int64_t tuned_axis     = (op.axis < 0) ? op.axis + args[0].get_shape().lens().size() : op.axis;
+        std::size_t n_dims     = batch_lens[tuned_axis];
+        batch_lens[tuned_axis] = 1;
         shape batch_shape{shape::int32_type, batch_lens};

         visit_all(result, args[0])([&](auto output, auto input) {

@@ -612,26 +676,26 @@ struct cpu_softmax
                 auto idx = batch_shape.multi(i);
                 for(std::size_t j = 0; j < n_dims; ++j)
                 {
-                    idx[op.axis] = j;
+                    idx[tuned_axis] = j;
                     batch_max[i] = std::max(batch_max[i], input(idx.begin(), idx.end()));
                 }

                 for(std::size_t j = 0; j < n_dims; ++j)
                 {
-                    idx[op.axis]      = j;
+                    idx[tuned_axis]   = j;
                     std::size_t index = output_shape.index(idx);
                     output[index]     = std::exp(input[index] - batch_max[i]);
                 }

                 for(std::size_t j = 0; j < n_dims; ++j)
                 {
-                    idx[op.axis]  = j;
+                    idx[tuned_axis] = j;
                     batch_sum[i] += output(idx.begin(), idx.end());
                 }

                 for(std::size_t j = 0; j < n_dims; ++j)
                 {
-                    idx[op.axis] = j;
+                    idx[tuned_axis] = j;
                     output(idx.begin(), idx.end()) =
                         op.output()(output(idx.begin(), idx.end()), batch_sum[i]);
                 }

@@ -664,8 +728,10 @@ struct cpu_apply
         apply_map["batch_norm_inference"] =
             extend_op<cpu_batch_norm_inference, op::batch_norm_inference>();
         apply_map["convolution"] = extend_op<cpu_convolution<op::convolution>, op::convolution>();
-        apply_map["dot"]       = extend_op<cpu_gemm, op::dot>();
-        apply_map["quant_dot"] = extend_op<cpu_quant_gemm, op::quant_dot>();
+        apply_map["deconvolution"] =
+            extend_op<cpu_deconvolution<op::deconvolution>, op::deconvolution>();
+        apply_map["dot"]       = extend_op<cpu_gemm, op::dot>();
+        apply_map["quant_dot"] = extend_op<cpu_quant_gemm, op::quant_dot>();
         apply_map["quant_convolution"] =
             extend_op<cpu_convolution<op::quant_convolution>, op::quant_convolution>();
         apply_map["elu"] = extend_op<cpu_unary<elu_op>, op::elu>();
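cpu_deconvolution implements transposed convolution as a scatter: each input element accumulates input * weight into output position i*stride − padding + x*dilation, the mirror image of how cpu_convolution gathers. A 1-D standalone sketch of the same loop structure; the output-length formula below is the standard transposed-convolution one, assumed rather than taken from this diff:

    #include <cstdio>
    #include <vector>

    // 1-D scatter-style transposed convolution: every input element adds
    // input[i] * weight[x] into output position i*stride - padding + x*dilation.
    int main()
    {
        const int stride = 2, padding = 0, dilation = 1;
        std::vector<float> input  = {1, 2, 3};
        std::vector<float> weight = {1, 1, 1};

        // out_len = (in - 1) * stride - 2 * padding + dilation * (kernel - 1) + 1
        const int out_len =
            (input.size() - 1) * stride - 2 * padding + dilation * (weight.size() - 1) + 1;
        std::vector<float> output(out_len, 0.0f);

        for(std::size_t i = 0; i < input.size(); ++i)
            for(std::size_t x = 0; x < weight.size(); ++x)
            {
                int out_x = static_cast<int>(i) * stride - padding + static_cast<int>(x) * dilation;
                if(out_x >= 0 && out_x < out_len)
                    output[out_x] += input[i] * weight[x];
            }

        for(float v : output)
            std::printf("%g ", v); // prints: 1 1 3 2 5 3 3
        return 0;
    }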
src/targets/gpu/CMakeLists.txt

@@ -12,6 +12,7 @@ endif()
 add_library(migraphx_device
     device/acos.cpp
+    device/acosh.cpp
     device/add.cpp
     device/add_clip.cpp
     device/add_relu.cpp

@@ -20,7 +21,9 @@ add_library(migraphx_device
     device/argmax.cpp
     device/argmin.cpp
     device/asin.cpp
+    device/asinh.cpp
     device/atan.cpp
+    device/atanh.cpp
     device/ceil.cpp
     device/clip.cpp
     device/concat.cpp

@@ -43,10 +46,12 @@ add_library(migraphx_device
     device/mul_add_relu.cpp
     device/pad.cpp
     device/pow.cpp
+    device/prelu.cpp
     device/reduce_max.cpp
     device/reduce_mean.cpp
+    device/reduce_min.cpp
     device/reduce_sum.cpp
     device/reduce_prod.cpp
     device/relu.cpp
     device/round.cpp
     device/rsqrt.cpp

@@ -79,6 +84,7 @@ add_library(migraphx_gpu
     lowering.cpp
     pooling.cpp
     convolution.cpp
+    deconvolution.cpp
     quant_convolution.cpp
     softmax.cpp
     logsoftmax.cpp
src/targets/gpu/argmax.cpp

@@ -14,7 +14,9 @@ shape hip_argmax::compute_shape(const std::vector<shape>& inputs) const
 argument hip_argmax::compute(context& ctx, const shape&, const std::vector<argument>& args) const
 {
-    device::argmax(ctx.get_stream().get(), args.back(), args.front(), op.axis);
+    auto n_dim         = args.front().get_shape().lens().size();
+    int64_t tuned_axis = (op.axis < 0) ? op.axis + n_dim : op.axis;
+    device::argmax(ctx.get_stream().get(), args.back(), args.front(), tuned_axis);
     return args.back();
 }
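Both hip_argmax and hip_argmin now normalize a negative axis before launching the device kernel, following the convention that axis −1 means the last dimension. The arithmetic in isolation:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // A negative axis counts from the back: axis -1 on a 4-D tensor means axis 3.
    int main()
    {
        const std::size_t n_dim = 4; // e.g. an NCHW tensor
        for(int64_t axis = -4; axis < 4; ++axis)
        {
            int64_t tuned_axis = (axis < 0) ? axis + static_cast<int64_t>(n_dim) : axis;
            assert(tuned_axis >= 0 && tuned_axis < static_cast<int64_t>(n_dim));
        }
        return 0;
    }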
src/targets/gpu/argmin.cpp

@@ -14,7 +14,9 @@ shape hip_argmin::compute_shape(const std::vector<shape>& inputs) const
 argument hip_argmin::compute(context& ctx, const shape&, const std::vector<argument>& args) const
 {
-    device::argmin(ctx.get_stream().get(), args.back(), args.front(), op.axis);
+    auto n_dim         = args.front().get_shape().lens().size();
+    int64_t tuned_axis = (op.axis < 0) ? op.axis + n_dim : op.axis;
+    device::argmin(ctx.get_stream().get(), args.back(), args.front(), tuned_axis);
     return args.back();
 }
src/targets/gpu/deconvolution.cpp (new file, mode 100644)

#include <migraphx/gpu/deconvolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape miopen_deconvolution::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(4).standard();
    return op.compute_shape({inputs.at(0), inputs.at(1)});
}

argument miopen_deconvolution::compute(context& ctx,
                                       const shape& output_shape,
                                       const std::vector<argument>& args) const
{
    auto x_desc = make_tensor(args[0].get_shape());
    auto w_desc = make_tensor(args[1].get_shape());
    auto y_desc = make_tensor(output_shape);

    float alpha = 1;
    float beta  = 0;
    auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
                                           &alpha,
                                           x_desc.get(),
                                           args[0].implicit(),
                                           w_desc.get(),
                                           args[1].implicit(),
                                           cd.get(),
                                           algo,
                                           &beta,
                                           y_desc.get(),
                                           args[3].implicit(),
                                           args[2].implicit(),
                                           args[2].get_shape().bytes());
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("Running deconvolution failed");
    return args[3];
}

shape miopen_deconvolution::compile(context& ctx,
                                    const shape& output_shape,
                                    std::vector<shape> inputs)
{
    shape workspace_shape{};

    auto x_desc = make_tensor(inputs[0]);
    auto w_desc = make_tensor(inputs[1]);
    auto y_desc = make_tensor(output_shape);

    std::size_t workspace_size = 0;
    miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                             w_desc.get(),
                                             x_desc.get(),
                                             cd.get(),
                                             y_desc.get(),
                                             &workspace_size);
    workspace_shape = shape{shape::int8_type, {workspace_size}};

    auto x         = to_gpu(generate_argument(inputs[0]));
    auto w         = to_gpu(generate_argument(inputs[1]));
    auto y         = allocate_gpu(output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf;
    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                        x_desc.get(),
                                                        x.implicit(),
                                                        w_desc.get(),
                                                        w.implicit(),
                                                        cd.get(),
                                                        y_desc.get(),
                                                        y.implicit(),
                                                        1,
                                                        &algo_count,
                                                        &perf,
                                                        workspace.implicit(),
                                                        workspace_size,
                                                        false);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("Find deconvolution failed");
    handle = ctx.get_stream().get_miopen();
    algo   = perf.fwd_algo;
    return shape{shape::int8_type, {perf.memory}};
}

void miopen_deconvolution::finalize(context& ctx,
                                    const shape& output_shape,
                                    std::vector<shape> inputs)
{
    if(handle == ctx.get_stream().get_miopen())
        return;
    // Check that workspace hasn't changed
    auto size = inputs.at(2).bytes();
    auto ws   = compile(ctx, output_shape, std::move(inputs));
    if(ws.bytes() > size)
        MIGRAPHX_THROW("Workspace has changed during finalization.");
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/acos.cpp

@@ -9,7 +9,7 @@ namespace device {

 void acos(hipStream_t stream, const argument& result, const argument& arg)
 {
-    nary(stream, result, arg)([](auto x) { return ::acos(to_hip_type(x)); });
+    nary(stream, result, arg)([](auto x) __device__ { return ::acos(to_hip_type(x)); });
 }

 } // namespace device
src/targets/gpu/device/acosh.cpp (new file, mode 100644)

#include <migraphx/gpu/device/acosh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void acosh(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) { return ::acosh(to_hip_type(x)); });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
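The new kernel simply forwards to ::acosh elementwise. As a host-side sanity reference for what it computes, acosh(x) = ln(x + sqrt(x² − 1)) for x ≥ 1 (plain C++, no HIP required):

    #include <cassert>
    #include <cmath>

    // Host-side check of the identity the device/acosh.cpp kernel computes
    // elementwise: acosh(x) == log(x + sqrt(x*x - 1)) for x >= 1.
    int main()
    {
        for(double x = 1.0; x < 10.0; x += 0.5)
        {
            double expected = std::log(x + std::sqrt(x * x - 1.0));
            assert(std::abs(std::acosh(x) - expected) < 1e-12);
        }
        return 0;
    }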