Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
bf6f82d8
Commit
bf6f82d8
authored
May 16, 2019
by
Paul
Browse files
Merge from develop
parents
6a0797e2
b93f5320
Changes
92
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
439 additions
and
373 deletions
+439
-373
src/opt/memory_coloring_impl.cpp
src/opt/memory_coloring_impl.cpp
+7
-16
src/opt/memory_coloring_impl.hpp
src/opt/memory_coloring_impl.hpp
+17
-17
src/program.cpp
src/program.cpp
+70
-5
src/propagate_constant.cpp
src/propagate_constant.cpp
+24
-14
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+126
-307
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+2
-0
src/targets/gpu/abs.cpp
src/targets/gpu/abs.cpp
+1
-1
src/targets/gpu/clip.cpp
src/targets/gpu/clip.cpp
+23
-0
src/targets/gpu/device/clip.cpp
src/targets/gpu/device/clip.cpp
+22
-0
src/targets/gpu/device/gather.cpp
src/targets/gpu/device/gather.cpp
+1
-1
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+20
-5
src/targets/gpu/include/migraphx/gpu/abs.hpp
src/targets/gpu/include/migraphx/gpu/abs.hpp
+11
-1
src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
+11
-1
src/targets/gpu/include/migraphx/gpu/clip.hpp
src/targets/gpu/include/migraphx/gpu/clip.hpp
+37
-0
src/targets/gpu/include/migraphx/gpu/concat.hpp
src/targets/gpu/include/migraphx/gpu/concat.hpp
+10
-1
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
+11
-1
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+4
-1
src/targets/gpu/include/migraphx/gpu/device/clip.hpp
src/targets/gpu/include/migraphx/gpu/device/clip.hpp
+20
-0
src/targets/gpu/include/migraphx/gpu/elu.hpp
src/targets/gpu/include/migraphx/gpu/elu.hpp
+11
-1
src/targets/gpu/include/migraphx/gpu/gather.hpp
src/targets/gpu/include/migraphx/gpu/gather.hpp
+11
-1
No files found.
src/opt/memory_coloring_impl.cpp
View file @
bf6f82d8
...
@@ -63,11 +63,11 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
...
@@ -63,11 +63,11 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
}
}
}
}
long
long
offset
=
0
;
std
::
size_t
offset
=
0
;
while
(
!
conflict_queue
.
empty
())
while
(
!
conflict_queue
.
empty
())
{
{
live_range
*
range
=
conflict_queue
.
top
();
live_range
*
range
=
conflict_queue
.
top
();
long
long
iter_offset
=
range
->
offset
;
std
::
size_t
iter_offset
=
range
->
offset
;
if
(
offset
>
iter_offset
)
if
(
offset
>
iter_offset
)
{
{
offset
=
std
::
max
(
offset
,
iter_offset
+
range
->
size
);
offset
=
std
::
max
(
offset
,
iter_offset
+
range
->
size
);
...
@@ -97,7 +97,7 @@ void memory_coloring_impl::build()
...
@@ -97,7 +97,7 @@ void memory_coloring_impl::build()
if
(
num_of_instrs
==
0
)
if
(
num_of_instrs
==
0
)
return
;
return
;
int
cur_points
=
num_of_instrs
*
2
;
auto
cur_points
=
num_of_instrs
*
2
;
instruction_ref
iter
=
p_program
->
end
();
instruction_ref
iter
=
p_program
->
end
();
instruction_ref
begin
=
p_program
->
begin
();
instruction_ref
begin
=
p_program
->
begin
();
std
::
vector
<
instruction_ref
>
dead_instrs
;
std
::
vector
<
instruction_ref
>
dead_instrs
;
...
@@ -193,13 +193,13 @@ void memory_coloring_impl::rewrite()
...
@@ -193,13 +193,13 @@ void memory_coloring_impl::rewrite()
continue
;
continue
;
std
::
size_t
offset
=
0
;
std
::
size_t
offset
=
0
;
if
(
interval
->
get_offset
()
=
=
invalid_offset
)
if
(
interval
->
get_offset
()
!
=
invalid_offset
)
{
{
assert
(
interval
->
result
.
bytes
()
==
0
);
offset
=
interval
->
get_offset
(
);
}
}
else
else
{
{
offset
=
interval
->
get_offset
(
);
assert
(
interval
->
result
.
bytes
()
==
0
);
}
}
if
(
is_allocate
(
ins
))
if
(
is_allocate
(
ins
))
...
@@ -207,15 +207,6 @@ void memory_coloring_impl::rewrite()
...
@@ -207,15 +207,6 @@ void memory_coloring_impl::rewrite()
p_program
->
replace_instruction
(
p_program
->
replace_instruction
(
ins
,
op
::
load
{
ins
->
get_shape
(),
offset
},
scratch_param
);
ins
,
op
::
load
{
ins
->
get_shape
(),
offset
},
scratch_param
);
}
}
else
if
(
is_literal
(
ins
))
{
#if 0
auto pre = p_program->add_literal(ins->lit);
bool pre_copy = (interval->get_begin() < earliest_end_point);
p_program->replace_instruction(
ins, write_literal{offset, pre_copy}, scratch_param, pre);
#endif
}
}
}
}
}
MIGRAPHX_DEBUG
(
dump
(
"---After rewrite---"
));
MIGRAPHX_DEBUG
(
dump
(
"---After rewrite---"
));
...
...
src/opt/memory_coloring_impl.hpp
View file @
bf6f82d8
...
@@ -21,15 +21,15 @@
...
@@ -21,15 +21,15 @@
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
static
const
in
t
invalid_offset
=
-
1
;
static
const
std
::
size_
t
invalid_offset
=
std
::
numeric_limits
<
std
::
size_t
>::
max
()
;
struct
live_range
struct
live_range
{
{
in
t
begin
;
// begin point in the instruction stream.
std
::
size_
t
begin
;
// begin point in the instruction stream.
int
end
;
// end point in the instruction stream.
std
::
size_t
end
;
// end point in the instruction stream.
long
long
offset
;
// offset to base pointer of allocated memory trunk.
std
::
size_t
offset
;
// offset to base pointer of allocated memory trunk.
int
vn
;
// value number that identifies this live_range.
std
::
size_t
vn
;
// value number that identifies this live_range.
long
long
size
;
// size of required memory in bytes
std
::
size_t
size
;
// size of required memory in bytes
#ifdef MIGRAPHX_DEBUG_OPT
#ifdef MIGRAPHX_DEBUG_OPT
void
dump
();
void
dump
();
#endif
#endif
...
@@ -45,9 +45,9 @@ struct live_interval
...
@@ -45,9 +45,9 @@ struct live_interval
is_live_on_entry
=
false
;
is_live_on_entry
=
false
;
}
}
void
add_use
(
in
t
use
)
{
use_points
.
push_front
(
use
);
}
void
add_use
(
std
::
size_
t
use
)
{
use_points
.
push_front
(
use
);
}
in
t
get_begin
()
const
{
return
segment
.
begin
;
}
std
::
size_
t
get_begin
()
const
{
return
segment
.
begin
;
}
in
t
get_end
()
const
{
return
segment
.
end
;
}
std
::
size_
t
get_end
()
const
{
return
segment
.
end
;
}
long
long
get_offset
()
const
{
return
segment
.
offset
;
}
long
long
get_offset
()
const
{
return
segment
.
offset
;
}
#ifdef MIGRAPHX_DEBUG_OPT
#ifdef MIGRAPHX_DEBUG_OPT
...
@@ -55,9 +55,9 @@ struct live_interval
...
@@ -55,9 +55,9 @@ struct live_interval
#endif
#endif
live_range
segment
;
live_range
segment
;
in
t
id
;
std
::
size_
t
id
;
std
::
list
<
in
t
>
use_points
;
std
::
list
<
std
::
size_
t
>
use_points
;
in
t
def_point
;
std
::
size_
t
def_point
;
shape
result
;
shape
result
;
bool
is_literal
;
bool
is_literal
;
bool
is_live_on_entry
;
bool
is_live_on_entry
;
...
@@ -111,8 +111,8 @@ struct memory_coloring_impl
...
@@ -111,8 +111,8 @@ struct memory_coloring_impl
{
{
if
((
range1
.
size
==
0
)
||
(
range2
.
size
==
0
))
if
((
range1
.
size
==
0
)
||
(
range2
.
size
==
0
))
return
false
;
return
false
;
long
long
end1
=
range1
.
offset
+
range1
.
size
-
1
;
auto
end1
=
range1
.
offset
+
range1
.
size
-
1
;
long
long
end2
=
range2
.
offset
+
range2
.
size
-
1
;
auto
end2
=
range2
.
offset
+
range2
.
size
-
1
;
return
((
end1
<
range2
.
offset
)
||
(
end2
<
range1
.
offset
));
return
((
end1
<
range2
.
offset
)
||
(
end2
<
range1
.
offset
));
}
}
void
verify
();
void
verify
();
...
@@ -125,8 +125,8 @@ struct memory_coloring_impl
...
@@ -125,8 +125,8 @@ struct memory_coloring_impl
{
{
bool
operator
()(
const
interval_ptr
i1
,
const
interval_ptr
i2
)
const
bool
operator
()(
const
interval_ptr
i1
,
const
interval_ptr
i2
)
const
{
{
int
len1
=
i1
->
get_end
()
-
i1
->
get_begin
();
auto
len1
=
i1
->
get_end
()
-
i1
->
get_begin
();
int
len2
=
i2
->
get_end
()
-
i2
->
get_begin
();
auto
len2
=
i2
->
get_end
()
-
i2
->
get_begin
();
if
(
len1
!=
len2
)
if
(
len1
!=
len2
)
{
{
return
(
len1
<
len2
);
return
(
len1
<
len2
);
...
@@ -158,7 +158,7 @@ struct memory_coloring_impl
...
@@ -158,7 +158,7 @@ struct memory_coloring_impl
int
num_of_lives
;
int
num_of_lives
;
int
max_value_number
;
int
max_value_number
;
long
long
required_bytes
;
std
::
size_t
required_bytes
;
// The earliest program point where a live interval ends.
// The earliest program point where a live interval ends.
int
earliest_end_point
;
int
earliest_end_point
;
// The latest program point where a live interval ends.
// The latest program point where a live interval ends.
...
...
src/program.cpp
View file @
bf6f82d8
...
@@ -63,11 +63,16 @@ static void print_program(const program& p, F print_func)
...
@@ -63,11 +63,16 @@ static void print_program(const program& p, F print_func)
for
(
auto
ins
:
iterator_for
(
p
))
for
(
auto
ins
:
iterator_for
(
p
))
{
{
std
::
string
var_name
=
"@"
+
std
::
to_string
(
count
)
;
std
::
string
var_name
;
if
(
ins
->
name
()
==
"@param"
)
if
(
ins
->
name
()
==
"@param"
)
{
{
var_name
=
any_cast
<
builtin
::
param
>
(
ins
->
get_operator
()).
parameter
;
var_name
=
any_cast
<
builtin
::
param
>
(
ins
->
get_operator
()).
parameter
;
}
}
else
{
var_name
=
"@"
+
std
::
to_string
(
count
);
count
++
;
}
names
.
emplace
(
ins
,
var_name
);
names
.
emplace
(
ins
,
var_name
);
// TODO: Use all_of
// TODO: Use all_of
...
@@ -78,17 +83,77 @@ static void print_program(const program& p, F print_func)
...
@@ -78,17 +83,77 @@ static void print_program(const program& p, F print_func)
}
}
print_func
(
ins
,
names
);
print_func
(
ins
,
names
);
count
++
;
}
}
}
}
program
::
program
()
:
impl
(
std
::
make_unique
<
program_impl
>
())
{}
program
::
program
()
:
impl
(
std
::
make_unique
<
program_impl
>
())
{}
program
::
program
(
program
&&
)
noexcept
=
default
;
program
::
program
(
program
&&
)
noexcept
=
default
;
program
&
program
::
operator
=
(
program
&&
)
noexcept
=
default
;
program
::~
program
()
noexcept
=
default
;
program
::~
program
()
noexcept
=
default
;
// Copy constructor: all of the copying work is delegated to assign(), which
// creates the implementation object when it does not exist yet.
program::program(const program& p)
{
    this->assign(p);
}
// Copy assignment operator. `p` is received by value, so exchanging `impl`
// with it makes this program adopt the argument's state; the previous state
// ends up in `p` and is released when `p` goes out of scope.
program& program::operator=(program p)
{
    std::swap(this->impl, p.impl);
    return *this;
}
void
program
::
assign
(
const
program
&
p
)
{
// clean the current program
if
(
!
impl
)
{
impl
=
std
::
make_unique
<
program_impl
>
();
}
else
if
(
!
impl
->
instructions
.
empty
())
{
impl
->
instructions
.
clear
();
}
impl
->
ctx
=
p
.
impl
->
ctx
;
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
ins_map
;
for
(
auto
ins
:
iterator_for
(
p
))
{
instruction_ref
copy_ins
{};
if
(
ins
->
name
()
==
"@literal"
)
{
auto
l
=
ins
->
get_literal
();
copy_ins
=
impl
->
instructions
.
insert
(
impl
->
instructions
.
end
(),
instruction
{
l
});
}
else
if
(
ins
->
name
()
==
"@param"
)
{
auto
&&
name
=
any_cast
<
builtin
::
param
>
(
ins
->
get_operator
()).
parameter
;
auto
s
=
ins
->
get_shape
();
copy_ins
=
impl
->
instructions
.
insert
(
impl
->
instructions
.
end
(),
{
builtin
::
param
{
name
},
std
::
move
(
s
),
{}});
}
else
if
(
ins
->
name
()
==
"@outline"
)
{
auto
s
=
ins
->
get_shape
();
copy_ins
=
impl
->
instructions
.
insert
(
impl
->
instructions
.
end
(),
{
builtin
::
outline
{
s
},
s
,
{}});
}
else
{
// retrieve its mapped input
auto
inputs
=
ins
->
inputs
();
// ensure all inputs have its corresponding copy instructions
assert
(
std
::
all_of
(
inputs
.
begin
(),
inputs
.
end
(),
[
&
](
auto
i
)
{
return
ins_map
.
count
(
i
)
>
0
;
}));
std
::
vector
<
instruction_ref
>
copy_inputs
(
inputs
.
size
());
std
::
transform
(
inputs
.
begin
(),
inputs
.
end
(),
copy_inputs
.
begin
(),
[
&
](
auto
i
)
{
return
ins_map
[
i
];
});
copy_ins
=
add_instruction
(
ins
->
get_operator
(),
copy_inputs
);
}
ins_map
[
ins
]
=
copy_ins
;
}
}
instruction_ref
program
::
add_instruction
(
const
operation
&
op
,
std
::
vector
<
instruction_ref
>
args
)
instruction_ref
program
::
add_instruction
(
const
operation
&
op
,
std
::
vector
<
instruction_ref
>
args
)
{
{
return
insert_instruction
(
impl
->
instructions
.
end
(),
op
,
std
::
move
(
args
));
return
insert_instruction
(
impl
->
instructions
.
end
(),
op
,
std
::
move
(
args
));
...
...
src/propagate_constant.cpp
View file @
bf6f82d8
...
@@ -22,22 +22,32 @@ bool skip_propogate(instruction_ref ins)
...
@@ -22,22 +22,32 @@ bool skip_propogate(instruction_ref ins)
void
propagate_constant
::
apply
(
program
&
p
)
const
void
propagate_constant
::
apply
(
program
&
p
)
const
{
{
for
(
auto
i
:
iterator_for
(
p
))
{
if
(
i
->
name
()
!=
"@literal"
)
continue
;
if
(
i
->
outputs
().
empty
())
continue
;
fix
([
&
](
auto
self
,
auto
ins
)
{
fix
([
&
](
auto
self
,
auto
ins
)
{
if
(
not
skip_propogate
(
ins
))
std
::
unordered_set
<
instruction_ref
>
children
(
ins
->
outputs
().
begin
(),
ins
->
outputs
().
end
());
for
(
auto
child
:
children
)
{
{
auto
r
=
ins
->
eval
();
if
(
skip_propogate
(
child
))
{
self
(
child
);
continue
;
}
auto
r
=
child
->
eval
();
if
(
not
r
.
empty
())
if
(
not
r
.
empty
())
{
{
assert
(
r
.
get_shape
()
==
ins
->
get_shape
());
assert
(
r
.
get_shape
()
==
child
->
get_shape
());
auto
l
=
p
.
add_literal
(
r
.
get_shape
(),
r
.
data
());
auto
l
=
p
.
add_literal
(
r
.
get_shape
(),
r
.
data
());
p
.
replace_instruction
(
ins
,
l
);
self
(
p
.
replace_instruction
(
child
,
l
));
return
;
}
}
}
}
std
::
unordered_set
<
instruction_ref
>
children
(
ins
->
inputs
().
begin
(),
ins
->
inputs
().
end
());
})(
i
);
for
(
auto
child
:
children
)
}
self
(
child
);
})(
std
::
prev
(
p
.
end
()));
}
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/cpu/lowering.cpp
View file @
bf6f82d8
...
@@ -48,6 +48,12 @@ struct cpu_batch_norm_inference
...
@@ -48,6 +48,12 @@ struct cpu_batch_norm_inference
{
{
op
::
batch_norm_inference
op
;
op
::
batch_norm_inference
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::batch_norm_inference"
;
}
std
::
string
name
()
const
{
return
"cpu::batch_norm_inference"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
...
@@ -107,6 +113,12 @@ struct cpu_lrn
...
@@ -107,6 +113,12 @@ struct cpu_lrn
{
{
op
::
lrn
op
;
op
::
lrn
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::lrn"
;
}
std
::
string
name
()
const
{
return
"cpu::lrn"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
shape
output_shape
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
context
&
,
shape
output_shape
,
std
::
vector
<
argument
>
args
)
const
...
@@ -117,7 +129,7 @@ struct cpu_lrn
...
@@ -117,7 +129,7 @@ struct cpu_lrn
int
channels
=
output_shape
.
lens
()[
1
];
int
channels
=
output_shape
.
lens
()[
1
];
int
height
=
output_shape
.
lens
()[
2
];
int
height
=
output_shape
.
lens
()[
2
];
int
width
=
output_shape
.
lens
()[
3
];
int
width
=
output_shape
.
lens
()[
3
];
float
alphaoverarea
=
op
.
alpha
/
op
.
size
;
float
alphaoverarea
=
op
.
alpha
/
float
(
op
.
size
)
;
int
radius
=
(
op
.
size
-
1
)
/
2
;
int
radius
=
(
op
.
size
-
1
)
/
2
;
par_dfor
(
n_batch
,
height
,
width
)([
&
](
int
b
,
int
h
,
int
w
)
{
par_dfor
(
n_batch
,
height
,
width
)([
&
](
int
b
,
int
h
,
int
w
)
{
...
@@ -144,6 +156,12 @@ struct cpu_convolution
...
@@ -144,6 +156,12 @@ struct cpu_convolution
{
{
op
::
convolution
op
;
op
::
convolution
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::convolution"
;
}
std
::
string
name
()
const
{
return
"cpu::convolution"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
shape
output_shape
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
context
&
,
shape
output_shape
,
std
::
vector
<
argument
>
args
)
const
...
@@ -165,15 +183,15 @@ struct cpu_convolution
...
@@ -165,15 +183,15 @@ struct cpu_convolution
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
3
])(
output_shape
.
lens
()[
3
])(
[
&
](
std
::
size_t
o
,
std
::
size_t
w
,
std
::
size_t
i
,
std
::
size_t
j
)
{
[
&
](
std
::
size_t
o
,
std
::
size_t
w
,
std
::
size_t
i
,
std
::
size_t
j
)
{
const
int
start_x
=
i
*
op
.
stride
[
0
]
-
op
.
padding
[
0
];
const
auto
start_x
=
i
*
op
.
stride
[
0
]
-
op
.
padding
[
0
];
const
int
start_y
=
j
*
op
.
stride
[
1
]
-
op
.
padding
[
1
];
const
auto
start_y
=
j
*
op
.
stride
[
1
]
-
op
.
padding
[
1
];
const
int
group_id
=
w
/
(
wei_n
/
op
.
group
);
const
auto
group_id
=
w
/
(
wei_n
/
op
.
group
);
double
acc
=
0
;
double
acc
=
0
;
dfor
(
wei_c
,
wei_h
,
wei_w
)([
&
](
std
::
size_t
k
,
std
::
size_t
x
,
std
::
size_t
y
)
{
dfor
(
wei_c
,
wei_h
,
wei_w
)([
&
](
std
::
size_t
k
,
std
::
size_t
x
,
std
::
size_t
y
)
{
const
int
in_x
=
start_x
+
x
;
const
auto
in_x
=
start_x
+
x
;
const
int
in_y
=
start_y
+
y
;
const
auto
in_y
=
start_y
+
y
;
const
int
in_ch
=
group_id
*
wei_c
+
k
;
const
auto
in_ch
=
group_id
*
wei_c
+
k
;
if
(
in_x
>=
0
&&
in_x
<
in_h
&&
in_y
>=
0
&&
in_y
<
in_w
)
if
(
in_x
>=
0
&&
in_x
<
in_h
&&
in_y
>=
0
&&
in_y
<
in_w
)
{
{
acc
+=
input
(
o
,
in_ch
,
in_x
,
in_y
)
*
weights
(
w
,
k
,
x
,
y
);
acc
+=
input
(
o
,
in_ch
,
in_x
,
in_y
)
*
weights
(
w
,
k
,
x
,
y
);
...
@@ -190,6 +208,12 @@ struct cpu_im2col
...
@@ -190,6 +208,12 @@ struct cpu_im2col
{
{
op
::
im2col
op
;
op
::
im2col
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
static
std
::
string
name
()
{
return
"cpu::im2col"
;
}
static
std
::
string
name
()
{
return
"cpu::im2col"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
...
@@ -209,10 +233,8 @@ struct cpu_im2col
...
@@ -209,10 +233,8 @@ struct cpu_im2col
const
std
::
size_t
&
stride_h
=
op
.
stride
[
0
];
const
std
::
size_t
&
stride_h
=
op
.
stride
[
0
];
const
std
::
size_t
&
stride_w
=
op
.
stride
[
1
];
const
std
::
size_t
&
stride_w
=
op
.
stride
[
1
];
int
kdiv2_h
;
auto
kdiv2_h
=
kernel_h
/
2
;
int
kdiv2_w
;
auto
kdiv2_w
=
kernel_w
/
2
;
kdiv2_h
=
kernel_h
/
2
;
kdiv2_w
=
kernel_w
/
2
;
// calculate output sizes
// calculate output sizes
const
std
::
size_t
col_height
=
(
height
-
kernel_h
+
2
*
pad_h
)
/
stride_h
+
1
;
const
std
::
size_t
col_height
=
(
height
-
kernel_h
+
2
*
pad_h
)
/
stride_h
+
1
;
const
std
::
size_t
col_width
=
(
width
-
kernel_w
+
2
*
pad_w
)
/
stride_w
+
1
;
const
std
::
size_t
col_width
=
(
width
-
kernel_w
+
2
*
pad_w
)
/
stride_w
+
1
;
...
@@ -230,8 +252,8 @@ struct cpu_im2col
...
@@ -230,8 +252,8 @@ struct cpu_im2col
dfor
(
channels
,
dfor
(
channels
,
kernel_h
,
kernel_h
,
kernel_w
)([
&
](
std
::
size_t
c
,
std
::
size_t
koffset
,
std
::
size_t
loffset
)
{
kernel_w
)([
&
](
std
::
size_t
c
,
std
::
size_t
koffset
,
std
::
size_t
loffset
)
{
int
idx
=
iinput
+
koffset
-
kdiv2_h
;
auto
idx
=
iinput
+
koffset
-
kdiv2_h
;
int
jdx
=
jinput
+
loffset
-
kdiv2_w
;
auto
jdx
=
jinput
+
loffset
-
kdiv2_w
;
col
(
ldx
,
p
)
=
((
idx
>=
0
)
&&
(
idx
<
height
)
&&
(
jdx
>=
0
)
&&
(
jdx
<
width
))
col
(
ldx
,
p
)
=
((
idx
>=
0
)
&&
(
idx
<
height
)
&&
(
jdx
>=
0
)
&&
(
jdx
<
width
))
?
input
(
0
,
c
,
idx
,
jdx
)
?
input
(
0
,
c
,
idx
,
jdx
)
:
0
;
:
0
;
...
@@ -273,6 +295,12 @@ struct cpu_pooling
...
@@ -273,6 +295,12 @@ struct cpu_pooling
{
{
op
::
pooling
op
;
op
::
pooling
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::pooling_"
+
Op
::
name
();
}
std
::
string
name
()
const
{
return
"cpu::pooling_"
+
Op
::
name
();
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
...
@@ -317,20 +345,35 @@ struct cpu_pooling
...
@@ -317,20 +345,35 @@ struct cpu_pooling
}
}
};
};
struct
cpu_
contiguous
struct
cpu_
op
{
{
op
::
contiguous
op
;
op
eration
op
;
std
::
string
name
()
const
{
return
"cpu::
contiguous"
;
}
std
::
string
name
()
const
{
return
"cpu::
"
+
op
.
name
()
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
context
&
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>
&
args
)
const
{
{
return
op
.
compute
(
output_shape
,
std
::
move
(
args
)
)
;
return
op
.
compute
(
output_shape
,
args
);
}
}
friend
bool
operator
==
(
const
cpu_op
&
x
,
const
cpu_op
&
y
)
{
return
x
.
op
==
y
.
op
;
}
friend
bool
operator
==
(
const
cpu_op
&
x
,
const
operation
&
y
)
{
if
(
x
.
name
()
!=
y
.
name
())
return
false
;
return
x
==
any_cast
<
cpu_op
>
(
y
);
}
friend
bool
operator
==
(
const
operation
&
x
,
const
cpu_op
&
y
)
{
return
y
==
x
;
}
};
};
struct
cpu_pad
struct
cpu_pad
{
{
op
::
pad
op
;
op
::
pad
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::contiguous"
;
}
std
::
string
name
()
const
{
return
"cpu::contiguous"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
...
@@ -354,20 +397,15 @@ struct cpu_pad
...
@@ -354,20 +397,15 @@ struct cpu_pad
}
}
};
};
// CPU wrapper around op::concat: reports the name "cpu::concat" and forwards
// both shape computation and evaluation to the wrapped operator.
struct cpu_concat
{
    op::concat op;

    std::string name() const
    {
        return std::string("cpu::concat");
    }

    shape compute_shape(const std::vector<shape>& inputs) const
    {
        return op.compute_shape(inputs);
    }

    // The argument vector is taken by value and moved into the wrapped operator.
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        return op.compute(output_shape, std::move(args));
    }
};
struct
cpu_gemm
struct
cpu_gemm
{
{
op
::
dot
op
;
op
::
dot
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::dot"
;
}
std
::
string
name
()
const
{
return
"cpu::dot"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
{
...
@@ -410,162 +448,6 @@ struct cpu_gemm
...
@@ -410,162 +448,6 @@ struct cpu_gemm
}
}
};
};
// CPU wrapper around op::gather: reports the name "cpu::gather" and forwards
// both shape computation and evaluation to the wrapped operator.
struct cpu_gather
{
    op::gather op;

    std::string name() const
    {
        return std::string("cpu::gather");
    }

    shape compute_shape(const std::vector<shape>& inputs) const
    {
        return op.compute_shape(inputs);
    }

    // The argument vector is taken by value and moved into the wrapped operator.
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        return op.compute(output_shape, std::move(args));
    }
};
// Functor describing the identity operation: name() yields the operator
// label and fcn() yields a callable that returns its argument unchanged.
struct identity_op
{
    std::string name() const
    {
        return std::string("cpu::identity");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return value; };
        return apply;
    }
};
// Functor describing absolute value: name() yields the operator label and
// fcn() yields a callable that passes its argument through make_signed()
// before applying std::abs.
struct abs_op
{
    std::string name() const
    {
        return std::string("cpu::abs");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return std::abs(make_signed(value)); };
        return apply;
    }
};
// Functor describing the exponential: name() yields the operator label and
// fcn() yields a callable applying std::exp to its argument.
struct exp_op
{
    std::string name() const
    {
        return std::string("cpu::exp");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return std::exp(value); };
        return apply;
    }
};
// Functor describing the natural logarithm: name() yields the operator label
// and fcn() yields a callable applying std::log to its argument.
struct log_op
{
    std::string name() const
    {
        return std::string("cpu::log");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return std::log(value); };
        return apply;
    }
};
// Functor describing sine: name() yields the operator label and fcn() yields
// a callable applying std::sin to its argument.
struct sin_op
{
    std::string name() const
    {
        return std::string("cpu::sin");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return std::sin(value); };
        return apply;
    }
};
// Functor describing cosine: name() yields the operator label and fcn()
// yields a callable applying std::cos to its argument.
struct cos_op
{
    std::string name() const
    {
        return std::string("cpu::cos");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return std::cos(value); };
        return apply;
    }
};
// Functor describing tangent: name() yields the operator label and fcn()
// yields a callable applying std::tan to its argument.
struct tan_op
{
    std::string name() const
    {
        return std::string("cpu::tan");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return std::tan(value); };
        return apply;
    }
};
// Functor describing arcsine: name() yields the operator label and fcn()
// yields a callable applying std::asin to its argument.
struct asin_op
{
    std::string name() const
    {
        return std::string("cpu::asin");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return std::asin(value); };
        return apply;
    }
};
// Functor describing arccosine: name() yields the operator label and fcn()
// yields a callable applying std::acos to its argument.
struct acos_op
{
    std::string name() const
    {
        return std::string("cpu::acos");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return std::acos(value); };
        return apply;
    }
};
// Functor describing arctangent: name() yields the operator label and fcn()
// yields a callable applying std::atan to its argument.
struct atan_op
{
    std::string name() const
    {
        return std::string("cpu::atan");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return std::atan(value); };
        return apply;
    }
};
// Functor describing hyperbolic sine: name() yields the operator label and
// fcn() yields a callable applying std::sinh to its argument.
struct sinh_op
{
    std::string name() const
    {
        return std::string("cpu::sinh");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return std::sinh(value); };
        return apply;
    }
};
// Functor describing hyperbolic cosine: name() yields the operator label and
// fcn() yields a callable applying std::cosh to its argument.
struct cosh_op
{
    std::string name() const
    {
        return std::string("cpu::cosh");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return std::cosh(value); };
        return apply;
    }
};
// Functor describing hyperbolic tangent: name() yields the operator label and
// fcn() yields a callable applying std::tanh to its argument.
struct tanh_op
{
    std::string name() const
    {
        return std::string("cpu::tanh");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return std::tanh(value); };
        return apply;
    }
};
// Functor describing the logistic sigmoid: name() yields the operator label
// and fcn() yields a callable computing 1 / (1 + exp(-x)) with float literals.
struct sigmoid_op
{
    std::string name() const
    {
        return std::string("cpu::sigmoid");
    }

    auto fcn() const
    {
        auto apply = [](auto value) {
            const auto denominator = 1.f + std::exp(-value);
            return 1.f / denominator;
        };
        return apply;
    }
};
// Functor describing negation: name() yields the operator label and fcn()
// yields a callable applying unary minus to its argument.
struct neg_op
{
    std::string name() const
    {
        return std::string("cpu::neg");
    }

    auto fcn() const
    {
        auto apply = [](auto value) { return -value; };
        return apply;
    }
};
// Functor describing ReLU: name() yields the operator label and fcn() yields
// a callable returning the larger of zero (in the argument's own type) and
// the argument.
struct relu_op
{
    std::string name() const
    {
        return std::string("cpu::relu");
    }

    auto fcn() const
    {
        auto apply = [](auto value) {
            using value_type = decltype(value);
            // Zero is the first argument, exactly as std::max(T{0}, x).
            return std::max(value_type{0}, value);
        };
        return apply;
    }
};
struct
leaky_relu_op
struct
leaky_relu_op
{
{
op
::
leaky_relu
op
;
op
::
leaky_relu
op
;
...
@@ -592,14 +474,42 @@ template <typename Op>
...
@@ -592,14 +474,42 @@ template <typename Op>
struct
cpu_unary
struct
cpu_unary
{
{
Op
op
;
Op
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
.
op
,
f
);
}
std
::
string
name
()
const
{
return
op
.
name
();
}
std
::
string
name
()
const
{
return
op
.
name
();
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
inputs
.
front
();
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
}.
has
(
1
);
auto
s
=
inputs
.
at
(
0
);
if
(
s
.
packed
())
{
return
s
;
}
else
{
return
{
s
.
type
(),
s
.
lens
()};
}
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
{
argument
result
{
output_shape
};
argument
result
{
output_shape
};
result
.
visit
([
&
](
auto
output
)
{
result
.
visit
([
&
](
auto
output
)
{
args
[
0
].
visit
([
&
](
auto
input
)
{
args
[
0
].
visit
([
&
](
auto
input
)
{
if
(
input
.
get_shape
().
standard
())
{
std
::
transform
(
input
.
begin
(),
input
.
end
(),
output
.
begin
(),
op
.
fcn
());
std
::
transform
(
input
.
begin
(),
input
.
end
(),
output
.
begin
(),
op
.
fcn
());
}
else
{
shape_for_each
(
output
.
get_shape
(),
[
&
](
const
auto
&
idx
)
{
output
(
idx
.
begin
(),
idx
.
end
())
=
op
.
fcn
()(
input
(
idx
.
begin
(),
idx
.
end
()));
});
}
});
});
});
});
...
@@ -622,20 +532,20 @@ struct softmax2d
...
@@ -622,20 +532,20 @@ struct softmax2d
auto
nw
=
input
.
get_shape
().
lens
()[
3
];
auto
nw
=
input
.
get_shape
().
lens
()[
3
];
dfor
(
nb
,
nh
,
nw
)([
&
](
std
::
size_t
b
,
std
::
size_t
i
,
std
::
size_t
j
)
{
dfor
(
nb
,
nh
,
nw
)([
&
](
std
::
size_t
b
,
std
::
size_t
i
,
std
::
size_t
j
)
{
value_type
cmax
=
std
::
numeric_limits
<
value_type
>::
lowest
();
value_type
cmax
=
std
::
numeric_limits
<
value_type
>::
lowest
();
for
(
in
t
c
=
0
;
c
<
nc
;
c
++
)
for
(
std
::
size_
t
c
=
0
;
c
<
nc
;
c
++
)
{
{
cmax
=
std
::
max
(
cmax
,
input
(
b
,
c
,
i
,
j
));
cmax
=
std
::
max
(
cmax
,
input
(
b
,
c
,
i
,
j
));
}
}
for
(
in
t
c
=
0
;
c
<
nc
;
c
++
)
for
(
std
::
size_
t
c
=
0
;
c
<
nc
;
c
++
)
{
{
output
(
b
,
c
,
i
,
j
)
=
std
::
exp
(
input
(
b
,
c
,
i
,
j
)
-
cmax
);
output
(
b
,
c
,
i
,
j
)
=
std
::
exp
(
input
(
b
,
c
,
i
,
j
)
-
cmax
);
}
}
value_type
sum
=
value_type
(
0
);
value_type
sum
=
value_type
(
0
);
for
(
in
t
c
=
0
;
c
<
nc
;
c
++
)
for
(
std
::
size_
t
c
=
0
;
c
<
nc
;
c
++
)
{
{
sum
+=
output
(
b
,
c
,
i
,
j
);
sum
+=
output
(
b
,
c
,
i
,
j
);
}
}
for
(
in
t
c
=
0
;
c
<
nc
;
c
++
)
for
(
std
::
size_
t
c
=
0
;
c
<
nc
;
c
++
)
{
{
output
(
b
,
c
,
i
,
j
)
=
output
(
b
,
c
,
i
,
j
)
/
sum
;
output
(
b
,
c
,
i
,
j
)
=
output
(
b
,
c
,
i
,
j
)
/
sum
;
}
}
...
@@ -648,6 +558,13 @@ struct softmax2d
...
@@ -648,6 +558,13 @@ struct softmax2d
struct
cpu_logsoftmax
struct
cpu_logsoftmax
{
{
op
::
logsoftmax
op
;
op
::
logsoftmax
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::logsoftmax"
;
}
std
::
string
name
()
const
{
return
"cpu::logsoftmax"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
...
@@ -714,87 +631,6 @@ struct cpu_logsoftmax
...
@@ -714,87 +631,6 @@ struct cpu_logsoftmax
}
}
};
};
// Binary functor for addition: name() yields the label "add" and fcn() yields
// a callable returning the sum of its two arguments.
struct add_op
{
    std::string name() const
    {
        return std::string("add");
    }

    auto fcn() const
    {
        auto apply = [](auto lhs, auto rhs) { return lhs + rhs; };
        return apply;
    }
};
// Binary functor for subtraction: name() yields the label "sub" and fcn()
// yields a callable returning the first argument minus the second.
struct sub_op
{
    std::string name() const
    {
        return std::string("sub");
    }

    auto fcn() const
    {
        auto apply = [](auto lhs, auto rhs) { return lhs - rhs; };
        return apply;
    }
};
// Binary functor for multiplication: name() yields the label "mul" and fcn()
// yields a callable returning the product of its two arguments.
struct mul_op
{
    std::string name() const
    {
        return std::string("mul");
    }

    auto fcn() const
    {
        auto apply = [](auto lhs, auto rhs) { return lhs * rhs; };
        return apply;
    }
};
// Binary functor for division: name() yields the label "div" and fcn() yields
// a callable returning the first argument divided by the second (built-in
// operator/, so integer arguments use integer division).
struct div_op
{
    std::string name() const
    {
        return std::string("div");
    }

    auto fcn() const
    {
        auto apply = [](auto lhs, auto rhs) { return lhs / rhs; };
        return apply;
    }
};
// Binary functor for maximum: name() yields the label "max" and fcn() yields
// a callable forwarding both arguments, in order, to std::max.
struct max_op
{
    std::string name() const
    {
        return std::string("max");
    }

    auto fcn() const
    {
        auto apply = [](auto lhs, auto rhs) { return std::max(lhs, rhs); };
        return apply;
    }
};
// Binary functor for minimum: name() yields the label "min" and fcn() yields
// a callable forwarding both arguments, in order, to std::min.
struct min_op
{
    std::string name() const
    {
        return std::string("min");
    }

    auto fcn() const
    {
        auto apply = [](auto lhs, auto rhs) { return std::min(lhs, rhs); };
        return apply;
    }
};
// Generic two-input CPU operator built around a functor `Op` that supplies
// name() (the operator label) and fcn() (the binary callable to apply).
template <typename Op>
struct cpu_binary
{
    Op op;

    std::string name() const
    {
        return op.name();
    }

    // The output shape is simply the shape of the first input.
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        return inputs.front();
    }

    // Applies op.fcn() to corresponding elements of args[0] and args[1] and
    // returns a newly created argument of shape `output_shape`.
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
        visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
            const bool both_packed =
                input1.get_shape().packed() and input2.get_shape().packed();
            if(not both_packed)
            {
                // General path: address every element through its
                // multi-dimensional index.
                shape_for_each(output.get_shape(), [&](const auto& idx) {
                    output(idx.begin(), idx.end()) = op.fcn()(input1(idx.begin(), idx.end()),
                                                              input2(idx.begin(), idx.end()));
                });
                return;
            }
            // Fast path: both inputs are packed, so they are walked linearly.
            // NOTE(review): confirm that packed but differently ordered (e.g.
            // transposed) inputs cannot reach this path.
            std::transform(
                input1.begin(), input1.end(), input2.begin(), output.begin(), op.fcn());
        });
        return result;
    }
};
struct
cpu_apply
struct
cpu_apply
{
{
program
*
prog
;
program
*
prog
;
...
@@ -814,42 +650,16 @@ struct cpu_apply
...
@@ -814,42 +650,16 @@ struct cpu_apply
void
init
()
void
init
()
{
{
apply_map
[
"im2col"
]
=
extend_op
<
cpu_im2col
,
op
::
im2col
>
();
apply_map
[
"convolution"
]
=
extend_op
<
cpu_convolution
,
op
::
convolution
>
();
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"batch_norm_inference"
]
=
apply_map
[
"batch_norm_inference"
]
=
extend_op
<
cpu_batch_norm_inference
,
op
::
batch_norm_inference
>
();
extend_op
<
cpu_batch_norm_inference
,
op
::
batch_norm_inference
>
();
apply_map
[
"convolution"
]
=
extend_op
<
cpu_convolution
,
op
::
convolution
>
();
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"elu"
]
=
extend_op
<
cpu_unary
<
elu_op
>
,
op
::
elu
>
();
apply_map
[
"im2col"
]
=
extend_op
<
cpu_im2col
,
op
::
im2col
>
();
apply_map
[
"leaky_relu"
]
=
extend_op
<
cpu_unary
<
leaky_relu_op
>
,
op
::
leaky_relu
>
();
apply_map
[
"logsoftmax"
]
=
extend_op
<
cpu_logsoftmax
,
op
::
logsoftmax
>
();
apply_map
[
"lrn"
]
=
extend_op
<
cpu_lrn
,
op
::
lrn
>
();
apply_map
[
"lrn"
]
=
extend_op
<
cpu_lrn
,
op
::
lrn
>
();
apply_map
[
"contiguous"
]
=
extend_op
<
cpu_contiguous
,
op
::
contiguous
>
();
apply_map
[
"pad"
]
=
extend_op
<
cpu_pad
,
op
::
pad
>
();
apply_map
[
"pad"
]
=
extend_op
<
cpu_pad
,
op
::
pad
>
();
apply_map
[
"concat"
]
=
extend_op
<
cpu_concat
,
op
::
concat
>
();
apply_map
[
"gather"
]
=
extend_op
<
cpu_gather
,
op
::
gather
>
();
apply_map
[
"logsoftmax"
]
=
extend_op
<
cpu_logsoftmax
,
op
::
logsoftmax
>
();
apply_map
[
"leaky_relu"
]
=
extend_op
<
cpu_unary
<
leaky_relu_op
>
,
op
::
leaky_relu
>
();
apply_map
[
"elu"
]
=
extend_op
<
cpu_unary
<
elu_op
>
,
op
::
elu
>
();
apply_map
[
"identity"
]
=
simple_op
<
cpu_unary
<
identity_op
>>
();
apply_map
[
"abs"
]
=
simple_op
<
cpu_unary
<
abs_op
>>
();
apply_map
[
"sinh"
]
=
simple_op
<
cpu_unary
<
sinh_op
>>
();
apply_map
[
"cosh"
]
=
simple_op
<
cpu_unary
<
cosh_op
>>
();
apply_map
[
"tanh"
]
=
simple_op
<
cpu_unary
<
tanh_op
>>
();
apply_map
[
"sigmoid"
]
=
simple_op
<
cpu_unary
<
sigmoid_op
>>
();
apply_map
[
"exp"
]
=
simple_op
<
cpu_unary
<
exp_op
>>
();
apply_map
[
"log"
]
=
simple_op
<
cpu_unary
<
log_op
>>
();
apply_map
[
"neg"
]
=
simple_op
<
cpu_unary
<
neg_op
>>
();
apply_map
[
"sin"
]
=
simple_op
<
cpu_unary
<
sin_op
>>
();
apply_map
[
"cos"
]
=
simple_op
<
cpu_unary
<
cos_op
>>
();
apply_map
[
"tan"
]
=
simple_op
<
cpu_unary
<
tan_op
>>
();
apply_map
[
"asin"
]
=
simple_op
<
cpu_unary
<
asin_op
>>
();
apply_map
[
"acos"
]
=
simple_op
<
cpu_unary
<
acos_op
>>
();
apply_map
[
"atan"
]
=
simple_op
<
cpu_unary
<
atan_op
>>
();
apply_map
[
"relu"
]
=
simple_op
<
cpu_unary
<
relu_op
>>
();
apply_map
[
"add"
]
=
simple_op
<
cpu_binary
<
add_op
>>
();
apply_map
[
"sub"
]
=
simple_op
<
cpu_binary
<
sub_op
>>
();
apply_map
[
"mul"
]
=
simple_op
<
cpu_binary
<
mul_op
>>
();
apply_map
[
"div"
]
=
simple_op
<
cpu_binary
<
div_op
>>
();
apply_map
[
"max"
]
=
simple_op
<
cpu_binary
<
max_op
>>
();
apply_map
[
"min"
]
=
simple_op
<
cpu_binary
<
min_op
>>
();
apply_map
[
"softmax"
]
=
simple_op
<
softmax2d
>
();
apply_map
[
"softmax"
]
=
simple_op
<
softmax2d
>
();
}
}
...
@@ -866,9 +676,18 @@ struct cpu_apply
...
@@ -866,9 +676,18 @@ struct cpu_apply
{
{
apply_map
.
at
(
it
->
name
())(
it
);
apply_map
.
at
(
it
->
name
())(
it
);
}
}
else
if
(
is_context_free
(
it
->
get_operator
()))
{
apply_cpu_op
(
it
);
}
}
}
}
}
// Replaces `ins` in the program with a cpu_op wrapping the instruction's
// current operator, keeping the same inputs.
void apply_cpu_op(instruction_ref ins)
{
    prog->replace_instruction(ins, cpu_op{ins->get_operator()}, ins->inputs());
}
template
<
class
T
>
template
<
class
T
>
void
apply_simple_op
(
instruction_ref
ins
)
void
apply_simple_op
(
instruction_ref
ins
)
{
{
...
...
src/targets/gpu/CMakeLists.txt
View file @
bf6f82d8
...
@@ -32,6 +32,7 @@ add_library(migraphx_device
...
@@ -32,6 +32,7 @@ add_library(migraphx_device
device/pad.cpp
device/pad.cpp
device/gather.cpp
device/gather.cpp
device/sub.cpp
device/sub.cpp
device/clip.cpp
)
)
set_target_properties
(
migraphx_device PROPERTIES EXPORT_NAME device
)
set_target_properties
(
migraphx_device PROPERTIES EXPORT_NAME device
)
rocm_clang_tidy_check
(
migraphx_device
)
rocm_clang_tidy_check
(
migraphx_device
)
...
@@ -66,6 +67,7 @@ add_library(migraphx_gpu
...
@@ -66,6 +67,7 @@ add_library(migraphx_gpu
lrn.cpp
lrn.cpp
schedule_model.cpp
schedule_model.cpp
adjust_allocation.cpp
adjust_allocation.cpp
clip.cpp
)
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
...
...
src/targets/gpu/abs.cpp
View file @
bf6f82d8
...
@@ -7,7 +7,7 @@ namespace gpu {
...
@@ -7,7 +7,7 @@ namespace gpu {
shape
miopen_abs
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
miopen_abs
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
not_broadcast
ed
();
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
pack
ed
();
return
inputs
.
at
(
0
);
return
inputs
.
at
(
0
);
}
}
...
...
src/targets/gpu/clip.cpp
0 → 100644
View file @
bf6f82d8
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/clip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Drops the trailing input shape and lets the wrapped clip operator compute
// the result shape from the remaining ones. In compute() the last argument is
// the buffer that is written to and returned.
shape hip_clip::compute_shape(std::vector<shape> inputs) const
{
    inputs.pop_back();
    return op.compute_shape(inputs);
}
// Launches the device clip on the context's stream, reading from the first
// argument and writing into the last one, which is also the value returned.
argument hip_clip::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    const argument& output = args.back();
    const argument& input  = args.front();
    device::clip(ctx.get_stream().get(), output, input, op.max_val, op.min_val);
    return output;
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/clip.cpp
0 → 100644
View file @
bf6f82d8
#include <migraphx/gpu/device/clip.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Clamps values of `arg1` to the range [min, max], with `result` as the
// destination handed to nary().
// NOTE(review): presumably nary() applies the callable to every element of
// arg1 and stores the value in result -- confirm against nary.hpp.
// The float bounds are converted to the element type before comparing.
void clip(hipStream_t stream,
          const argument& result,
          const argument& arg1,
          const float max,
          const float min)
{
    nary(stream, result, arg1)([max, min](auto x) {
        using value_type               = decltype(x);
        const value_type lower_clamped = std::max<value_type>(min, x);
        return std::min<value_type>(lower_clamped, max);
    });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/gather.cpp
View file @
bf6f82d8
...
@@ -16,7 +16,7 @@ argument gather(hipStream_t stream,
...
@@ -16,7 +16,7 @@ argument gather(hipStream_t stream,
std
::
vector
<
migraphx
::
argument
>
args
,
std
::
vector
<
migraphx
::
argument
>
args
,
int
axis
)
int
axis
)
{
{
int
axis_index
=
(
axis
<
0
)
?
(
axis
+
args
[
0
].
get_shape
().
lens
().
size
())
:
axis
;
auto
axis_index
=
(
axis
<
0
)
?
(
axis
+
args
[
0
].
get_shape
().
lens
().
size
())
:
axis
;
visit_all
(
args
.
back
(),
args
[
0
])([
&
](
auto
output
,
auto
input
)
{
visit_all
(
args
.
back
(),
args
[
0
])([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
output_shape
.
elements
();
std
::
size_t
nelements
=
output_shape
.
elements
();
args
[
1
].
visit
([
&
](
auto
indices
)
{
args
[
1
].
visit
([
&
](
auto
indices
)
{
...
...
src/targets/gpu/fuse_ops.cpp
View file @
bf6f82d8
...
@@ -162,7 +162,10 @@ struct hip_triadd
...
@@ -162,7 +162,10 @@ struct hip_triadd
device
::
add
(
ctx
.
get_stream
().
get
(),
args
.
at
(
3
),
args
.
at
(
0
),
args
.
at
(
1
),
args
.
at
(
2
));
device
::
add
(
ctx
.
get_stream
().
get
(),
args
.
at
(
3
),
args
.
at
(
0
),
args
.
at
(
1
),
args
.
at
(
2
));
return
args
.
at
(
3
);
return
args
.
at
(
3
);
}
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
struct
hip_triadd_relu
struct
hip_triadd_relu
...
@@ -178,7 +181,10 @@ struct hip_triadd_relu
...
@@ -178,7 +181,10 @@ struct hip_triadd_relu
device
::
add_relu
(
ctx
.
get_stream
().
get
(),
args
.
at
(
3
),
args
.
at
(
0
),
args
.
at
(
1
),
args
.
at
(
2
));
device
::
add_relu
(
ctx
.
get_stream
().
get
(),
args
.
at
(
3
),
args
.
at
(
0
),
args
.
at
(
1
),
args
.
at
(
2
));
return
args
.
at
(
3
);
return
args
.
at
(
3
);
}
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
struct
hip_add_relu
struct
hip_add_relu
...
@@ -194,7 +200,10 @@ struct hip_add_relu
...
@@ -194,7 +200,10 @@ struct hip_add_relu
device
::
add_relu
(
ctx
.
get_stream
().
get
(),
args
.
at
(
2
),
args
.
at
(
0
),
args
.
at
(
1
));
device
::
add_relu
(
ctx
.
get_stream
().
get
(),
args
.
at
(
2
),
args
.
at
(
0
),
args
.
at
(
1
));
return
args
.
at
(
2
);
return
args
.
at
(
2
);
}
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
struct
find_add_relu
struct
find_add_relu
...
@@ -285,7 +294,10 @@ struct miopen_conv_bias
...
@@ -285,7 +294,10 @@ struct miopen_conv_bias
void
finalize
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
f
.
compile
(
ctx
);
}
void
finalize
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
f
.
compile
(
ctx
);
}
shape
get_workspace
(
context
&
ctx
)
{
return
f
.
get_workspace
(
ctx
);
}
shape
get_workspace
(
context
&
ctx
)
{
return
f
.
get_workspace
(
ctx
);
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
struct
miopen_conv_bias_relu
struct
miopen_conv_bias_relu
...
@@ -332,7 +344,10 @@ struct miopen_conv_bias_relu
...
@@ -332,7 +344,10 @@ struct miopen_conv_bias_relu
}
}
void
finalize
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
f
.
compile
(
ctx
);
}
void
finalize
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
f
.
compile
(
ctx
);
}
shape
get_workspace
(
context
&
ctx
)
{
return
f
.
get_workspace
(
ctx
);
}
shape
get_workspace
(
context
&
ctx
)
{
return
f
.
get_workspace
(
ctx
);
}
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
template
<
class
...
Ms
>
template
<
class
...
Ms
>
...
...
src/targets/gpu/include/migraphx/gpu/abs.hpp
View file @
bf6f82d8
...
@@ -13,11 +13,21 @@ struct context;
...
@@ -13,11 +13,21 @@ struct context;
struct
miopen_abs
struct
miopen_abs
{
{
shared
<
activation_descriptor
>
ad
;
shared
<
activation_descriptor
>
ad
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
gpu
::
reflect
(
self
.
ad
.
get
(),
f
);
}
std
::
string
name
()
const
{
return
"gpu::abs"
;
}
std
::
string
name
()
const
{
return
"gpu::abs"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/batchnorm.hpp
View file @
bf6f82d8
...
@@ -13,11 +13,21 @@ struct context;
...
@@ -13,11 +13,21 @@ struct context;
struct
miopen_batch_norm_inference
struct
miopen_batch_norm_inference
{
{
op
::
batch_norm_inference
op
;
op
::
batch_norm_inference
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::batch_norm_inference"
;
}
std
::
string
name
()
const
{
return
"gpu::batch_norm_inference"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/clip.hpp
0 → 100644
View file @
bf6f82d8
#ifndef MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_CLIP_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/clip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
/// GPU-side clip operator; a thin wrapper around `op::clip`.
struct hip_clip
{
    /// The wrapped clip operator.
    op::clip op;

    /// Reflection forwards to the wrapped operator's fields.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    /// Name under which the GPU operator is registered.
    std::string name() const { return "gpu::clip"; }

    /// Computes the result shape from the input shapes (defined out of line).
    shape compute_shape(std::vector<shape> inputs) const;

    /// Runs the operator on `args` (defined out of line).
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;

    /// Index of the input that the output aliases: the last one.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/concat.hpp
View file @
bf6f82d8
...
@@ -14,11 +14,20 @@ struct hip_concat
...
@@ -14,11 +14,20 @@ struct hip_concat
{
{
op
::
concat
op
;
op
::
concat
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::concat"
;
}
std
::
string
name
()
const
{
return
"gpu::concat"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/contiguous.hpp
View file @
bf6f82d8
...
@@ -13,10 +13,20 @@ struct context;
...
@@ -13,10 +13,20 @@ struct context;
struct
miopen_contiguous
struct
miopen_contiguous
{
{
op
::
contiguous
op
;
op
::
contiguous
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::contiguous"
;
}
std
::
string
name
()
const
{
return
"gpu::contiguous"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
,
shape
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
argument
compute
(
context
&
,
shape
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
bf6f82d8
...
@@ -31,7 +31,10 @@ struct miopen_convolution
...
@@ -31,7 +31,10 @@ struct miopen_convolution
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
shape
compile
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
shape
compile
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/device/clip.hpp
0 → 100644
View file @
bf6f82d8
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_CLIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_CLIP_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
/// Device clip entry point.
///
/// @param stream HIP stream the work is issued on.
/// @param result Destination argument.
/// @param arg1   Source argument.
/// @param max    Upper clamp bound.
/// @param min    Lower clamp bound.
void clip(hipStream_t stream,
          const argument& result,
          const argument& arg1,
          float max,
          float min);
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/elu.hpp
View file @
bf6f82d8
...
@@ -13,11 +13,21 @@ struct context;
...
@@ -13,11 +13,21 @@ struct context;
struct
miopen_elu
struct
miopen_elu
{
{
shared
<
activation_descriptor
>
ad
;
shared
<
activation_descriptor
>
ad
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
gpu
::
reflect
(
self
.
ad
.
get
(),
f
);
}
std
::
string
name
()
const
{
return
"gpu::elu"
;
}
std
::
string
name
()
const
{
return
"gpu::elu"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/gather.hpp
View file @
bf6f82d8
...
@@ -14,11 +14,21 @@ struct context;
...
@@ -14,11 +14,21 @@ struct context;
struct
hip_gather
struct
hip_gather
{
{
op
::
gather
op
;
op
::
gather
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::gather"
;
}
std
::
string
name
()
const
{
return
"gpu::gather"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
int
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
};
}
// namespace gpu
}
// namespace gpu
...
...
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment